From deb7ecf1e46862d54f4b102f2d163cfbcfc37f3b Mon Sep 17 00:00:00 2001
From: Adin Scannell <ascannell@google.com>
Date: Sat, 1 Jun 2019 22:34:39 -0700
Subject: [PATCH 001/211] Initial commit for virtual Go branch.

This branch is a synthetic branch that will allow for the use of
standard Go tools for building. It will be updated automatically.
---
 README.md | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 README.md

diff --git a/README.md b/README.md
new file mode 100644
index 0000000000..d12ba732d6
--- /dev/null
+++ b/README.md
@@ -0,0 +1,5 @@
+# gVisor
+
+This branch is a synthetic branch, containing only Go sources, that is
+compatible with standard Go tools. See the `master` branch for authoritative
+sources and tests.

From ae1cdd6d5a522e4de94747348e35d1964dc43c77 Mon Sep 17 00:00:00 2001
From: Ghanan Gowripalan <ghanan@google.com>
Date: Fri, 14 Aug 2020 17:27:23 -0700
Subject: [PATCH 002/211] Use a single NetworkEndpoint per NIC per protocol

The NetworkEndpoint does not need to be created for each address.
Most of the work the NetworkEndpoint does is address agnostic.

PiperOrigin-RevId: 326759605
---
 pkg/tcpip/network/BUILD             |  1 +
 pkg/tcpip/network/arp/arp.go        | 15 +----
 pkg/tcpip/network/ip_test.go        | 77 +++++++++++++-------------
 pkg/tcpip/network/ipv4/icmp.go      |  7 ++-
 pkg/tcpip/network/ipv4/ipv4.go      | 20 +------
 pkg/tcpip/network/ipv6/icmp.go      |  7 ++-
 pkg/tcpip/network/ipv6/icmp_test.go |  6 +-
 pkg/tcpip/network/ipv6/ipv6.go      | 20 ++-----
 pkg/tcpip/network/ipv6/ndp_test.go  |  5 +-
 pkg/tcpip/stack/forwarder_test.go   | 18 +-----
 pkg/tcpip/stack/ndp.go              |  8 +--
 pkg/tcpip/stack/nic.go              | 86 +++++++++++++----------------
 pkg/tcpip/stack/nic_test.go         | 30 +++-------
 pkg/tcpip/stack/registration.go     |  8 +--
 pkg/tcpip/stack/stack.go            |  6 +-
 pkg/tcpip/stack/stack_test.go       | 20 ++-----
 pkg/tcpip/transport/udp/udp_test.go | 10 +---
 17 files changed, 123 insertions(+), 221 deletions(-)

diff --git a/pkg/tcpip/network/BUILD b/pkg/tcpip/network/BUILD
index 6a4839fb89..46083925c5 100644
--- a/pkg/tcpip/network/BUILD
+++ b/pkg/tcpip/network/BUILD
@@ -12,6 +12,7 @@ go_test(
         "//pkg/tcpip",
         "//pkg/tcpip/buffer",
         "//pkg/tcpip/header",
+        "//pkg/tcpip/link/channel",
         "//pkg/tcpip/link/loopback",
         "//pkg/tcpip/network/ipv4",
         "//pkg/tcpip/network/ipv6",
diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go
index 1ad788a174..920872c3f2 100644
--- a/pkg/tcpip/network/arp/arp.go
+++ b/pkg/tcpip/network/arp/arp.go
@@ -66,14 +66,6 @@ func (e *endpoint) Capabilities() stack.LinkEndpointCapabilities {
 	return e.linkEP.Capabilities()
 }
 
-func (e *endpoint) ID() *stack.NetworkEndpointID {
-	return &stack.NetworkEndpointID{ProtocolAddress}
-}
-
-func (e *endpoint) PrefixLen() int {
-	return 0
-}
-
 func (e *endpoint) MaxHeaderLength() uint16 {
 	return e.linkEP.MaxHeaderLength() + header.ARPSize
 }
@@ -142,16 +134,13 @@ func (*protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) {
 	return tcpip.Address(h.ProtocolAddressSender()), ProtocolAddress
 }
 
-func (p *protocol) NewEndpoint(nicID tcpip.NICID, addrWithPrefix tcpip.AddressWithPrefix, linkAddrCache stack.LinkAddressCache, dispatcher stack.TransportDispatcher, sender stack.LinkEndpoint, st *stack.Stack) (stack.NetworkEndpoint, *tcpip.Error) {
-	if addrWithPrefix.Address != ProtocolAddress {
-		return nil, tcpip.ErrBadLocalAddress
-	}
+func (p *protocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache stack.LinkAddressCache, dispatcher stack.TransportDispatcher, sender stack.LinkEndpoint, st *stack.Stack) stack.NetworkEndpoint {
 	return &endpoint{
 		protocol:      p,
 		nicID:         nicID,
 		linkEP:        sender,
 		linkAddrCache: linkAddrCache,
-	}, nil
+	}
 }
 
 // LinkAddressProtocol implements stack.LinkAddressResolver.LinkAddressProtocol.
diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go
index 491d936a1c..9007346fe0 100644
--- a/pkg/tcpip/network/ip_test.go
+++ b/pkg/tcpip/network/ip_test.go
@@ -20,6 +20,7 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
+	"gvisor.dev/gvisor/pkg/tcpip/link/channel"
 	"gvisor.dev/gvisor/pkg/tcpip/link/loopback"
 	"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
 	"gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
@@ -41,6 +42,7 @@ const (
 	ipv6SubnetAddr     = "\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 	ipv6SubnetMask     = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00"
 	ipv6Gateway        = "\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03"
+	nicID              = 1
 )
 
 // testObject implements two interfaces: LinkEndpoint and TransportDispatcher.
@@ -195,15 +197,15 @@ func buildIPv4Route(local, remote tcpip.Address) (stack.Route, *tcpip.Error) {
 		NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol()},
 		TransportProtocols: []stack.TransportProtocol{udp.NewProtocol(), tcp.NewProtocol()},
 	})
-	s.CreateNIC(1, loopback.New())
-	s.AddAddress(1, ipv4.ProtocolNumber, local)
+	s.CreateNIC(nicID, loopback.New())
+	s.AddAddress(nicID, ipv4.ProtocolNumber, local)
 	s.SetRouteTable([]tcpip.Route{{
 		Destination: header.IPv4EmptySubnet,
 		Gateway:     ipv4Gateway,
 		NIC:         1,
 	}})
 
-	return s.FindRoute(1, local, remote, ipv4.ProtocolNumber, false /* multicastLoop */)
+	return s.FindRoute(nicID, local, remote, ipv4.ProtocolNumber, false /* multicastLoop */)
 }
 
 func buildIPv6Route(local, remote tcpip.Address) (stack.Route, *tcpip.Error) {
@@ -211,31 +213,45 @@ func buildIPv6Route(local, remote tcpip.Address) (stack.Route, *tcpip.Error) {
 		NetworkProtocols:   []stack.NetworkProtocol{ipv6.NewProtocol()},
 		TransportProtocols: []stack.TransportProtocol{udp.NewProtocol(), tcp.NewProtocol()},
 	})
-	s.CreateNIC(1, loopback.New())
-	s.AddAddress(1, ipv6.ProtocolNumber, local)
+	s.CreateNIC(nicID, loopback.New())
+	s.AddAddress(nicID, ipv6.ProtocolNumber, local)
 	s.SetRouteTable([]tcpip.Route{{
 		Destination: header.IPv6EmptySubnet,
 		Gateway:     ipv6Gateway,
 		NIC:         1,
 	}})
 
-	return s.FindRoute(1, local, remote, ipv6.ProtocolNumber, false /* multicastLoop */)
+	return s.FindRoute(nicID, local, remote, ipv6.ProtocolNumber, false /* multicastLoop */)
 }
 
-func buildDummyStack() *stack.Stack {
-	return stack.New(stack.Options{
-		NetworkProtocols:   []stack.NetworkProtocol{ipv6.NewProtocol()},
+func buildDummyStack(t *testing.T) *stack.Stack {
+	t.Helper()
+
+	s := stack.New(stack.Options{
+		NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
 		TransportProtocols: []stack.TransportProtocol{udp.NewProtocol(), tcp.NewProtocol()},
 	})
+	e := channel.New(0, 1280, "")
+	if err := s.CreateNIC(nicID, e); err != nil {
+		t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
+	}
+
+	if err := s.AddAddress(nicID, header.IPv4ProtocolNumber, localIpv4Addr); err != nil {
+		t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, header.IPv4ProtocolNumber, localIpv4Addr, err)
+	}
+
+	if err := s.AddAddress(nicID, header.IPv6ProtocolNumber, localIpv6Addr); err != nil {
+		t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, header.IPv6ProtocolNumber, localIpv6Addr, err)
+	}
+
+	return s
 }
 
 func TestIPv4Send(t *testing.T) {
 	o := testObject{t: t, v4: true}
 	proto := ipv4.NewProtocol()
-	ep, err := proto.NewEndpoint(1, tcpip.AddressWithPrefix{localIpv4Addr, localIpv4PrefixLen}, nil, nil, &o, buildDummyStack())
-	if err != nil {
-		t.Fatalf("NewEndpoint failed: %v", err)
-	}
+	ep := proto.NewEndpoint(nicID, nil, nil, &o, buildDummyStack(t))
+	defer ep.Close()
 
 	// Allocate and initialize the payload view.
 	payload := buffer.NewView(100)
@@ -271,10 +287,8 @@ func TestIPv4Send(t *testing.T) {
 func TestIPv4Receive(t *testing.T) {
 	o := testObject{t: t, v4: true}
 	proto := ipv4.NewProtocol()
-	ep, err := proto.NewEndpoint(1, tcpip.AddressWithPrefix{localIpv4Addr, localIpv4PrefixLen}, nil, &o, nil, buildDummyStack())
-	if err != nil {
-		t.Fatalf("NewEndpoint failed: %v", err)
-	}
+	ep := proto.NewEndpoint(nicID, nil, &o, nil, buildDummyStack(t))
+	defer ep.Close()
 
 	totalLen := header.IPv4MinimumSize + 30
 	view := buffer.NewView(totalLen)
@@ -343,10 +357,7 @@ func TestIPv4ReceiveControl(t *testing.T) {
 		t.Run(c.name, func(t *testing.T) {
 			o := testObject{t: t}
 			proto := ipv4.NewProtocol()
-			ep, err := proto.NewEndpoint(1, tcpip.AddressWithPrefix{localIpv4Addr, localIpv4PrefixLen}, nil, &o, nil, buildDummyStack())
-			if err != nil {
-				t.Fatalf("NewEndpoint failed: %v", err)
-			}
+			ep := proto.NewEndpoint(nicID, nil, &o, nil, buildDummyStack(t))
 			defer ep.Close()
 
 			const dataOffset = header.IPv4MinimumSize*2 + header.ICMPv4MinimumSize
@@ -407,10 +418,8 @@ func TestIPv4ReceiveControl(t *testing.T) {
 func TestIPv4FragmentationReceive(t *testing.T) {
 	o := testObject{t: t, v4: true}
 	proto := ipv4.NewProtocol()
-	ep, err := proto.NewEndpoint(1, tcpip.AddressWithPrefix{localIpv4Addr, localIpv4PrefixLen}, nil, &o, nil, buildDummyStack())
-	if err != nil {
-		t.Fatalf("NewEndpoint failed: %v", err)
-	}
+	ep := proto.NewEndpoint(nicID, nil, &o, nil, buildDummyStack(t))
+	defer ep.Close()
 
 	totalLen := header.IPv4MinimumSize + 24
 
@@ -486,10 +495,8 @@ func TestIPv4FragmentationReceive(t *testing.T) {
 func TestIPv6Send(t *testing.T) {
 	o := testObject{t: t}
 	proto := ipv6.NewProtocol()
-	ep, err := proto.NewEndpoint(1, tcpip.AddressWithPrefix{localIpv6Addr, localIpv6PrefixLen}, nil, nil, &o, buildDummyStack())
-	if err != nil {
-		t.Fatalf("NewEndpoint failed: %v", err)
-	}
+	ep := proto.NewEndpoint(nicID, nil, &o, channel.New(0, 1280, ""), buildDummyStack(t))
+	defer ep.Close()
 
 	// Allocate and initialize the payload view.
 	payload := buffer.NewView(100)
@@ -525,10 +532,8 @@ func TestIPv6Send(t *testing.T) {
 func TestIPv6Receive(t *testing.T) {
 	o := testObject{t: t}
 	proto := ipv6.NewProtocol()
-	ep, err := proto.NewEndpoint(1, tcpip.AddressWithPrefix{localIpv6Addr, localIpv6PrefixLen}, nil, &o, nil, buildDummyStack())
-	if err != nil {
-		t.Fatalf("NewEndpoint failed: %v", err)
-	}
+	ep := proto.NewEndpoint(nicID, nil, &o, nil, buildDummyStack(t))
+	defer ep.Close()
 
 	totalLen := header.IPv6MinimumSize + 30
 	view := buffer.NewView(totalLen)
@@ -606,11 +611,7 @@ func TestIPv6ReceiveControl(t *testing.T) {
 		t.Run(c.name, func(t *testing.T) {
 			o := testObject{t: t}
 			proto := ipv6.NewProtocol()
-			ep, err := proto.NewEndpoint(1, tcpip.AddressWithPrefix{localIpv6Addr, localIpv6PrefixLen}, nil, &o, nil, buildDummyStack())
-			if err != nil {
-				t.Fatalf("NewEndpoint failed: %v", err)
-			}
-
+			ep := proto.NewEndpoint(nicID, nil, &o, nil, buildDummyStack(t))
 			defer ep.Close()
 
 			dataOffset := header.IPv6MinimumSize*2 + header.ICMPv6MinimumSize
diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go
index 067d770f3e..b5659a36b8 100644
--- a/pkg/tcpip/network/ipv4/icmp.go
+++ b/pkg/tcpip/network/ipv4/icmp.go
@@ -37,8 +37,9 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack
 	// false.
 	//
 	// Drop packet if it doesn't have the basic IPv4 header or if the
-	// original source address doesn't match the endpoint's address.
-	if hdr.SourceAddress() != e.id.LocalAddress {
+	// original source address doesn't match an address we own.
+	src := hdr.SourceAddress()
+	if e.stack.CheckLocalAddress(e.NICID(), ProtocolNumber, src) == 0 {
 		return
 	}
 
@@ -53,7 +54,7 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack
 	// Skip the ip header, then deliver control message.
 	pkt.Data.TrimFront(hlen)
 	p := hdr.TransportProtocol()
-	e.dispatcher.DeliverTransportControlPacket(e.id.LocalAddress, hdr.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt)
+	e.dispatcher.DeliverTransportControlPacket(src, hdr.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt)
 }
 
 func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer) {
diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go
index 3cd48ceb3b..79872ec9a0 100644
--- a/pkg/tcpip/network/ipv4/ipv4.go
+++ b/pkg/tcpip/network/ipv4/ipv4.go
@@ -52,8 +52,6 @@ const (
 
 type endpoint struct {
 	nicID      tcpip.NICID
-	id         stack.NetworkEndpointID
-	prefixLen  int
 	linkEP     stack.LinkEndpoint
 	dispatcher stack.TransportDispatcher
 	protocol   *protocol
@@ -61,18 +59,14 @@ type endpoint struct {
 }
 
 // NewEndpoint creates a new ipv4 endpoint.
-func (p *protocol) NewEndpoint(nicID tcpip.NICID, addrWithPrefix tcpip.AddressWithPrefix, linkAddrCache stack.LinkAddressCache, dispatcher stack.TransportDispatcher, linkEP stack.LinkEndpoint, st *stack.Stack) (stack.NetworkEndpoint, *tcpip.Error) {
-	e := &endpoint{
+func (p *protocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache stack.LinkAddressCache, dispatcher stack.TransportDispatcher, linkEP stack.LinkEndpoint, st *stack.Stack) stack.NetworkEndpoint {
+	return &endpoint{
 		nicID:      nicID,
-		id:         stack.NetworkEndpointID{LocalAddress: addrWithPrefix.Address},
-		prefixLen:  addrWithPrefix.PrefixLen,
 		linkEP:     linkEP,
 		dispatcher: dispatcher,
 		protocol:   p,
 		stack:      st,
 	}
-
-	return e, nil
 }
 
 // DefaultTTL is the default time-to-live value for this endpoint.
@@ -96,16 +90,6 @@ func (e *endpoint) NICID() tcpip.NICID {
 	return e.nicID
 }
 
-// ID returns the ipv4 endpoint ID.
-func (e *endpoint) ID() *stack.NetworkEndpointID {
-	return &e.id
-}
-
-// PrefixLen returns the ipv4 endpoint subnet prefix length in bits.
-func (e *endpoint) PrefixLen() int {
-	return e.prefixLen
-}
-
 // MaxHeaderLength returns the maximum length needed by ipv4 headers (and
 // underlying protocols).
 func (e *endpoint) MaxHeaderLength() uint16 {
diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go
index 39ae19295f..66d3a953a1 100644
--- a/pkg/tcpip/network/ipv6/icmp.go
+++ b/pkg/tcpip/network/ipv6/icmp.go
@@ -39,8 +39,9 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack
 	// is truncated, which would cause IsValid to return false.
 	//
 	// Drop packet if it doesn't have the basic IPv6 header or if the
-	// original source address doesn't match the endpoint's address.
-	if hdr.SourceAddress() != e.id.LocalAddress {
+	// original source address doesn't match an address we own.
+	src := hdr.SourceAddress()
+	if e.stack.CheckLocalAddress(e.NICID(), ProtocolNumber, src) == 0 {
 		return
 	}
 
@@ -67,7 +68,7 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack
 	}
 
 	// Deliver the control packet to the transport endpoint.
-	e.dispatcher.DeliverTransportControlPacket(e.id.LocalAddress, hdr.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt)
+	e.dispatcher.DeliverTransportControlPacket(src, hdr.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt)
 }
 
 func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragmentHeader bool) {
diff --git a/pkg/tcpip/network/ipv6/icmp_test.go b/pkg/tcpip/network/ipv6/icmp_test.go
index 2a2f7de015..9e4eeea77f 100644
--- a/pkg/tcpip/network/ipv6/icmp_test.go
+++ b/pkg/tcpip/network/ipv6/icmp_test.go
@@ -114,10 +114,8 @@ func TestICMPCounts(t *testing.T) {
 	if netProto == nil {
 		t.Fatalf("cannot find protocol instance for network protocol %d", ProtocolNumber)
 	}
-	ep, err := netProto.NewEndpoint(0, tcpip.AddressWithPrefix{lladdr1, netProto.DefaultPrefixLen()}, &stubLinkAddressCache{}, &stubDispatcher{}, nil, s)
-	if err != nil {
-		t.Fatalf("NewEndpoint(_) = _, %s, want = _, nil", err)
-	}
+	ep := netProto.NewEndpoint(0, &stubLinkAddressCache{}, &stubDispatcher{}, nil, s)
+	defer ep.Close()
 
 	r, err := s.FindRoute(1, lladdr0, lladdr1, ProtocolNumber, false /* multicastLoop */)
 	if err != nil {
diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index 0ade655b27..0eafe97900 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -46,12 +46,11 @@ const (
 
 type endpoint struct {
 	nicID         tcpip.NICID
-	id            stack.NetworkEndpointID
-	prefixLen     int
 	linkEP        stack.LinkEndpoint
 	linkAddrCache stack.LinkAddressCache
 	dispatcher    stack.TransportDispatcher
 	protocol      *protocol
+	stack         *stack.Stack
 }
 
 // DefaultTTL is the default hop limit for this endpoint.
@@ -70,16 +69,6 @@ func (e *endpoint) NICID() tcpip.NICID {
 	return e.nicID
 }
 
-// ID returns the ipv6 endpoint ID.
-func (e *endpoint) ID() *stack.NetworkEndpointID {
-	return &e.id
-}
-
-// PrefixLen returns the ipv6 endpoint subnet prefix length in bits.
-func (e *endpoint) PrefixLen() int {
-	return e.prefixLen
-}
-
 // Capabilities implements stack.NetworkEndpoint.Capabilities.
 func (e *endpoint) Capabilities() stack.LinkEndpointCapabilities {
 	return e.linkEP.Capabilities()
@@ -464,16 +453,15 @@ func (*protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) {
 }
 
 // NewEndpoint creates a new ipv6 endpoint.
-func (p *protocol) NewEndpoint(nicID tcpip.NICID, addrWithPrefix tcpip.AddressWithPrefix, linkAddrCache stack.LinkAddressCache, dispatcher stack.TransportDispatcher, linkEP stack.LinkEndpoint, st *stack.Stack) (stack.NetworkEndpoint, *tcpip.Error) {
+func (p *protocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache stack.LinkAddressCache, dispatcher stack.TransportDispatcher, linkEP stack.LinkEndpoint, st *stack.Stack) stack.NetworkEndpoint {
 	return &endpoint{
 		nicID:         nicID,
-		id:            stack.NetworkEndpointID{LocalAddress: addrWithPrefix.Address},
-		prefixLen:     addrWithPrefix.PrefixLen,
 		linkEP:        linkEP,
 		linkAddrCache: linkAddrCache,
 		dispatcher:    dispatcher,
 		protocol:      p,
-	}, nil
+		stack:         st,
+	}
 }
 
 // SetOption implements NetworkProtocol.SetOption.
diff --git a/pkg/tcpip/network/ipv6/ndp_test.go b/pkg/tcpip/network/ipv6/ndp_test.go
index 2efa82e60b..af71a7d6bf 100644
--- a/pkg/tcpip/network/ipv6/ndp_test.go
+++ b/pkg/tcpip/network/ipv6/ndp_test.go
@@ -63,10 +63,7 @@ func setupStackAndEndpoint(t *testing.T, llladdr, rlladdr tcpip.Address) (*stack
 		t.Fatalf("cannot find protocol instance for network protocol %d", ProtocolNumber)
 	}
 
-	ep, err := netProto.NewEndpoint(0, tcpip.AddressWithPrefix{rlladdr, netProto.DefaultPrefixLen()}, &stubLinkAddressCache{}, &stubDispatcher{}, nil, s)
-	if err != nil {
-		t.Fatalf("NewEndpoint(_) = _, %s, want = _, nil", err)
-	}
+	ep := netProto.NewEndpoint(0, &stubLinkAddressCache{}, &stubDispatcher{}, nil, s)
 
 	return s, ep
 }
diff --git a/pkg/tcpip/stack/forwarder_test.go b/pkg/tcpip/stack/forwarder_test.go
index 944f622fdf..5a684eb9df 100644
--- a/pkg/tcpip/stack/forwarder_test.go
+++ b/pkg/tcpip/stack/forwarder_test.go
@@ -46,8 +46,6 @@ const (
 // protocol. They're all one byte fields to simplify parsing.
 type fwdTestNetworkEndpoint struct {
 	nicID      tcpip.NICID
-	id         NetworkEndpointID
-	prefixLen  int
 	proto      *fwdTestNetworkProtocol
 	dispatcher TransportDispatcher
 	ep         LinkEndpoint
@@ -61,18 +59,10 @@ func (f *fwdTestNetworkEndpoint) NICID() tcpip.NICID {
 	return f.nicID
 }
 
-func (f *fwdTestNetworkEndpoint) PrefixLen() int {
-	return f.prefixLen
-}
-
 func (*fwdTestNetworkEndpoint) DefaultTTL() uint8 {
 	return 123
 }
 
-func (f *fwdTestNetworkEndpoint) ID() *NetworkEndpointID {
-	return &f.id
-}
-
 func (f *fwdTestNetworkEndpoint) HandlePacket(r *Route, pkt *PacketBuffer) {
 	// Dispatch the packet to the transport protocol.
 	f.dispatcher.DeliverTransportPacket(r, tcpip.TransportProtocolNumber(pkt.NetworkHeader().View()[protocolNumberOffset]), pkt)
@@ -99,7 +89,7 @@ func (f *fwdTestNetworkEndpoint) WritePacket(r *Route, gso *GSO, params NetworkH
 	// endpoint.
 	b := pkt.NetworkHeader().Push(fwdTestNetHeaderLen)
 	b[dstAddrOffset] = r.RemoteAddress[0]
-	b[srcAddrOffset] = f.id.LocalAddress[0]
+	b[srcAddrOffset] = r.LocalAddress[0]
 	b[protocolNumberOffset] = byte(params.Protocol)
 
 	return f.ep.WritePacket(r, gso, fwdTestNetNumber, pkt)
@@ -151,15 +141,13 @@ func (*fwdTestNetworkProtocol) Parse(pkt *PacketBuffer) (tcpip.TransportProtocol
 	return tcpip.TransportProtocolNumber(netHeader[protocolNumberOffset]), true, true
 }
 
-func (f *fwdTestNetworkProtocol) NewEndpoint(nicID tcpip.NICID, addrWithPrefix tcpip.AddressWithPrefix, linkAddrCache LinkAddressCache, dispatcher TransportDispatcher, ep LinkEndpoint, _ *Stack) (NetworkEndpoint, *tcpip.Error) {
+func (f *fwdTestNetworkProtocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache LinkAddressCache, dispatcher TransportDispatcher, ep LinkEndpoint, _ *Stack) NetworkEndpoint {
 	return &fwdTestNetworkEndpoint{
 		nicID:      nicID,
-		id:         NetworkEndpointID{LocalAddress: addrWithPrefix.Address},
-		prefixLen:  addrWithPrefix.PrefixLen,
 		proto:      f,
 		dispatcher: dispatcher,
 		ep:         ep,
-	}, nil
+	}
 }
 
 func (f *fwdTestNetworkProtocol) SetOption(option interface{}) *tcpip.Error {
diff --git a/pkg/tcpip/stack/ndp.go b/pkg/tcpip/stack/ndp.go
index 93567806b5..b0873d1af7 100644
--- a/pkg/tcpip/stack/ndp.go
+++ b/pkg/tcpip/stack/ndp.go
@@ -728,7 +728,7 @@ func (ndp *ndpState) startDuplicateAddressDetection(addr tcpip.Address, ref *ref
 func (ndp *ndpState) sendDADPacket(addr tcpip.Address, ref *referencedNetworkEndpoint) *tcpip.Error {
 	snmc := header.SolicitedNodeAddr(addr)
 
-	r := makeRoute(header.IPv6ProtocolNumber, ref.ep.ID().LocalAddress, snmc, ndp.nic.linkEP.LinkAddress(), ref, false, false)
+	r := makeRoute(header.IPv6ProtocolNumber, ref.address(), snmc, ndp.nic.linkEP.LinkAddress(), ref, false, false)
 	defer r.Release()
 
 	// Route should resolve immediately since snmc is a multicast address so a
@@ -1353,7 +1353,7 @@ func (ndp *ndpState) generateTempSLAACAddr(prefix tcpip.Subnet, prefixState *sla
 		return false
 	}
 
-	stableAddr := prefixState.stableAddr.ref.ep.ID().LocalAddress
+	stableAddr := prefixState.stableAddr.ref.address()
 	now := time.Now()
 
 	// As per RFC 4941 section 3.3 step 4, the valid lifetime of a temporary
@@ -1690,7 +1690,7 @@ func (ndp *ndpState) cleanupSLAACAddrResourcesAndNotify(addr tcpip.AddressWithPr
 
 	prefix := addr.Subnet()
 	state, ok := ndp.slaacPrefixes[prefix]
-	if !ok || state.stableAddr.ref == nil || addr.Address != state.stableAddr.ref.ep.ID().LocalAddress {
+	if !ok || state.stableAddr.ref == nil || addr.Address != state.stableAddr.ref.address() {
 		return
 	}
 
@@ -1867,7 +1867,7 @@ func (ndp *ndpState) startSolicitingRouters() {
 		}
 		ndp.nic.mu.Unlock()
 
-		localAddr := ref.ep.ID().LocalAddress
+		localAddr := ref.address()
 		r := makeRoute(header.IPv6ProtocolNumber, localAddr, header.IPv6AllRoutersMulticastAddress, ndp.nic.linkEP.LinkAddress(), ref, false, false)
 		defer r.Release()
 
diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index 2315ea5b92..10d2b7964c 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -45,8 +45,9 @@ type NIC struct {
 	linkEP  LinkEndpoint
 	context NICContext
 
-	stats NICStats
-	neigh *neighborCache
+	stats            NICStats
+	neigh            *neighborCache
+	networkEndpoints map[tcpip.NetworkProtocolNumber]NetworkEndpoint
 
 	mu struct {
 		sync.RWMutex
@@ -114,12 +115,13 @@ func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, ctx NICC
 	// of IPv6 is supported on this endpoint's LinkEndpoint.
 
 	nic := &NIC{
-		stack:   stack,
-		id:      id,
-		name:    name,
-		linkEP:  ep,
-		context: ctx,
-		stats:   makeNICStats(),
+		stack:            stack,
+		id:               id,
+		name:             name,
+		linkEP:           ep,
+		context:          ctx,
+		stats:            makeNICStats(),
+		networkEndpoints: make(map[tcpip.NetworkProtocolNumber]NetworkEndpoint),
 	}
 	nic.mu.primary = make(map[tcpip.NetworkProtocolNumber][]*referencedNetworkEndpoint)
 	nic.mu.endpoints = make(map[NetworkEndpointID]*referencedNetworkEndpoint)
@@ -140,7 +142,9 @@ func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, ctx NICC
 		nic.mu.packetEPs[netProto] = []PacketEndpoint{}
 	}
 	for _, netProto := range stack.networkProtocols {
-		nic.mu.packetEPs[netProto.Number()] = []PacketEndpoint{}
+		netNum := netProto.Number()
+		nic.mu.packetEPs[netNum] = nil
+		nic.networkEndpoints[netNum] = netProto.NewEndpoint(id, stack, nic, ep, stack)
 	}
 
 	// Check for Neighbor Unreachability Detection support.
@@ -205,7 +209,7 @@ func (n *NIC) disableLocked() *tcpip.Error {
 		// Stop DAD for all the unicast IPv6 endpoints that are in the
 		// permanentTentative state.
 		for _, r := range n.mu.endpoints {
-			if addr := r.ep.ID().LocalAddress; r.getKind() == permanentTentative && header.IsV6UnicastAddress(addr) {
+			if addr := r.address(); r.getKind() == permanentTentative && header.IsV6UnicastAddress(addr) {
 				n.mu.ndp.stopDuplicateAddressDetection(addr)
 			}
 		}
@@ -300,7 +304,7 @@ func (n *NIC) enable() *tcpip.Error {
 	// Addresses may have aleady completed DAD but in the time since the NIC was
 	// last enabled, other devices may have acquired the same addresses.
 	for _, r := range n.mu.endpoints {
-		addr := r.ep.ID().LocalAddress
+		addr := r.address()
 		if k := r.getKind(); (k != permanent && k != permanentTentative) || !header.IsV6UnicastAddress(addr) {
 			continue
 		}
@@ -362,6 +366,11 @@ func (n *NIC) remove() *tcpip.Error {
 		}
 	}
 
+	// Release any resources the network endpoint may hold.
+	for _, ep := range n.networkEndpoints {
+		ep.Close()
+	}
+
 	// Detach from link endpoint, so no packet comes in.
 	n.linkEP.Attach(nil)
 
@@ -510,7 +519,7 @@ func (n *NIC) primaryIPv6EndpointRLocked(remoteAddr tcpip.Address) *referencedNe
 			continue
 		}
 
-		addr := r.ep.ID().LocalAddress
+		addr := r.address()
 		scope, err := header.ScopeForIPv6Address(addr)
 		if err != nil {
 			// Should never happen as we got r from the primary IPv6 endpoint list and
@@ -539,10 +548,10 @@ func (n *NIC) primaryIPv6EndpointRLocked(remoteAddr tcpip.Address) *referencedNe
 		sb := cs[j]
 
 		// Prefer same address as per RFC 6724 section 5 rule 1.
-		if sa.ref.ep.ID().LocalAddress == remoteAddr {
+		if sa.ref.address() == remoteAddr {
 			return true
 		}
-		if sb.ref.ep.ID().LocalAddress == remoteAddr {
+		if sb.ref.address() == remoteAddr {
 			return false
 		}
 
@@ -819,17 +828,11 @@ func (n *NIC) addAddressLocked(protocolAddress tcpip.ProtocolAddress, peb Primar
 		}
 	}
 
-	netProto, ok := n.stack.networkProtocols[protocolAddress.Protocol]
+	ep, ok := n.networkEndpoints[protocolAddress.Protocol]
 	if !ok {
 		return nil, tcpip.ErrUnknownProtocol
 	}
 
-	// Create the new network endpoint.
-	ep, err := netProto.NewEndpoint(n.id, protocolAddress.AddressWithPrefix, n.stack, n, n.linkEP, n.stack)
-	if err != nil {
-		return nil, err
-	}
-
 	isIPv6Unicast := protocolAddress.Protocol == header.IPv6ProtocolNumber && header.IsV6UnicastAddress(protocolAddress.AddressWithPrefix.Address)
 
 	// If the address is an IPv6 address and it is a permanent address,
@@ -842,6 +845,7 @@ func (n *NIC) addAddressLocked(protocolAddress tcpip.ProtocolAddress, peb Primar
 
 	ref := &referencedNetworkEndpoint{
 		refs:       1,
+		addr:       protocolAddress.AddressWithPrefix,
 		ep:         ep,
 		nic:        n,
 		protocol:   protocolAddress.Protocol,
@@ -898,7 +902,7 @@ func (n *NIC) AllAddresses() []tcpip.ProtocolAddress {
 	defer n.mu.RUnlock()
 
 	addrs := make([]tcpip.ProtocolAddress, 0, len(n.mu.endpoints))
-	for nid, ref := range n.mu.endpoints {
+	for _, ref := range n.mu.endpoints {
 		// Don't include tentative, expired or temporary endpoints to
 		// avoid confusion and prevent the caller from using those.
 		switch ref.getKind() {
@@ -907,11 +911,8 @@ func (n *NIC) AllAddresses() []tcpip.ProtocolAddress {
 		}
 
 		addrs = append(addrs, tcpip.ProtocolAddress{
-			Protocol: ref.protocol,
-			AddressWithPrefix: tcpip.AddressWithPrefix{
-				Address:   nid.LocalAddress,
-				PrefixLen: ref.ep.PrefixLen(),
-			},
+			Protocol:          ref.protocol,
+			AddressWithPrefix: ref.addrWithPrefix(),
 		})
 	}
 	return addrs
@@ -934,11 +935,8 @@ func (n *NIC) PrimaryAddresses() []tcpip.ProtocolAddress {
 			}
 
 			addrs = append(addrs, tcpip.ProtocolAddress{
-				Protocol: proto,
-				AddressWithPrefix: tcpip.AddressWithPrefix{
-					Address:   ref.ep.ID().LocalAddress,
-					PrefixLen: ref.ep.PrefixLen(),
-				},
+				Protocol:          proto,
+				AddressWithPrefix: ref.addrWithPrefix(),
 			})
 		}
 	}
@@ -969,10 +967,7 @@ func (n *NIC) primaryAddress(proto tcpip.NetworkProtocolNumber) tcpip.AddressWit
 		}
 
 		if !ref.deprecated {
-			return tcpip.AddressWithPrefix{
-				Address:   ref.ep.ID().LocalAddress,
-				PrefixLen: ref.ep.PrefixLen(),
-			}
+			return ref.addrWithPrefix()
 		}
 
 		if deprecatedEndpoint == nil {
@@ -981,10 +976,7 @@ func (n *NIC) primaryAddress(proto tcpip.NetworkProtocolNumber) tcpip.AddressWit
 	}
 
 	if deprecatedEndpoint != nil {
-		return tcpip.AddressWithPrefix{
-			Address:   deprecatedEndpoint.ep.ID().LocalAddress,
-			PrefixLen: deprecatedEndpoint.ep.PrefixLen(),
-		}
+		return deprecatedEndpoint.addrWithPrefix()
 	}
 
 	return tcpip.AddressWithPrefix{}
@@ -1048,7 +1040,7 @@ func (n *NIC) insertPrimaryEndpointLocked(r *referencedNetworkEndpoint, peb Prim
 }
 
 func (n *NIC) removeEndpointLocked(r *referencedNetworkEndpoint) {
-	id := *r.ep.ID()
+	id := NetworkEndpointID{LocalAddress: r.address()}
 
 	// Nothing to do if the reference has already been replaced with a different
 	// one. This happens in the case where 1) this endpoint's ref count hit zero
@@ -1072,8 +1064,6 @@ func (n *NIC) removeEndpointLocked(r *referencedNetworkEndpoint) {
 			break
 		}
 	}
-
-	r.ep.Close()
 }
 
 func (n *NIC) removeEndpoint(r *referencedNetworkEndpoint) {
@@ -1718,6 +1708,7 @@ const (
 
 type referencedNetworkEndpoint struct {
 	ep       NetworkEndpoint
+	addr     tcpip.AddressWithPrefix
 	nic      *NIC
 	protocol tcpip.NetworkProtocolNumber
 
@@ -1743,11 +1734,12 @@ type referencedNetworkEndpoint struct {
 	deprecated bool
 }
 
+func (r *referencedNetworkEndpoint) address() tcpip.Address {
+	return r.addr.Address
+}
+
 func (r *referencedNetworkEndpoint) addrWithPrefix() tcpip.AddressWithPrefix {
-	return tcpip.AddressWithPrefix{
-		Address:   r.ep.ID().LocalAddress,
-		PrefixLen: r.ep.PrefixLen(),
-	}
+	return r.addr
 }
 
 func (r *referencedNetworkEndpoint) getKind() networkEndpointKind {
diff --git a/pkg/tcpip/stack/nic_test.go b/pkg/tcpip/stack/nic_test.go
index 0870c8d9c2..d312a79eb2 100644
--- a/pkg/tcpip/stack/nic_test.go
+++ b/pkg/tcpip/stack/nic_test.go
@@ -101,11 +101,9 @@ var _ NetworkEndpoint = (*testIPv6Endpoint)(nil)
 // We use this instead of ipv6.endpoint because the ipv6 package depends on
 // the stack package which this test lives in, causing a cyclic dependency.
 type testIPv6Endpoint struct {
-	nicID     tcpip.NICID
-	id        NetworkEndpointID
-	prefixLen int
-	linkEP    LinkEndpoint
-	protocol  *testIPv6Protocol
+	nicID    tcpip.NICID
+	linkEP   LinkEndpoint
+	protocol *testIPv6Protocol
 }
 
 // DefaultTTL implements NetworkEndpoint.DefaultTTL.
@@ -146,16 +144,6 @@ func (*testIPv6Endpoint) WriteHeaderIncludedPacket(*Route, *PacketBuffer) *tcpip
 	return tcpip.ErrNotSupported
 }
 
-// ID implements NetworkEndpoint.ID.
-func (e *testIPv6Endpoint) ID() *NetworkEndpointID {
-	return &e.id
-}
-
-// PrefixLen implements NetworkEndpoint.PrefixLen.
-func (e *testIPv6Endpoint) PrefixLen() int {
-	return e.prefixLen
-}
-
 // NICID implements NetworkEndpoint.NICID.
 func (e *testIPv6Endpoint) NICID() tcpip.NICID {
 	return e.nicID
@@ -204,14 +192,12 @@ func (*testIPv6Protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address)
 }
 
 // NewEndpoint implements NetworkProtocol.NewEndpoint.
-func (p *testIPv6Protocol) NewEndpoint(nicID tcpip.NICID, addrWithPrefix tcpip.AddressWithPrefix, _ LinkAddressCache, _ TransportDispatcher, linkEP LinkEndpoint, _ *Stack) (NetworkEndpoint, *tcpip.Error) {
+func (p *testIPv6Protocol) NewEndpoint(nicID tcpip.NICID, _ LinkAddressCache, _ TransportDispatcher, linkEP LinkEndpoint, _ *Stack) NetworkEndpoint {
 	return &testIPv6Endpoint{
-		nicID:     nicID,
-		id:        NetworkEndpointID{LocalAddress: addrWithPrefix.Address},
-		prefixLen: addrWithPrefix.PrefixLen,
-		linkEP:    linkEP,
-		protocol:  p,
-	}, nil
+		nicID:    nicID,
+		linkEP:   linkEP,
+		protocol: p,
+	}
 }
 
 // SetOption implements NetworkProtocol.SetOption.
diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go
index 4570e89691..aca2f77f89 100644
--- a/pkg/tcpip/stack/registration.go
+++ b/pkg/tcpip/stack/registration.go
@@ -262,12 +262,6 @@ type NetworkEndpoint interface {
 	// header to the given destination address. It takes ownership of pkt.
 	WriteHeaderIncludedPacket(r *Route, pkt *PacketBuffer) *tcpip.Error
 
-	// ID returns the network protocol endpoint ID.
-	ID() *NetworkEndpointID
-
-	// PrefixLen returns the network endpoint's subnet prefix length in bits.
-	PrefixLen() int
-
 	// NICID returns the id of the NIC this endpoint belongs to.
 	NICID() tcpip.NICID
 
@@ -304,7 +298,7 @@ type NetworkProtocol interface {
 	ParseAddresses(v buffer.View) (src, dst tcpip.Address)
 
 	// NewEndpoint creates a new endpoint of this protocol.
-	NewEndpoint(nicID tcpip.NICID, addrWithPrefix tcpip.AddressWithPrefix, linkAddrCache LinkAddressCache, dispatcher TransportDispatcher, sender LinkEndpoint, st *Stack) (NetworkEndpoint, *tcpip.Error)
+	NewEndpoint(nicID tcpip.NICID, linkAddrCache LinkAddressCache, dispatcher TransportDispatcher, sender LinkEndpoint, st *Stack) NetworkEndpoint
 
 	// SetOption allows enabling/disabling protocol specific features.
 	// SetOption returns an error if the option is not supported or the
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 5b19c5d59f..9a1c8e4091 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -1321,7 +1321,7 @@ func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, n
 	if id != 0 && !needRoute {
 		if nic, ok := s.nics[id]; ok && nic.enabled() {
 			if ref := s.getRefEP(nic, localAddr, remoteAddr, netProto); ref != nil {
-				return makeRoute(netProto, ref.ep.ID().LocalAddress, remoteAddr, nic.linkEP.LinkAddress(), ref, s.handleLocal && !nic.isLoopback(), multicastLoop && !nic.isLoopback()), nil
+				return makeRoute(netProto, ref.address(), remoteAddr, nic.linkEP.LinkAddress(), ref, s.handleLocal && !nic.isLoopback(), multicastLoop && !nic.isLoopback()), nil
 			}
 		}
 	} else {
@@ -1334,10 +1334,10 @@ func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, n
 					if len(remoteAddr) == 0 {
 						// If no remote address was provided, then the route
 						// provided will refer to the link local address.
-						remoteAddr = ref.ep.ID().LocalAddress
+						remoteAddr = ref.address()
 					}
 
-					r := makeRoute(netProto, ref.ep.ID().LocalAddress, remoteAddr, nic.linkEP.LinkAddress(), ref, s.handleLocal && !nic.isLoopback(), multicastLoop && !nic.isLoopback())
+					r := makeRoute(netProto, ref.address(), remoteAddr, nic.linkEP.LinkAddress(), ref, s.handleLocal && !nic.isLoopback(), multicastLoop && !nic.isLoopback())
 					r.directedBroadcast = route.Destination.IsBroadcast(remoteAddr)
 
 					if len(route.Gateway) > 0 {
diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go
index 0273b3c637..b5a603098a 100644
--- a/pkg/tcpip/stack/stack_test.go
+++ b/pkg/tcpip/stack/stack_test.go
@@ -70,8 +70,6 @@ const (
 // protocol. They're all one byte fields to simplify parsing.
 type fakeNetworkEndpoint struct {
 	nicID      tcpip.NICID
-	id         stack.NetworkEndpointID
-	prefixLen  int
 	proto      *fakeNetworkProtocol
 	dispatcher stack.TransportDispatcher
 	ep         stack.LinkEndpoint
@@ -85,21 +83,13 @@ func (f *fakeNetworkEndpoint) NICID() tcpip.NICID {
 	return f.nicID
 }
 
-func (f *fakeNetworkEndpoint) PrefixLen() int {
-	return f.prefixLen
-}
-
 func (*fakeNetworkEndpoint) DefaultTTL() uint8 {
 	return 123
 }
 
-func (f *fakeNetworkEndpoint) ID() *stack.NetworkEndpointID {
-	return &f.id
-}
-
 func (f *fakeNetworkEndpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 	// Increment the received packet count in the protocol descriptor.
-	f.proto.packetCount[int(f.id.LocalAddress[0])%len(f.proto.packetCount)]++
+	f.proto.packetCount[int(r.LocalAddress[0])%len(f.proto.packetCount)]++
 
 	// Handle control packets.
 	if pkt.NetworkHeader().View()[protocolNumberOffset] == uint8(fakeControlProtocol) {
@@ -145,7 +135,7 @@ func (f *fakeNetworkEndpoint) WritePacket(r *stack.Route, gso *stack.GSO, params
 	// endpoint.
 	hdr := pkt.NetworkHeader().Push(fakeNetHeaderLen)
 	hdr[dstAddrOffset] = r.RemoteAddress[0]
-	hdr[srcAddrOffset] = f.id.LocalAddress[0]
+	hdr[srcAddrOffset] = r.LocalAddress[0]
 	hdr[protocolNumberOffset] = byte(params.Protocol)
 
 	if r.Loop&stack.PacketLoop != 0 {
@@ -208,15 +198,13 @@ func (*fakeNetworkProtocol) ParseAddresses(v buffer.View) (src, dst tcpip.Addres
 	return tcpip.Address(v[srcAddrOffset : srcAddrOffset+1]), tcpip.Address(v[dstAddrOffset : dstAddrOffset+1])
 }
 
-func (f *fakeNetworkProtocol) NewEndpoint(nicID tcpip.NICID, addrWithPrefix tcpip.AddressWithPrefix, linkAddrCache stack.LinkAddressCache, dispatcher stack.TransportDispatcher, ep stack.LinkEndpoint, _ *stack.Stack) (stack.NetworkEndpoint, *tcpip.Error) {
+func (f *fakeNetworkProtocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache stack.LinkAddressCache, dispatcher stack.TransportDispatcher, ep stack.LinkEndpoint, _ *stack.Stack) stack.NetworkEndpoint {
 	return &fakeNetworkEndpoint{
 		nicID:      nicID,
-		id:         stack.NetworkEndpointID{LocalAddress: addrWithPrefix.Address},
-		prefixLen:  addrWithPrefix.PrefixLen,
 		proto:      f,
 		dispatcher: dispatcher,
 		ep:         ep,
-	}, nil
+	}
 }
 
 func (f *fakeNetworkProtocol) SetOption(option interface{}) *tcpip.Error {
diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go
index 71776d6dbc..f87d99d5a4 100644
--- a/pkg/tcpip/transport/udp/udp_test.go
+++ b/pkg/tcpip/transport/udp/udp_test.go
@@ -1469,13 +1469,10 @@ func TestTTL(t *testing.T) {
 				} else {
 					p = ipv6.NewProtocol()
 				}
-				ep, err := p.NewEndpoint(0, tcpip.AddressWithPrefix{}, nil, nil, nil, stack.New(stack.Options{
+				ep := p.NewEndpoint(0, nil, nil, nil, stack.New(stack.Options{
 					NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
 					TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
 				}))
-				if err != nil {
-					t.Fatal(err)
-				}
 				wantTTL = ep.DefaultTTL()
 				ep.Close()
 			}
@@ -1505,13 +1502,10 @@ func TestSetTTL(t *testing.T) {
 					} else {
 						p = ipv6.NewProtocol()
 					}
-					ep, err := p.NewEndpoint(0, tcpip.AddressWithPrefix{}, nil, nil, nil, stack.New(stack.Options{
+					ep := p.NewEndpoint(0, nil, nil, nil, stack.New(stack.Options{
 						NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
 						TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
 					}))
-					if err != nil {
-						t.Fatal(err)
-					}
 					ep.Close()
 
 					testWrite(c, flow, checker.TTL(wantTTL))

From 703b0d0b47641bcee80402eb7b6cf9b8c1f2cf70 Mon Sep 17 00:00:00 2001
From: Ghanan Gowripalan <ghanan@google.com>
Date: Sat, 15 Aug 2020 00:04:30 -0700
Subject: [PATCH 003/211] Don't support address ranges

Previously, the netstack supported assignment of a range of addresses.
This feature is not used, so remove it.

PiperOrigin-RevId: 326791119
---
 pkg/tcpip/stack/nic.go        |  63 ++---------
 pkg/tcpip/stack/stack.go      |  29 -----
 pkg/tcpip/stack/stack_test.go | 194 ----------------------------------
 3 files changed, 8 insertions(+), 278 deletions(-)

diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index 10d2b7964c..8a9a085f04 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -51,13 +51,12 @@ type NIC struct {
 
 	mu struct {
 		sync.RWMutex
-		enabled       bool
-		spoofing      bool
-		promiscuous   bool
-		primary       map[tcpip.NetworkProtocolNumber][]*referencedNetworkEndpoint
-		endpoints     map[NetworkEndpointID]*referencedNetworkEndpoint
-		addressRanges []tcpip.Subnet
-		mcastJoins    map[NetworkEndpointID]uint32
+		enabled     bool
+		spoofing    bool
+		promiscuous bool
+		primary     map[tcpip.NetworkProtocolNumber][]*referencedNetworkEndpoint
+		endpoints   map[NetworkEndpointID]*referencedNetworkEndpoint
+		mcastJoins  map[NetworkEndpointID]uint32
 		// packetEPs is protected by mu, but the contained PacketEndpoint
 		// values are not.
 		packetEPs map[tcpip.NetworkProtocolNumber][]PacketEndpoint
@@ -670,25 +669,6 @@ func (n *NIC) getRefOrCreateTemp(protocol tcpip.NetworkProtocolNumber, address t
 	// A usable reference was not found, create a temporary one if requested by
 	// the caller or if the address is found in the NIC's subnets.
 	createTempEP := spoofingOrPromiscuous
-	if !createTempEP {
-		for _, sn := range n.mu.addressRanges {
-			// Skip the subnet address.
-			if address == sn.ID() {
-				continue
-			}
-			// For now just skip the broadcast address, until we support it.
-			// FIXME(b/137608825): Add support for sending/receiving directed
-			// (subnet) broadcast.
-			if address == sn.Broadcast() {
-				continue
-			}
-			if sn.Contains(address) {
-				createTempEP = true
-				break
-			}
-		}
-	}
-
 	n.mu.RUnlock()
 
 	if !createTempEP {
@@ -982,38 +962,11 @@ func (n *NIC) primaryAddress(proto tcpip.NetworkProtocolNumber) tcpip.AddressWit
 	return tcpip.AddressWithPrefix{}
 }
 
-// AddAddressRange adds a range of addresses to n, so that it starts accepting
-// packets targeted at the given addresses and network protocol. The range is
-// given by a subnet address, and all addresses contained in the subnet are
-// used except for the subnet address itself and the subnet's broadcast
-// address.
-func (n *NIC) AddAddressRange(protocol tcpip.NetworkProtocolNumber, subnet tcpip.Subnet) {
-	n.mu.Lock()
-	n.mu.addressRanges = append(n.mu.addressRanges, subnet)
-	n.mu.Unlock()
-}
-
-// RemoveAddressRange removes the given address range from n.
-func (n *NIC) RemoveAddressRange(subnet tcpip.Subnet) {
-	n.mu.Lock()
-
-	// Use the same underlying array.
-	tmp := n.mu.addressRanges[:0]
-	for _, sub := range n.mu.addressRanges {
-		if sub != subnet {
-			tmp = append(tmp, sub)
-		}
-	}
-	n.mu.addressRanges = tmp
-
-	n.mu.Unlock()
-}
-
 // AddressRanges returns the Subnets associated with this NIC.
 func (n *NIC) AddressRanges() []tcpip.Subnet {
 	n.mu.RLock()
 	defer n.mu.RUnlock()
-	sns := make([]tcpip.Subnet, 0, len(n.mu.addressRanges)+len(n.mu.endpoints))
+	sns := make([]tcpip.Subnet, 0, len(n.mu.endpoints))
 	for nid := range n.mu.endpoints {
 		sn, err := tcpip.NewSubnet(nid.LocalAddress, tcpip.AddressMask(strings.Repeat("\xff", len(nid.LocalAddress))))
 		if err != nil {
@@ -1023,7 +976,7 @@ func (n *NIC) AddressRanges() []tcpip.Subnet {
 		}
 		sns = append(sns, sn)
 	}
-	return append(sns, n.mu.addressRanges...)
+	return sns
 }
 
 // insertPrimaryEndpointLocked adds r to n's primary endpoint list as required
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 9a1c8e4091..ae44cd5da2 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -1230,35 +1230,6 @@ func (s *Stack) AddProtocolAddressWithOptions(id tcpip.NICID, protocolAddress tc
 	return nic.AddAddress(protocolAddress, peb)
 }
 
-// AddAddressRange adds a range of addresses to the specified NIC. The range is
-// given by a subnet address, and all addresses contained in the subnet are
-// used except for the subnet address itself and the subnet's broadcast
-// address.
-func (s *Stack) AddAddressRange(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, subnet tcpip.Subnet) *tcpip.Error {
-	s.mu.RLock()
-	defer s.mu.RUnlock()
-
-	if nic, ok := s.nics[id]; ok {
-		nic.AddAddressRange(protocol, subnet)
-		return nil
-	}
-
-	return tcpip.ErrUnknownNICID
-}
-
-// RemoveAddressRange removes the range of addresses from the specified NIC.
-func (s *Stack) RemoveAddressRange(id tcpip.NICID, subnet tcpip.Subnet) *tcpip.Error {
-	s.mu.RLock()
-	defer s.mu.RUnlock()
-
-	if nic, ok := s.nics[id]; ok {
-		nic.RemoveAddressRange(subnet)
-		return nil
-	}
-
-	return tcpip.ErrUnknownNICID
-}
-
 // RemoveAddress removes an existing network-layer address from the specified
 // NIC.
 func (s *Stack) RemoveAddress(id tcpip.NICID, addr tcpip.Address) *tcpip.Error {
diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go
index b5a603098a..106645c50b 100644
--- a/pkg/tcpip/stack/stack_test.go
+++ b/pkg/tcpip/stack/stack_test.go
@@ -23,7 +23,6 @@ import (
 	"math"
 	"net"
 	"sort"
-	"strings"
 	"testing"
 	"time"
 
@@ -1641,149 +1640,6 @@ func TestMulticastOrIPv6LinkLocalNeedsNoRoute(t *testing.T) {
 	}
 }
 
-// Add a range of addresses, then check that a packet is delivered.
-func TestAddressRangeAcceptsMatchingPacket(t *testing.T) {
-	s := stack.New(stack.Options{
-		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
-	})
-
-	ep := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(1, ep); err != nil {
-		t.Fatal("CreateNIC failed:", err)
-	}
-
-	{
-		subnet, err := tcpip.NewSubnet("\x00", "\x00")
-		if err != nil {
-			t.Fatal(err)
-		}
-		s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: 1}})
-	}
-
-	fakeNet := s.NetworkProtocolInstance(fakeNetNumber).(*fakeNetworkProtocol)
-
-	buf := buffer.NewView(30)
-
-	const localAddrByte byte = 0x01
-	buf[dstAddrOffset] = localAddrByte
-	subnet, err := tcpip.NewSubnet(tcpip.Address("\x00"), tcpip.AddressMask("\xF0"))
-	if err != nil {
-		t.Fatal("NewSubnet failed:", err)
-	}
-	if err := s.AddAddressRange(1, fakeNetNumber, subnet); err != nil {
-		t.Fatal("AddAddressRange failed:", err)
-	}
-
-	testRecv(t, fakeNet, localAddrByte, ep, buf)
-}
-
-func testNicForAddressRange(t *testing.T, nicID tcpip.NICID, s *stack.Stack, subnet tcpip.Subnet, rangeExists bool) {
-	t.Helper()
-
-	// Loop over all addresses and check them.
-	numOfAddresses := 1 << uint(8-subnet.Prefix())
-	if numOfAddresses < 1 || numOfAddresses > 255 {
-		t.Fatalf("got numOfAddresses = %d, want = [1 .. 255] (subnet=%s)", numOfAddresses, subnet)
-	}
-
-	addrBytes := []byte(subnet.ID())
-	for i := 0; i < numOfAddresses; i++ {
-		addr := tcpip.Address(addrBytes)
-		wantNicID := nicID
-		// The subnet and broadcast addresses are skipped.
-		if !rangeExists || addr == subnet.ID() || addr == subnet.Broadcast() {
-			wantNicID = 0
-		}
-		if gotNicID := s.CheckLocalAddress(0, fakeNetNumber, addr); gotNicID != wantNicID {
-			t.Errorf("got CheckLocalAddress(0, %d, %s) = %d, want = %d", fakeNetNumber, addr, gotNicID, wantNicID)
-		}
-		addrBytes[0]++
-	}
-
-	// Trying the next address should always fail since it is outside the range.
-	if gotNicID := s.CheckLocalAddress(0, fakeNetNumber, tcpip.Address(addrBytes)); gotNicID != 0 {
-		t.Errorf("got CheckLocalAddress(0, %d, %s) = %d, want = 0", fakeNetNumber, tcpip.Address(addrBytes), gotNicID)
-	}
-}
-
-// Set a range of addresses, then remove it again, and check at each step that
-// CheckLocalAddress returns the correct NIC for each address or zero if not
-// existent.
-func TestCheckLocalAddressForSubnet(t *testing.T) {
-	const nicID tcpip.NICID = 1
-	s := stack.New(stack.Options{
-		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
-	})
-
-	ep := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(nicID, ep); err != nil {
-		t.Fatal("CreateNIC failed:", err)
-	}
-
-	{
-		subnet, err := tcpip.NewSubnet("\x00", "\x00")
-		if err != nil {
-			t.Fatal(err)
-		}
-		s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: nicID}})
-	}
-
-	subnet, err := tcpip.NewSubnet(tcpip.Address("\xa0"), tcpip.AddressMask("\xf0"))
-	if err != nil {
-		t.Fatal("NewSubnet failed:", err)
-	}
-
-	testNicForAddressRange(t, nicID, s, subnet, false /* rangeExists */)
-
-	if err := s.AddAddressRange(nicID, fakeNetNumber, subnet); err != nil {
-		t.Fatal("AddAddressRange failed:", err)
-	}
-
-	testNicForAddressRange(t, nicID, s, subnet, true /* rangeExists */)
-
-	if err := s.RemoveAddressRange(nicID, subnet); err != nil {
-		t.Fatal("RemoveAddressRange failed:", err)
-	}
-
-	testNicForAddressRange(t, nicID, s, subnet, false /* rangeExists */)
-}
-
-// Set a range of addresses, then send a packet to a destination outside the
-// range and then check it doesn't get delivered.
-func TestAddressRangeRejectsNonmatchingPacket(t *testing.T) {
-	s := stack.New(stack.Options{
-		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
-	})
-
-	ep := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(1, ep); err != nil {
-		t.Fatal("CreateNIC failed:", err)
-	}
-
-	{
-		subnet, err := tcpip.NewSubnet("\x00", "\x00")
-		if err != nil {
-			t.Fatal(err)
-		}
-		s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: 1}})
-	}
-
-	fakeNet := s.NetworkProtocolInstance(fakeNetNumber).(*fakeNetworkProtocol)
-
-	buf := buffer.NewView(30)
-
-	const localAddrByte byte = 0x01
-	buf[dstAddrOffset] = localAddrByte
-	subnet, err := tcpip.NewSubnet(tcpip.Address("\x10"), tcpip.AddressMask("\xF0"))
-	if err != nil {
-		t.Fatal("NewSubnet failed:", err)
-	}
-	if err := s.AddAddressRange(1, fakeNetNumber, subnet); err != nil {
-		t.Fatal("AddAddressRange failed:", err)
-	}
-	testFailingRecv(t, fakeNet, localAddrByte, ep, buf)
-}
-
 func TestNetworkOptions(t *testing.T) {
 	s := stack.New(stack.Options{
 		NetworkProtocols:   []stack.NetworkProtocol{fakeNetFactory()},
@@ -1827,56 +1683,6 @@ func TestNetworkOptions(t *testing.T) {
 	}
 }
 
-func stackContainsAddressRange(s *stack.Stack, id tcpip.NICID, addrRange tcpip.Subnet) bool {
-	ranges, ok := s.NICAddressRanges()[id]
-	if !ok {
-		return false
-	}
-	for _, r := range ranges {
-		if r == addrRange {
-			return true
-		}
-	}
-	return false
-}
-
-func TestAddresRangeAddRemove(t *testing.T) {
-	s := stack.New(stack.Options{
-		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
-	})
-	ep := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(1, ep); err != nil {
-		t.Fatal("CreateNIC failed:", err)
-	}
-
-	addr := tcpip.Address("\x01\x01\x01\x01")
-	mask := tcpip.AddressMask(strings.Repeat("\xff", len(addr)))
-	addrRange, err := tcpip.NewSubnet(addr, mask)
-	if err != nil {
-		t.Fatal("NewSubnet failed:", err)
-	}
-
-	if got, want := stackContainsAddressRange(s, 1, addrRange), false; got != want {
-		t.Fatalf("got stackContainsAddressRange(...) = %t, want = %t", got, want)
-	}
-
-	if err := s.AddAddressRange(1, fakeNetNumber, addrRange); err != nil {
-		t.Fatal("AddAddressRange failed:", err)
-	}
-
-	if got, want := stackContainsAddressRange(s, 1, addrRange), true; got != want {
-		t.Fatalf("got stackContainsAddressRange(...) = %t, want = %t", got, want)
-	}
-
-	if err := s.RemoveAddressRange(1, addrRange); err != nil {
-		t.Fatal("RemoveAddressRange failed:", err)
-	}
-
-	if got, want := stackContainsAddressRange(s, 1, addrRange), false; got != want {
-		t.Fatalf("got stackContainsAddressRange(...) = %t, want = %t", got, want)
-	}
-}
-
 func TestGetMainNICAddressAddPrimaryNonPrimary(t *testing.T) {
 	for _, addrLen := range []int{4, 16} {
 		t.Run(fmt.Sprintf("addrLen=%d", addrLen), func(t *testing.T) {

From a22ac024239d2d757c80deca06ad86341691b04c Mon Sep 17 00:00:00 2001
From: Ayush Ranjan <ayushranjan@google.com>
Date: Mon, 17 Aug 2020 10:03:38 -0700
Subject: [PATCH 004/211] [vfs] Return EIO when opening /dev/tty.

This matches the VFS1 behavior. See the ttyInodeOperations struct in
pkg/sentry/fs/dev/tty.go.

Fixes the failure of python runtime test_ioctl.
Updates #3515

PiperOrigin-RevId: 327042758
---
 pkg/sentry/devices/ttydev/BUILD     |  2 +-
 pkg/sentry/devices/ttydev/ttydev.go | 46 ++---------------------------
 2 files changed, 4 insertions(+), 44 deletions(-)

diff --git a/pkg/sentry/devices/ttydev/BUILD b/pkg/sentry/devices/ttydev/BUILD
index 12e49b58a0..b4b6ca38a3 100644
--- a/pkg/sentry/devices/ttydev/BUILD
+++ b/pkg/sentry/devices/ttydev/BUILD
@@ -11,6 +11,6 @@ go_library(
         "//pkg/context",
         "//pkg/sentry/fsimpl/devtmpfs",
         "//pkg/sentry/vfs",
-        "//pkg/usermem",
+        "//pkg/syserror",
     ],
 )
diff --git a/pkg/sentry/devices/ttydev/ttydev.go b/pkg/sentry/devices/ttydev/ttydev.go
index fd4b79c468..664e54498d 100644
--- a/pkg/sentry/devices/ttydev/ttydev.go
+++ b/pkg/sentry/devices/ttydev/ttydev.go
@@ -12,10 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// Package ttydev implements devices for /dev/tty and (eventually)
-// /dev/console.
-//
-// TODO(b/159623826): Support /dev/console.
+// Package ttydev implements an unopenable vfs.Device for /dev/tty.
 package ttydev
 
 import (
@@ -23,7 +20,7 @@ import (
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
-	"gvisor.dev/gvisor/pkg/usermem"
+	"gvisor.dev/gvisor/pkg/syserror"
 )
 
 const (
@@ -37,44 +34,7 @@ type ttyDevice struct{}
 
 // Open implements vfs.Device.Open.
 func (ttyDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
-	fd := &ttyFD{}
-	if err := fd.vfsfd.Init(fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{
-		UseDentryMetadata: true,
-	}); err != nil {
-		return nil, err
-	}
-	return &fd.vfsfd, nil
-}
-
-// ttyFD implements vfs.FileDescriptionImpl for /dev/tty.
-type ttyFD struct {
-	vfsfd vfs.FileDescription
-	vfs.FileDescriptionDefaultImpl
-	vfs.DentryMetadataFileDescriptionImpl
-	vfs.NoLockFD
-}
-
-// Release implements vfs.FileDescriptionImpl.Release.
-func (fd *ttyFD) Release(context.Context) {}
-
-// PRead implements vfs.FileDescriptionImpl.PRead.
-func (fd *ttyFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
-	return 0, nil
-}
-
-// Read implements vfs.FileDescriptionImpl.Read.
-func (fd *ttyFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
-	return 0, nil
-}
-
-// PWrite implements vfs.FileDescriptionImpl.PWrite.
-func (fd *ttyFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
-	return src.NumBytes(), nil
-}
-
-// Write implements vfs.FileDescriptionImpl.Write.
-func (fd *ttyFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
-	return src.NumBytes(), nil
+	return nil, syserror.EIO
 }
 
 // Register registers all devices implemented by this package in vfsObj.

From 80681bdb9541f31eafbe6e4593f76d98ff6e641a Mon Sep 17 00:00:00 2001
From: Arthur Sfez <asfez@google.com>
Date: Mon, 17 Aug 2020 10:04:03 -0700
Subject: [PATCH 005/211] Add a unit test for out of order IP reassembly

PiperOrigin-RevId: 327042869
---
 pkg/tcpip/network/ipv4/ipv4_test.go | 22 +++++++++++++++++
 pkg/tcpip/network/ipv6/ipv6_test.go | 38 +++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+)

diff --git a/pkg/tcpip/network/ipv4/ipv4_test.go b/pkg/tcpip/network/ipv4/ipv4_test.go
index afd3ac06db..197e3bc512 100644
--- a/pkg/tcpip/network/ipv4/ipv4_test.go
+++ b/pkg/tcpip/network/ipv4/ipv4_test.go
@@ -627,6 +627,28 @@ func TestReceiveFragments(t *testing.T) {
 			},
 			expectedPayloads: [][]byte{udpPayload1Addr1ToAddr2},
 		},
+		{
+			name: "Two fragments out of order",
+			fragments: []fragmentData{
+				{
+					srcAddr:        addr1,
+					dstAddr:        addr2,
+					id:             1,
+					flags:          0,
+					fragmentOffset: 64,
+					payload:        ipv4Payload1Addr1ToAddr2[64:],
+				},
+				{
+					srcAddr:        addr1,
+					dstAddr:        addr2,
+					id:             1,
+					flags:          header.IPv4FlagMoreFragments,
+					fragmentOffset: 0,
+					payload:        ipv4Payload1Addr1ToAddr2[:64],
+				},
+			},
+			expectedPayloads: [][]byte{udpPayload1Addr1ToAddr2},
+		},
 		{
 			name: "Two fragments with last fragment size not a multiple of fragment block size",
 			fragments: []fragmentData{
diff --git a/pkg/tcpip/network/ipv6/ipv6_test.go b/pkg/tcpip/network/ipv6/ipv6_test.go
index 081afb0516..0a183bfdee 100644
--- a/pkg/tcpip/network/ipv6/ipv6_test.go
+++ b/pkg/tcpip/network/ipv6/ipv6_test.go
@@ -827,6 +827,44 @@ func TestReceiveIPv6Fragments(t *testing.T) {
 			},
 			expectedPayloads: [][]byte{udpPayload1Addr1ToAddr2},
 		},
+		{
+			name: "Two fragments out of order",
+			fragments: []fragmentData{
+				{
+					srcAddr: addr1,
+					dstAddr: addr2,
+					nextHdr: fragmentExtHdrID,
+					data: buffer.NewVectorisedView(
+						fragmentExtHdrLen+len(ipv6Payload1Addr1ToAddr2)-64,
+						[]buffer.View{
+							// Fragment extension header.
+							//
+							// Fragment offset = 8, More = false, ID = 1
+							buffer.View([]byte{uint8(header.UDPProtocolNumber), 0, 0, 64, 0, 0, 0, 1}),
+
+							ipv6Payload1Addr1ToAddr2[64:],
+						},
+					),
+				},
+				{
+					srcAddr: addr1,
+					dstAddr: addr2,
+					nextHdr: fragmentExtHdrID,
+					data: buffer.NewVectorisedView(
+						fragmentExtHdrLen+64,
+						[]buffer.View{
+							// Fragment extension header.
+							//
+							// Fragment offset = 0, More = true, ID = 1
+							buffer.View([]byte{uint8(header.UDPProtocolNumber), 0, 0, 1, 0, 0, 0, 1}),
+
+							ipv6Payload1Addr1ToAddr2[:64],
+						},
+					),
+				},
+			},
+			expectedPayloads: [][]byte{udpPayload1Addr1ToAddr2},
+		},
 		{
 			name: "Two fragments with last fragment size not a multiple of fragment block size",
 			fragments: []fragmentData{

From d1179ffa205b6ea60b450fd1c7e91230564719c8 Mon Sep 17 00:00:00 2001
From: Dean Deng <deandeng@google.com>
Date: Mon, 17 Aug 2020 11:40:08 -0700
Subject: [PATCH 006/211] Remove weak references from unix sockets.

The abstract socket namespace no longer holds any references on sockets.
Instead, TryIncRef() is used when a socket is being retrieved in
BoundEndpoint(). Abstract sockets are now responsible for removing themselves
from the namespace they are in, when they are destroyed.

Updates #1486.

PiperOrigin-RevId: 327064173
---
 pkg/refs_vfs2/BUILD                           |  6 +-
 pkg/refs_vfs2/refs.go                         |  4 +-
 pkg/sentry/kernel/BUILD                       |  1 +
 .../kernel/abstract_socket_namespace.go       | 77 ++++++++++++-------
 pkg/sentry/socket/unix/BUILD                  | 14 ++++
 pkg/sentry/socket/unix/unix.go                | 22 ++++--
 pkg/sentry/socket/unix/unix_vfs2.go           |  6 +-
 7 files changed, 91 insertions(+), 39 deletions(-)

diff --git a/pkg/refs_vfs2/BUILD b/pkg/refs_vfs2/BUILD
index 7f180c7bd2..7b3e10683c 100644
--- a/pkg/refs_vfs2/BUILD
+++ b/pkg/refs_vfs2/BUILD
@@ -19,10 +19,8 @@ go_template(
 )
 
 go_library(
-    name = "refs",
-    srcs = [
-        "refs.go",
-    ],
+    name = "refs_vfs2",
+    srcs = ["refs.go"],
     visibility = ["//pkg/sentry:internal"],
     deps = ["//pkg/context"],
 )
diff --git a/pkg/refs_vfs2/refs.go b/pkg/refs_vfs2/refs.go
index ee01b17b06..99a074e962 100644
--- a/pkg/refs_vfs2/refs.go
+++ b/pkg/refs_vfs2/refs.go
@@ -12,8 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// Package refs defines an interface for a reference-counted object.
-package refs
+// Package refs_vfs2 defines an interface for a reference-counted object.
+package refs_vfs2
 
 import (
 	"gvisor.dev/gvisor/pkg/context"
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD
index f6886a7585..5416a310d3 100644
--- a/pkg/sentry/kernel/BUILD
+++ b/pkg/sentry/kernel/BUILD
@@ -163,6 +163,7 @@ go_library(
         "//pkg/log",
         "//pkg/metric",
         "//pkg/refs",
+        "//pkg/refs_vfs2",
         "//pkg/safemem",
         "//pkg/secio",
         "//pkg/sentry/arch",
diff --git a/pkg/sentry/kernel/abstract_socket_namespace.go b/pkg/sentry/kernel/abstract_socket_namespace.go
index 52ed5cea2d..1b97215341 100644
--- a/pkg/sentry/kernel/abstract_socket_namespace.go
+++ b/pkg/sentry/kernel/abstract_socket_namespace.go
@@ -15,29 +15,21 @@
 package kernel
 
 import (
+	"fmt"
 	"syscall"
 
 	"gvisor.dev/gvisor/pkg/context"
-	"gvisor.dev/gvisor/pkg/refs"
+	"gvisor.dev/gvisor/pkg/refs_vfs2"
 	"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
 	"gvisor.dev/gvisor/pkg/sync"
 )
 
 // +stateify savable
 type abstractEndpoint struct {
-	ep   transport.BoundEndpoint
-	wr   *refs.WeakRef
-	name string
-	ns   *AbstractSocketNamespace
-}
-
-// WeakRefGone implements refs.WeakRefUser.WeakRefGone.
-func (e *abstractEndpoint) WeakRefGone(context.Context) {
-	e.ns.mu.Lock()
-	if e.ns.endpoints[e.name].ep == e.ep {
-		delete(e.ns.endpoints, e.name)
-	}
-	e.ns.mu.Unlock()
+	ep     transport.BoundEndpoint
+	socket refs_vfs2.RefCounter
+	name   string
+	ns     *AbstractSocketNamespace
 }
 
 // AbstractSocketNamespace is used to implement the Linux abstract socket functionality.
@@ -46,7 +38,11 @@ func (e *abstractEndpoint) WeakRefGone(context.Context) {
 type AbstractSocketNamespace struct {
 	mu sync.Mutex `state:"nosave"`
 
-	// Keeps mapping from name to endpoint.
+	// Keeps a mapping from name to endpoint. AbstractSocketNamespace does not hold
+	// any references on any sockets that it contains; when retrieving a socket,
+	// TryIncRef() must be called in case the socket is concurrently being
+	// destroyed. It is the responsibility of the socket to remove itself from the
+	// abstract socket namespace when it is destroyed.
 	endpoints map[string]abstractEndpoint
 }
 
@@ -58,15 +54,15 @@ func NewAbstractSocketNamespace() *AbstractSocketNamespace {
 }
 
 // A boundEndpoint wraps a transport.BoundEndpoint to maintain a reference on
-// its backing object.
+// its backing socket.
 type boundEndpoint struct {
 	transport.BoundEndpoint
-	rc refs.RefCounter
+	socket refs_vfs2.RefCounter
 }
 
 // Release implements transport.BoundEndpoint.Release.
 func (e *boundEndpoint) Release(ctx context.Context) {
-	e.rc.DecRef(ctx)
+	e.socket.DecRef(ctx)
 	e.BoundEndpoint.Release(ctx)
 }
 
@@ -81,32 +77,59 @@ func (a *AbstractSocketNamespace) BoundEndpoint(name string) transport.BoundEndp
 		return nil
 	}
 
-	rc := ep.wr.Get()
-	if rc == nil {
-		delete(a.endpoints, name)
+	if !ep.socket.TryIncRef() {
+		// The socket has reached zero references and is being destroyed.
 		return nil
 	}
 
-	return &boundEndpoint{ep.ep, rc}
+	return &boundEndpoint{ep.ep, ep.socket}
 }
 
 // Bind binds the given socket.
 //
-// When the last reference managed by rc is dropped, ep may be removed from the
+// When the last reference managed by socket is dropped, ep may be removed from the
 // namespace.
-func (a *AbstractSocketNamespace) Bind(ctx context.Context, name string, ep transport.BoundEndpoint, rc refs.RefCounter) error {
+func (a *AbstractSocketNamespace) Bind(ctx context.Context, name string, ep transport.BoundEndpoint, socket refs_vfs2.RefCounter) error {
 	a.mu.Lock()
 	defer a.mu.Unlock()
 
+	// Check if there is already a socket (which has not yet been destroyed) bound at name.
 	if ep, ok := a.endpoints[name]; ok {
-		if rc := ep.wr.Get(); rc != nil {
-			rc.DecRef(ctx)
+		if ep.socket.TryIncRef() {
+			ep.socket.DecRef(ctx)
 			return syscall.EADDRINUSE
 		}
 	}
 
 	ae := abstractEndpoint{ep: ep, name: name, ns: a}
-	ae.wr = refs.NewWeakRef(rc, &ae)
+	ae.socket = socket
 	a.endpoints[name] = ae
 	return nil
 }
+
+// Remove removes the specified socket at name from the abstract socket
+// namespace, if it has not yet been replaced.
+func (a *AbstractSocketNamespace) Remove(name string, socket refs_vfs2.RefCounter) {
+	a.mu.Lock()
+	defer a.mu.Unlock()
+
+	ep, ok := a.endpoints[name]
+	if !ok {
+		// We never delete a map entry apart from a socket's destructor (although the
+		// map entry may be overwritten). Therefore, a socket should exist, even if it
+		// may not be the one we expect.
+		panic(fmt.Sprintf("expected socket to exist at '%s' in abstract socket namespace", name))
+	}
+
+	// A Bind() operation may race with callers of Remove(), e.g. in the
+	// following case:
+	//   socket1 reaches zero references and begins destruction
+	//   a.Bind("foo", ep, socket2) replaces socket1 with socket2
+	//   socket1's destructor calls a.Remove("foo", socket1)
+	//
+	// Therefore, we need to check that the socket at name is what we expect
+	// before modifying the map.
+	if ep.socket == socket {
+		delete(a.endpoints, name)
+	}
+}
diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD
index 061a689a99..cb953e4dce 100644
--- a/pkg/sentry/socket/unix/BUILD
+++ b/pkg/sentry/socket/unix/BUILD
@@ -1,12 +1,25 @@
 load("//tools:defs.bzl", "go_library")
+load("//tools/go_generics:defs.bzl", "go_template_instance")
 
 package(licenses = ["notice"])
 
+go_template_instance(
+    name = "socket_refs",
+    out = "socket_refs.go",
+    package = "unix",
+    prefix = "socketOpsCommon",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "socketOpsCommon",
+    },
+)
+
 go_library(
     name = "unix",
     srcs = [
         "device.go",
         "io.go",
+        "socket_refs.go",
         "unix.go",
         "unix_vfs2.go",
     ],
@@ -15,6 +28,7 @@ go_library(
         "//pkg/abi/linux",
         "//pkg/context",
         "//pkg/fspath",
+        "//pkg/log",
         "//pkg/refs",
         "//pkg/safemem",
         "//pkg/sentry/arch",
diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go
index 2b8454edb1..b7e8e4325a 100644
--- a/pkg/sentry/socket/unix/unix.go
+++ b/pkg/sentry/socket/unix/unix.go
@@ -24,7 +24,6 @@ import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/fspath"
-	"gvisor.dev/gvisor/pkg/refs"
 	"gvisor.dev/gvisor/pkg/sentry/arch"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
@@ -80,7 +79,7 @@ func NewWithDirent(ctx context.Context, d *fs.Dirent, ep transport.Endpoint, sty
 			stype: stype,
 		},
 	}
-	s.EnableLeakCheck("unix.SocketOperations")
+	s.EnableLeakCheck()
 
 	return fs.NewFile(ctx, d, flags, &s)
 }
@@ -89,17 +88,26 @@ func NewWithDirent(ctx context.Context, d *fs.Dirent, ep transport.Endpoint, sty
 //
 // +stateify savable
 type socketOpsCommon struct {
-	refs.AtomicRefCount
+	socketOpsCommonRefs
 	socket.SendReceiveTimeout
 
 	ep    transport.Endpoint
 	stype linux.SockType
+
+	// abstractName and abstractNamespace indicate the name and namespace of the
+	// socket if it is bound to an abstract socket namespace. Once the socket is
+	// bound, they cannot be modified.
+	abstractName      string
+	abstractNamespace *kernel.AbstractSocketNamespace
 }
 
 // DecRef implements RefCounter.DecRef.
 func (s *socketOpsCommon) DecRef(ctx context.Context) {
-	s.DecRefWithDestructor(ctx, func(context.Context) {
+	s.socketOpsCommonRefs.DecRef(func() {
 		s.ep.Close(ctx)
+		if s.abstractNamespace != nil {
+			s.abstractNamespace.Remove(s.abstractName, s)
+		}
 	})
 }
 
@@ -284,10 +292,14 @@ func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
 			if t.IsNetworkNamespaced() {
 				return syserr.ErrInvalidEndpointState
 			}
-			if err := t.AbstractSockets().Bind(t, p[1:], bep, s); err != nil {
+			asn := t.AbstractSockets()
+			name := p[1:]
+			if err := asn.Bind(t, name, bep, s); err != nil {
 				// syserr.ErrPortInUse corresponds to EADDRINUSE.
 				return syserr.ErrPortInUse
 			}
+			s.abstractName = name
+			s.abstractNamespace = asn
 		} else {
 			// The parent and name.
 			var d *fs.Dirent
diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go
index dfa25241a7..d066ef8aba 100644
--- a/pkg/sentry/socket/unix/unix_vfs2.go
+++ b/pkg/sentry/socket/unix/unix_vfs2.go
@@ -183,10 +183,14 @@ func (s *SocketVFS2) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
 			if t.IsNetworkNamespaced() {
 				return syserr.ErrInvalidEndpointState
 			}
-			if err := t.AbstractSockets().Bind(t, p[1:], bep, s); err != nil {
+			asn := t.AbstractSockets()
+			name := p[1:]
+			if err := asn.Bind(t, name, bep, s); err != nil {
 				// syserr.ErrPortInUse corresponds to EADDRINUSE.
 				return syserr.ErrPortInUse
 			}
+			s.abstractName = name
+			s.abstractNamespace = asn
 		} else {
 			path := fspath.Parse(p)
 			root := t.FSContext().RootDirectoryVFS2()

From e1635261defd19195506eab8050455e992739026 Mon Sep 17 00:00:00 2001
From: Ghanan Gowripalan <ghanan@google.com>
Date: Mon, 17 Aug 2020 12:27:59 -0700
Subject: [PATCH 007/211] Remove address range functions

Should have been removed in cl/326791119
https://github.com/google/gvisor/commit/9a7b5830aa063895f67ca0fdf653a46906374613

PiperOrigin-RevId: 327074156
---
 pkg/tcpip/stack/nic.go   | 18 ------------------
 pkg/tcpip/stack/stack.go | 13 -------------
 2 files changed, 31 deletions(-)

diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index 8a9a085f04..7282927820 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -19,7 +19,6 @@ import (
 	"math/rand"
 	"reflect"
 	"sort"
-	"strings"
 	"sync/atomic"
 
 	"gvisor.dev/gvisor/pkg/sync"
@@ -962,23 +961,6 @@ func (n *NIC) primaryAddress(proto tcpip.NetworkProtocolNumber) tcpip.AddressWit
 	return tcpip.AddressWithPrefix{}
 }
 
-// AddressRanges returns the Subnets associated with this NIC.
-func (n *NIC) AddressRanges() []tcpip.Subnet {
-	n.mu.RLock()
-	defer n.mu.RUnlock()
-	sns := make([]tcpip.Subnet, 0, len(n.mu.endpoints))
-	for nid := range n.mu.endpoints {
-		sn, err := tcpip.NewSubnet(nid.LocalAddress, tcpip.AddressMask(strings.Repeat("\xff", len(nid.LocalAddress))))
-		if err != nil {
-			// This should never happen as the mask has been carefully crafted to
-			// match the address.
-			panic("Invalid endpoint subnet: " + err.Error())
-		}
-		sns = append(sns, sn)
-	}
-	return sns
-}
-
 // insertPrimaryEndpointLocked adds r to n's primary endpoint list as required
 // by peb.
 //
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index ae44cd5da2..a3f87c8afd 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -1102,19 +1102,6 @@ func (s *Stack) removeNICLocked(id tcpip.NICID) *tcpip.Error {
 	return nic.remove()
 }
 
-// NICAddressRanges returns a map of NICIDs to their associated subnets.
-func (s *Stack) NICAddressRanges() map[tcpip.NICID][]tcpip.Subnet {
-	s.mu.RLock()
-	defer s.mu.RUnlock()
-
-	nics := map[tcpip.NICID][]tcpip.Subnet{}
-
-	for id, nic := range s.nics {
-		nics[id] = append(nics[id], nic.AddressRanges()...)
-	}
-	return nics
-}
-
 // NICInfo captures the name and addresses assigned to a NIC.
 type NICInfo struct {
 	Name              string

From 6c870ab053ff47a8fb13d3c0bf064d90592aa1f7 Mon Sep 17 00:00:00 2001
From: Ayush Ranjan <ayushranjan@google.com>
Date: Mon, 17 Aug 2020 13:24:09 -0700
Subject: [PATCH 008/211] [vfs] Do O_DIRECTORY check after resolving symlinks.

Fixes python runtime test test_glob.
Updates #3515

We were checking whether the to-be-opened dentry is a dir before resolving
symlinks. We should check that after resolving symlinks.
This was preventing us from opening a symlink which pointed to a directory
with O_DIRECTORY.

Also added this check in tmpfs and removed a duplicate check.

PiperOrigin-RevId: 327085895
---
 pkg/sentry/fsimpl/gofer/filesystem.go | 6 +++---
 pkg/sentry/fsimpl/tmpfs/filesystem.go | 5 ++---
 test/syscalls/linux/open.cc           | 8 ++++++++
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index 610a7ed78b..a3903db338 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -886,9 +886,6 @@ afterTrailingSymlink:
 	if mustCreate {
 		return nil, syserror.EEXIST
 	}
-	if !child.isDir() && rp.MustBeDir() {
-		return nil, syserror.ENOTDIR
-	}
 	// Open existing child or follow symlink.
 	if child.isSymlink() && rp.ShouldFollowSymlink() {
 		target, err := child.readlink(ctx, rp.Mount())
@@ -901,6 +898,9 @@ afterTrailingSymlink:
 		start = parent
 		goto afterTrailingSymlink
 	}
+	if rp.MustBeDir() && !child.isDir() {
+		return nil, syserror.ENOTDIR
+	}
 	return child.openLocked(ctx, rp, &opts)
 }
 
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index a4864df53f..cb8b2d9440 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -389,9 +389,8 @@ afterTrailingSymlink:
 		start = &parentDir.dentry
 		goto afterTrailingSymlink
 	}
-	// Open existing file.
-	if mustCreate {
-		return nil, syserror.EEXIST
+	if rp.MustBeDir() && !child.inode.isDir() {
+		return nil, syserror.ENOTDIR
 	}
 	return child.open(ctx, rp, &opts, false)
 }
diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc
index 8f0c9cb491..77f390f3ca 100644
--- a/test/syscalls/linux/open.cc
+++ b/test/syscalls/linux/open.cc
@@ -27,6 +27,7 @@
 #include "test/util/cleanup.h"
 #include "test/util/file_descriptor.h"
 #include "test/util/fs_util.h"
+#include "test/util/posix_error.h"
 #include "test/util/temp_path.h"
 #include "test/util/test_util.h"
 #include "test/util/thread_util.h"
@@ -408,6 +409,13 @@ TEST_F(OpenTest, FileNotDirectory) {
               SyscallFailsWithErrno(ENOTDIR));
 }
 
+TEST_F(OpenTest, SymlinkDirectory) {
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  std::string link = NewTempAbsPath();
+  ASSERT_THAT(symlink(dir.path().c_str(), link.c_str()), SyscallSucceeds());
+  ASSERT_NO_ERRNO(Open(link, O_RDONLY | O_DIRECTORY));
+}
+
 TEST_F(OpenTest, Null) {
   char c = '\0';
   ASSERT_THAT(open(&c, O_RDONLY), SyscallFailsWithErrno(ENOENT));

From 1d2941a43e3447cdede3e9d0fa8fa240cd03a4d0 Mon Sep 17 00:00:00 2001
From: Zach Koopmans <zkoopmans@google.com>
Date: Mon, 17 Aug 2020 14:01:38 -0700
Subject: [PATCH 009/211] Fix AllSocketPairTest for open source.

Setting timeouts for sockets on GCP images (debian) for usecs only
respects multiples of 4K. Set the test with a multiple of 4K with a comment.

PiperOrigin-RevId: 327093848
---
 test/syscalls/linux/socket_generic.cc | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/test/syscalls/linux/socket_generic.cc b/test/syscalls/linux/socket_generic.cc
index f7d6139f1f..a6182f0aca 100644
--- a/test/syscalls/linux/socket_generic.cc
+++ b/test/syscalls/linux/socket_generic.cc
@@ -462,6 +462,7 @@ TEST_P(AllSocketPairTest, SendTimeoutDefault) {
 TEST_P(AllSocketPairTest, SetGetSendTimeout) {
   auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
 
+  // tv_usec should be a multiple of 4000 to work on most systems.
   timeval tv = {.tv_sec = 89, .tv_usec = 42000};
   EXPECT_THAT(
       setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)),
@@ -472,8 +473,8 @@ TEST_P(AllSocketPairTest, SetGetSendTimeout) {
   EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO,
                          &actual_tv, &len),
               SyscallSucceeds());
-  EXPECT_EQ(actual_tv.tv_sec, 89);
-  EXPECT_EQ(actual_tv.tv_usec, 42000);
+  EXPECT_EQ(actual_tv.tv_sec, tv.tv_sec);
+  EXPECT_EQ(actual_tv.tv_usec, tv.tv_usec);
 }
 
 TEST_P(AllSocketPairTest, SetGetSendTimeoutLargerArg) {
@@ -484,8 +485,9 @@ TEST_P(AllSocketPairTest, SetGetSendTimeoutLargerArg) {
     int64_t extra_data;
   } ABSL_ATTRIBUTE_PACKED;
 
+  // tv_usec should be a multiple of 4000 to work on most systems.
   timeval_with_extra tv_extra = {
-      .tv = {.tv_sec = 0, .tv_usec = 123000},
+      .tv = {.tv_sec = 0, .tv_usec = 124000},
   };
 
   EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO,
@@ -497,8 +499,8 @@ TEST_P(AllSocketPairTest, SetGetSendTimeoutLargerArg) {
   EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO,
                          &actual_tv, &len),
               SyscallSucceeds());
-  EXPECT_EQ(actual_tv.tv.tv_sec, 0);
-  EXPECT_EQ(actual_tv.tv.tv_usec, 123000);
+  EXPECT_EQ(actual_tv.tv.tv_sec, tv_extra.tv.tv_sec);
+  EXPECT_EQ(actual_tv.tv.tv_usec, tv_extra.tv.tv_usec);
 }
 
 TEST_P(AllSocketPairTest, SendTimeoutAllowsWrite) {

From 2529efaf0abe5cbfc58184697bf33017bc2a4f06 Mon Sep 17 00:00:00 2001
From: gVisor bot <gvisor-bot@google.com>
Date: Mon, 17 Aug 2020 15:53:58 -0700
Subject: [PATCH 010/211] Add Verify in merkle tree library

Verify checks input data against the merkle tree, and compares the root
hash with expectation.

PiperOrigin-RevId: 327116711
---
 pkg/merkletree/BUILD              |   1 +
 pkg/merkletree/merkletree.go      | 259 ++++++++++++++++++++----
 pkg/merkletree/merkletree_test.go | 319 +++++++++++++++++++++++++-----
 3 files changed, 495 insertions(+), 84 deletions(-)

diff --git a/pkg/merkletree/BUILD b/pkg/merkletree/BUILD
index 5b0e4143a3..a8fcb2e19b 100644
--- a/pkg/merkletree/BUILD
+++ b/pkg/merkletree/BUILD
@@ -5,6 +5,7 @@ package(licenses = ["notice"])
 go_library(
     name = "merkletree",
     srcs = ["merkletree.go"],
+    visibility = ["//pkg/sentry:internal"],
     deps = ["//pkg/usermem"],
 )
 
diff --git a/pkg/merkletree/merkletree.go b/pkg/merkletree/merkletree.go
index 906f679433..955c9c473b 100644
--- a/pkg/merkletree/merkletree.go
+++ b/pkg/merkletree/merkletree.go
@@ -16,7 +16,9 @@
 package merkletree
 
 import (
+	"bytes"
 	"crypto/sha256"
+	"fmt"
 	"io"
 
 	"gvisor.dev/gvisor/pkg/usermem"
@@ -27,50 +29,78 @@ const (
 	sha256DigestSize = 32
 )
 
-// Size defines the scale of a Merkle tree.
-type Size struct {
+// Layout defines the scale of a Merkle tree.
+type Layout struct {
 	// blockSize is the size of a data block to be hashed.
 	blockSize int64
 	// digestSize is the size of a generated hash.
 	digestSize int64
-	// hashesPerBlock is the number of hashes in a block. For example, if
-	// blockSize is 4096 bytes, and digestSize is 32 bytes, there will be 128
-	// hashesPerBlock. Therefore 128 hashes in a lower level will be put into a
-	// block and generate a single hash in an upper level.
-	hashesPerBlock int64
-	// levelStart is the start block index of each level. The number of levels in
-	// the tree is the length of the slice. The leafs (level 0) are hashes of
-	// blocks in the input data. The levels above are hashes of lower level
-	// hashes.  The highest level is the root hash.
-	levelStart []int64
+	// levelOffset contains the offset of the beginning of each level in
+	// bytes. The number of levels in the tree is the length of the slice.
+	// The leaf nodes (level 0) contain hashes of blocks of the input data.
+	// Each level N contains hashes of the blocks in level N-1. The highest
+	// level is the root hash.
+	levelOffset []int64
 }
 
-// MakeSize initializes and returns a new Size object describing the structure
-// of a tree. dataSize specifies the number of the file system size in bytes.
-func MakeSize(dataSize int64) Size {
-	size := Size{
+// InitLayout initializes and returns a new Layout object describing the structure
+// of a tree. dataSize specifies the size of input data in bytes.
+func InitLayout(dataSize int64) Layout {
+	layout := Layout{
 		blockSize: usermem.PageSize,
 		// TODO(b/156980949): Allow config other hash methods (SHA384/SHA512).
-		digestSize:     sha256DigestSize,
-		hashesPerBlock: usermem.PageSize / sha256DigestSize,
+		digestSize: sha256DigestSize,
 	}
-	numBlocks := (dataSize + size.blockSize - 1) / size.blockSize
-	level := int64(0)
+	numBlocks := (dataSize + layout.blockSize - 1) / layout.blockSize
+	level := 0
 	offset := int64(0)
 
-	// Calcuate the number of levels in the Merkle tree and the beginning offset
-	// of each level. Level 0 is the level directly above the data blocks, while
-	// level NumLevels - 1 is the root.
+	// Calculate the number of levels in the Merkle tree and the beginning
+	// offset of each level. Level 0 consists of the leaf nodes that
+	// contain the hashes of the data blocks, while level numLevels - 1 is
+	// the root.
 	for numBlocks > 1 {
-		size.levelStart = append(size.levelStart, offset)
+		layout.levelOffset = append(layout.levelOffset, offset*layout.blockSize)
 		// Round numBlocks up to fill up a block.
-		numBlocks += (size.hashesPerBlock - numBlocks%size.hashesPerBlock) % size.hashesPerBlock
-		offset += numBlocks / size.hashesPerBlock
-		numBlocks = numBlocks / size.hashesPerBlock
+		numBlocks += (layout.hashesPerBlock() - numBlocks%layout.hashesPerBlock()) % layout.hashesPerBlock()
+		offset += numBlocks / layout.hashesPerBlock()
+		numBlocks = numBlocks / layout.hashesPerBlock()
 		level++
 	}
-	size.levelStart = append(size.levelStart, offset)
-	return size
+	layout.levelOffset = append(layout.levelOffset, offset*layout.blockSize)
+	return layout
+}
+
+// hashesPerBlock returns the number of digests in each block. For example,
+// if blockSize is 4096 bytes, and digestSize is 32 bytes, there will be 128
+// hashes per block. Therefore 128 hashes in one level are combined into one
+// hash in the level above.
+func (layout Layout) hashesPerBlock() int64 {
+	return layout.blockSize / layout.digestSize
+}
+
+// numLevels returns the total number of levels in the Merkle tree.
+func (layout Layout) numLevels() int {
+	return len(layout.levelOffset)
+}
+
+// rootLevel returns the level of the root hash.
+func (layout Layout) rootLevel() int {
+	return layout.numLevels() - 1
+}
+
+// digestOffset finds the offset of a digest from the beginning of the tree.
+// The target digest is at level of the tree, with index from the beginning of
+// the current level.
+func (layout Layout) digestOffset(level int, index int64) int64 {
+	return layout.levelOffset[level] + index*layout.digestSize
+}
+
+// blockOffset finds the offset of a block from the beginning of the tree.  The
+// target block is at level of the tree, with index from the beginning of the
+// current level.
+func (layout Layout) blockOffset(level int, index int64) int64 {
+	return layout.levelOffset[level] + index*layout.blockSize
 }
 
 // Generate constructs a Merkle tree for the contents of data. The output is
@@ -78,21 +108,21 @@ func MakeSize(dataSize int64) Size {
 // it has been written. That is, treeWriter and treeReader should point to the
 // same underlying data but have separate cursors.
 func Generate(data io.Reader, dataSize int64, treeReader io.Reader, treeWriter io.Writer) ([]byte, error) {
-	size := MakeSize(dataSize)
+	layout := InitLayout(dataSize)
 
-	numBlocks := (dataSize + size.blockSize - 1) / size.blockSize
+	numBlocks := (dataSize + layout.blockSize - 1) / layout.blockSize
 
 	var root []byte
-	for level := 0; level < len(size.levelStart); level++ {
+	for level := 0; level < layout.numLevels(); level++ {
 		for i := int64(0); i < numBlocks; i++ {
-			buf := make([]byte, size.blockSize)
+			buf := make([]byte, layout.blockSize)
 			var (
 				n   int
 				err error
 			)
 			if level == 0 {
-				// Read data block from the target file since level 0 is directly above
-				// the raw data block.
+				// Read data block from the target file since level 0 includes hashes
+				// of blocks in the input data.
 				n, err = data.Read(buf)
 			} else {
 				// Read data block from the tree file since levels higher than 0 are
@@ -112,7 +142,7 @@ func Generate(data io.Reader, dataSize int64, treeReader io.Reader, treeWriter i
 			// Hash the bytes in buf.
 			digest := sha256.Sum256(buf)
 
-			if level == len(size.levelStart)-1 {
+			if level == layout.rootLevel() {
 				root = digest[:]
 			}
 
@@ -121,15 +151,164 @@ func Generate(data io.Reader, dataSize int64, treeReader io.Reader, treeWriter i
 				return nil, err
 			}
 		}
-		// If the genereated digests do not round up to a block, zero-padding the
+		// If the generated digests do not round up to a block, zero-padding the
 		// remaining of the last block. But no need to do so for root.
-		if level != len(size.levelStart)-1 && numBlocks%size.hashesPerBlock != 0 {
-			zeroBuf := make([]byte, size.blockSize-(numBlocks%size.hashesPerBlock)*size.digestSize)
+		if level != layout.rootLevel() && numBlocks%layout.hashesPerBlock() != 0 {
+			zeroBuf := make([]byte, layout.blockSize-(numBlocks%layout.hashesPerBlock())*layout.digestSize)
 			if _, err := treeWriter.Write(zeroBuf[:]); err != nil {
 				return nil, err
 			}
 		}
-		numBlocks = (numBlocks + size.hashesPerBlock - 1) / size.hashesPerBlock
+		numBlocks = (numBlocks + layout.hashesPerBlock() - 1) / layout.hashesPerBlock()
 	}
 	return root, nil
 }
+
+// Verify verifies the content read from data with offset. The content is
+// verified against tree. If content spans across multiple blocks, each block is
+// verified. Verification fails if the hash of the data does not match the tree
+// at any level, or if the final root hash does not match expectedRoot.
+// Each verified block's requested bytes are written to w; a read error, a
+// write error, or io.EOF reached while reading data is returned to the caller.
+// Verify moves the cursor for data but restores it upon exit (best effort);
+// the cursor for tree is modified and not restored.
+func Verify(w io.Writer, data, tree io.ReadSeeker, dataSize int64, readOffset int64, readSize int64, expectedRoot []byte) error {
+	if readSize <= 0 {
+		return fmt.Errorf("Unexpected read size: %d", readSize)
+	}
+	layout := InitLayout(dataSize)
+
+	// Calculate the indices of the first and last blocks that include the
+	// target range in input data.
+	firstDataBlock := readOffset / layout.blockSize
+	lastDataBlock := (readOffset + readSize - 1) / layout.blockSize
+
+	// Store the current offset, so we can set it back once verification
+	// finishes.
+	origOffset, err := data.Seek(0, io.SeekCurrent)
+	if err != nil {
+		return fmt.Errorf("Find current data offset failed: %v", err)
+	}
+	defer data.Seek(origOffset, io.SeekStart)
+
+	// Move to the first block that contains target data.
+	if _, err := data.Seek(firstDataBlock*layout.blockSize, io.SeekStart); err != nil {
+		return fmt.Errorf("Seek to datablock start failed: %v", err)
+	}
+
+	buf := make([]byte, layout.blockSize)
+	var readErr error
+	bytesRead := 0
+	for i := firstDataBlock; i <= lastDataBlock; i++ {
+		// Read a block that includes all or part of target range in
+		// input data.
+		bytesRead, readErr = data.Read(buf)
+		// If at the end of input data and all previous blocks are
+		// verified, return the verified input data and EOF.
+		if readErr == io.EOF && bytesRead == 0 {
+			break
+		}
+		if readErr != nil && readErr != io.EOF {
+			return fmt.Errorf("Read from data failed: %v", readErr)
+		}
+		// If this is the end of file, zero the remaining bytes in buf,
+		// otherwise they are still from the previous block.
+		// TODO(b/162908070): Investigate possible issues with zero
+		// padding the data.
+		for j := bytesRead; j < len(buf); j++ {
+			buf[j] = 0
+		}
+		if err := verifyBlock(tree, layout, buf, i, expectedRoot); err != nil {
+			return err
+		}
+		// startOff is the beginning of the read range within the
+		// current data block. Note that for all blocks other than the
+		// first, startOff should be 0.
+		startOff := int64(0)
+		if i == firstDataBlock {
+			startOff = readOffset % layout.blockSize
+		}
+		// endOff is the end of the read range within the current data
+		// block. Note that for all blocks other than the last,  endOff
+		// should be the block size.
+		endOff := layout.blockSize
+		if i == lastDataBlock {
+			endOff = (readOffset+readSize-1)%layout.blockSize + 1
+		}
+		// If the provided size exceeds the end of input data, we should
+		// only copy the parts in buf that's part of input data.
+		if startOff > int64(bytesRead) {
+			startOff = int64(bytesRead)
+		}
+		if endOff > int64(bytesRead) {
+			endOff = int64(bytesRead)
+		}
+		// Propagate write failures rather than silently dropping verified data.
+		if _, err := w.Write(buf[startOff:endOff]); err != nil {
+			return fmt.Errorf("Write verified data failed: %v", err)
+		}
+	}
+	return readErr
+}
+
+// verifyBlock verifies a block against tree. blockIndex is the index of the
+// block in the original data. The block is verified through each level of the
+// tree: it fails if the hash calculated at any level differs from the hash
+// stored in tree, or if the final root hash differs from expectedRoot.
+// verifyBlock modifies the cursor for tree and does not restore it; callers
+// that rely on the cursor must save and restore it themselves.
+func verifyBlock(tree io.ReadSeeker, layout Layout, dataBlock []byte, blockIndex int64, expectedRoot []byte) error {
+	if len(dataBlock) != int(layout.blockSize) {
+		return fmt.Errorf("incorrect block size")
+	}
+
+	expectedDigest := make([]byte, layout.digestSize)
+	treeBlock := make([]byte, layout.blockSize)
+	var digest []byte
+	for level := 0; level < layout.numLevels(); level++ {
+		// Calculate hash.
+		if level == 0 {
+			digestArray := sha256.Sum256(dataBlock)
+			digest = digestArray[:]
+		} else {
+			// Read the whole block in the previous level that
+			// contains the hash we just generated (io.ReadFull
+			// guards against short reads) and hash it.
+			if _, err := tree.Seek(layout.blockOffset(level-1, blockIndex), io.SeekStart); err != nil {
+				return err
+			}
+			if _, err := io.ReadFull(tree, treeBlock); err != nil {
+				return err
+			}
+			digestArray := sha256.Sum256(treeBlock)
+			digest = digestArray[:]
+		}
+
+		// Move to stored hash for the current block, read the digest
+		// and store in expectedDigest.
+		if _, err := tree.Seek(layout.digestOffset(level, blockIndex), io.SeekStart); err != nil {
+			return err
+		}
+		if _, err := io.ReadFull(tree, expectedDigest); err != nil {
+			return err
+		}
+
+		if !bytes.Equal(digest, expectedDigest) {
+			return fmt.Errorf("Verification failed")
+		}
+
+		// If this is the root layer, no need to generate next level
+		// hash.
+		if level == layout.rootLevel() {
+			break
+		}
+		blockIndex = blockIndex / layout.hashesPerBlock()
+	}
+
+	// Verification for the tree succeeded. Now compare the root hash in the
+	// tree with expectedRoot.
+	if !bytes.Equal(digest[:], expectedRoot) {
+		return fmt.Errorf("Verification failed")
+	}
+	return nil
+}
diff --git a/pkg/merkletree/merkletree_test.go b/pkg/merkletree/merkletree_test.go
index 7344db0b6b..911f61df96 100644
--- a/pkg/merkletree/merkletree_test.go
+++ b/pkg/merkletree/merkletree_test.go
@@ -17,45 +17,48 @@ package merkletree
 import (
 	"bytes"
 	"fmt"
+	"io"
+	"math/rand"
 	"testing"
+	"time"
 
 	"gvisor.dev/gvisor/pkg/usermem"
 )
 
-func TestSize(t *testing.T) {
+func TestLayout(t *testing.T) {
 	testCases := []struct {
-		dataSize           int64
-		expectedLevelStart []int64
+		dataSize            int64
+		expectedLevelOffset []int64
 	}{
 		{
-			dataSize:           100,
-			expectedLevelStart: []int64{0},
+			dataSize:            100,
+			expectedLevelOffset: []int64{0},
 		},
 		{
-			dataSize:           1000000,
-			expectedLevelStart: []int64{0, 2, 3},
+			dataSize:            1000000,
+			expectedLevelOffset: []int64{0, 2 * usermem.PageSize, 3 * usermem.PageSize},
 		},
 		{
-			dataSize:           4096 * int64(usermem.PageSize),
-			expectedLevelStart: []int64{0, 32, 33},
+			dataSize:            4096 * int64(usermem.PageSize),
+			expectedLevelOffset: []int64{0, 32 * usermem.PageSize, 33 * usermem.PageSize},
 		},
 	}
 
 	for _, tc := range testCases {
 		t.Run(fmt.Sprintf("%d", tc.dataSize), func(t *testing.T) {
-			s := MakeSize(tc.dataSize)
-			if s.blockSize != int64(usermem.PageSize) {
-				t.Errorf("got blockSize %d, want %d", s.blockSize, usermem.PageSize)
+			p := InitLayout(tc.dataSize)
+			if p.blockSize != int64(usermem.PageSize) {
+				t.Errorf("got blockSize %d, want %d", p.blockSize, usermem.PageSize)
 			}
-			if s.digestSize != sha256DigestSize {
-				t.Errorf("got digestSize %d, want %d", s.digestSize, sha256DigestSize)
+			if p.digestSize != sha256DigestSize {
+				t.Errorf("got digestSize %d, want %d", p.digestSize, sha256DigestSize)
 			}
-			if len(s.levelStart) != len(tc.expectedLevelStart) {
-				t.Errorf("got levels %d, want %d", len(s.levelStart), len(tc.expectedLevelStart))
+			if p.numLevels() != len(tc.expectedLevelOffset) {
+				t.Errorf("got levels %d, want %d", p.numLevels(), len(tc.expectedLevelOffset))
 			}
-			for i := 0; i < len(s.levelStart) && i < len(tc.expectedLevelStart); i++ {
-				if s.levelStart[i] != tc.expectedLevelStart[i] {
-					t.Errorf("got levelStart[%d] %d, want %d", i, s.levelStart[i], tc.expectedLevelStart[i])
+			for i := 0; i < p.numLevels() && i < len(tc.expectedLevelOffset); i++ {
+				if p.levelOffset[i] != tc.expectedLevelOffset[i] {
+					t.Errorf("got levelStart[%d] %d, want %d", i, p.levelOffset[i], tc.expectedLevelOffset[i])
 				}
 			}
 		})
@@ -66,57 +69,285 @@ func TestGenerate(t *testing.T) {
 	// The input data has size dataSize. It starts with the data in startWith,
 	// and all other bytes are zeroes.
 	testCases := []struct {
-		dataSize     int
-		startWith    []byte
+		data         []byte
 		expectedRoot []byte
 	}{
 		{
-			dataSize:     usermem.PageSize,
-			startWith:    nil,
+			data:         bytes.Repeat([]byte{0}, usermem.PageSize),
 			expectedRoot: []byte{173, 127, 172, 178, 88, 111, 198, 233, 102, 192, 4, 215, 209, 209, 107, 2, 79, 88, 5, 255, 124, 180, 124, 122, 133, 218, 189, 139, 72, 137, 44, 167},
 		},
 		{
-			dataSize:     128*usermem.PageSize + 1,
-			startWith:    nil,
+			data:         bytes.Repeat([]byte{0}, 128*usermem.PageSize+1),
 			expectedRoot: []byte{62, 93, 40, 92, 161, 241, 30, 223, 202, 99, 39, 2, 132, 113, 240, 139, 117, 99, 79, 243, 54, 18, 100, 184, 141, 121, 238, 46, 149, 202, 203, 132},
 		},
 		{
-			dataSize:     1,
-			startWith:    []byte{'a'},
+			data:         []byte{'a'},
 			expectedRoot: []byte{52, 75, 204, 142, 172, 129, 37, 14, 145, 137, 103, 203, 11, 162, 209, 205, 30, 169, 213, 72, 20, 28, 243, 24, 242, 2, 92, 43, 169, 59, 110, 210},
 		},
 		{
-			dataSize:     1,
-			startWith:    []byte{'1'},
-			expectedRoot: []byte{74, 35, 103, 179, 176, 149, 254, 112, 42, 65, 104, 66, 119, 56, 133, 124, 228, 15, 65, 161, 150, 0, 117, 174, 242, 34, 115, 115, 218, 37, 3, 105},
+			data:         bytes.Repeat([]byte{'a'}, usermem.PageSize),
+			expectedRoot: []byte{201, 62, 238, 45, 13, 176, 47, 16, 172, 199, 70, 13, 149, 118, 225, 34, 220, 248, 205, 83, 196, 191, 141, 252, 174, 27, 62, 116, 235, 207, 255, 90},
 		},
 	}
 
 	for _, tc := range testCases {
-		t.Run(fmt.Sprintf("%d", tc.dataSize), func(t *testing.T) {
-			var (
-				data bytes.Buffer
-				tree bytes.Buffer
-			)
+		t.Run(fmt.Sprintf("%d:%v", len(tc.data), tc.data[0]), func(t *testing.T) {
+			var tree bytes.Buffer
 
-			startSize := len(tc.startWith)
-			_, err := data.Write(tc.startWith)
+			root, err := Generate(bytes.NewBuffer(tc.data), int64(len(tc.data)), &tree, &tree)
 			if err != nil {
-				t.Fatalf("Failed to write to data: %v", err)
+				t.Fatalf("Generate failed: %v", err)
 			}
-			_, err = data.Write(make([]byte, tc.dataSize-startSize))
-			if err != nil {
-				t.Fatalf("Failed to write to data: %v", err)
+
+			if !bytes.Equal(root, tc.expectedRoot) {
+				t.Errorf("Unexpected root")
 			}
+		})
+	}
+}
+
+// bytesReadWriter is used to read from/write to/seek in a byte array. Unlike
+// bytes.Buffer, it keeps the whole buffer during read so that it can be reused.
+type bytesReadWriter struct {
+	// bytes contains the underlying byte array.
+	bytes []byte
+	// readPos is the current location for Read. Write always appends to
+	// the end of the array.
+	readPos int
+}
+
+func (brw *bytesReadWriter) Write(p []byte) (int, error) {
+	brw.bytes = append(brw.bytes, p...)
+	return len(p), nil
+}
+
+func (brw *bytesReadWriter) Read(p []byte) (int, error) {
+	if brw.readPos >= len(brw.bytes) {
+		return 0, io.EOF
+	}
+	bytesRead := copy(p, brw.bytes[brw.readPos:])
+	brw.readPos += bytesRead
+	if bytesRead < len(p) {
+		return bytesRead, io.EOF
+	}
+	return bytesRead, nil
+}
+
+func (brw *bytesReadWriter) Seek(offset int64, whence int) (int64, error) {
+	off := offset
+	if whence == io.SeekCurrent {
+		off += int64(brw.readPos)
+	}
+	if whence == io.SeekEnd {
+		off += int64(len(brw.bytes))
+	}
+	if off < 0 {
+		panic("seek with negative offset")
+	}
+	if off >= int64(len(brw.bytes)) {
+		return 0, io.EOF
+	}
+	brw.readPos = int(off)
+	return off, nil
+}
+
+func TestVerify(t *testing.T) {
+	// The input data has size dataSize. The portion to be verified starts at
+	// verifyStart and spans verifySize bytes. A bit is flipped in the byte at
+	// index modifyByte. shouldSucceed is false when that byte lies in a block
+	// covered by the verification range (or the range itself is invalid), and
+	// true when the modification falls outside every verified block.
+	testCases := []struct {
+		dataSize    int64
+		verifyStart int64
+		verifySize  int64
+		// A byte in input data is modified during the test. If the
+		// modified byte falls in verification range, Verify should
+		// fail, otherwise Verify should still succeed.
+		modifyByte    int64
+		shouldSucceed bool
+	}{
+		// Verify range start outside the data range should fail.
+		{
+			dataSize:      usermem.PageSize,
+			verifyStart:   usermem.PageSize,
+			verifySize:    1,
+			modifyByte:    0,
+			shouldSucceed: false,
+		},
+		// A verify range is valid if it starts inside the data and ends
+		// past the end of the data; in that case the portion from start
+		// to the end of the data is verified.
+		{
+			dataSize:      usermem.PageSize,
+			verifyStart:   0,
+			verifySize:    2 * usermem.PageSize,
+			modifyByte:    0,
+			shouldSucceed: false,
+		},
+		// Invalid verify range (negative size) should fail.
+		{
+			dataSize:      usermem.PageSize,
+			verifyStart:   1,
+			verifySize:    -1,
+			modifyByte:    0,
+			shouldSucceed: false,
+		},
+		// Invalid verify range (0 size) should fail.
+		{
+			dataSize:      usermem.PageSize,
+			verifyStart:   0,
+			verifySize:    0,
+			modifyByte:    0,
+			shouldSucceed: false,
+		},
+		// The test cases below use a block-aligned verify range.
+		// Modifying a byte in the verified range should cause verify
+		// to fail.
+		{
+			dataSize:      8 * usermem.PageSize,
+			verifyStart:   4 * usermem.PageSize,
+			verifySize:    usermem.PageSize,
+			modifyByte:    4 * usermem.PageSize,
+			shouldSucceed: false,
+		},
+		// Modifying a byte before the verified range should not cause
+		// verify to fail.
+		{
+			dataSize:      8 * usermem.PageSize,
+			verifyStart:   4 * usermem.PageSize,
+			verifySize:    usermem.PageSize,
+			modifyByte:    4*usermem.PageSize - 1,
+			shouldSucceed: true,
+		},
+		// Modifying a byte after the verified range should not cause
+		// verify to fail.
+		{
+			dataSize:      8 * usermem.PageSize,
+			verifyStart:   4 * usermem.PageSize,
+			verifySize:    usermem.PageSize,
+			modifyByte:    5 * usermem.PageSize,
+			shouldSucceed: true,
+		},
+		// The tests below use a non-block-aligned verify range.
+		// Modifying a byte at the start of the verify range should cause
+		// verify to fail.
+		{
+			dataSize:      8 * usermem.PageSize,
+			verifyStart:   4*usermem.PageSize + 123,
+			verifySize:    2 * usermem.PageSize,
+			modifyByte:    4*usermem.PageSize + 123,
+			shouldSucceed: false,
+		},
+		// Modifying a byte at the end of verify range should cause
+		// verify to fail.
+		{
+			dataSize:      8 * usermem.PageSize,
+			verifyStart:   4*usermem.PageSize + 123,
+			verifySize:    2 * usermem.PageSize,
+			modifyByte:    6*usermem.PageSize + 123,
+			shouldSucceed: false,
+		},
+		// Modifying a byte in the middle verified block should cause
+		// verify to fail.
+		{
+			dataSize:      8 * usermem.PageSize,
+			verifyStart:   4*usermem.PageSize + 123,
+			verifySize:    2 * usermem.PageSize,
+			modifyByte:    5*usermem.PageSize + 123,
+			shouldSucceed: false,
+		},
+		// Modifying a byte in the first block in the verified range
+		// should cause verify to fail, even if the modified byte itself
+		// is outside the verify range.
+		{
+			dataSize:      8 * usermem.PageSize,
+			verifyStart:   4*usermem.PageSize + 123,
+			verifySize:    2 * usermem.PageSize,
+			modifyByte:    4*usermem.PageSize + 122,
+			shouldSucceed: false,
+		},
+		// Modifying a byte in the last block in the verified range
+		// should cause verify to fail, even if the modified byte itself
+		// is outside the verify range.
+		{
+			dataSize:      8 * usermem.PageSize,
+			verifyStart:   4*usermem.PageSize + 123,
+			verifySize:    2 * usermem.PageSize,
+			modifyByte:    6*usermem.PageSize + 124,
+			shouldSucceed: false,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(fmt.Sprintf("%d", tc.modifyByte), func(t *testing.T) {
+			data := make([]byte, tc.dataSize)
+			// Generate random bytes in data.
+			rand.Read(data)
+			var tree bytesReadWriter
 
-			root, err := Generate(&data, int64(tc.dataSize), &tree, &tree)
+			root, err := Generate(bytes.NewBuffer(data), int64(tc.dataSize), &tree, &tree)
 			if err != nil {
 				t.Fatalf("Generate failed: %v", err)
 			}
 
-			if !bytes.Equal(root, tc.expectedRoot) {
-				t.Errorf("Unexpected root")
+			// Flip a bit in data and check the result of Verify.
+			var buf bytes.Buffer
+			data[tc.modifyByte] ^= 1
+			if tc.shouldSucceed {
+				if err := Verify(&buf, bytes.NewReader(data), &tree, tc.dataSize, tc.verifyStart, tc.verifySize, root); err != nil && err != io.EOF {
+					t.Errorf("Verification failed when expected to succeed: %v", err)
+				}
+				if int64(buf.Len()) != tc.verifySize || !bytes.Equal(data[tc.verifyStart:tc.verifyStart+tc.verifySize], buf.Bytes()) {
+					t.Errorf("Incorrect output from Verify")
+				}
+			} else {
+				if err := Verify(&buf, bytes.NewReader(data), &tree, tc.dataSize, tc.verifyStart, tc.verifySize, root); err == nil {
+					t.Errorf("Verification succeeded when expected to fail")
+				}
 			}
 		})
 	}
 }
+
+func TestVerifyRandom(t *testing.T) {
+	rand.Seed(time.Now().UnixNano())
+	// Use a random dataSize.  Minimum size 2 so that we can pick a random
+	// portion from it.
+	dataSize := rand.Int63n(200*usermem.PageSize) + 2
+	data := make([]byte, dataSize)
+	// Generate random bytes in data.
+	rand.Read(data)
+	var tree bytesReadWriter
+
+	root, err := Generate(bytes.NewBuffer(data), int64(dataSize), &tree, &tree)
+	if err != nil {
+		t.Fatalf("Generate failed: %v", err)
+	}
+
+	// Pick a random portion of data.
+	start := rand.Int63n(dataSize - 1)
+	size := rand.Int63n(dataSize) + 1
+
+	var buf bytes.Buffer
+	// Checks that the random portion of data from the original data is
+	// verified successfully.
+	if err := Verify(&buf, bytes.NewReader(data), &tree, dataSize, start, size, root); err != nil && err != io.EOF {
+		t.Errorf("Verification failed for correct data: %v", err)
+	}
+	if size > dataSize-start {
+		size = dataSize - start
+	}
+	if int64(buf.Len()) != size || !bytes.Equal(data[start:start+size], buf.Bytes()) {
+		t.Errorf("Incorrect output from Verify")
+	}
+
+	buf.Reset()
+	// Flip a bit in a random byte of the verified portion, and check that verification fails.
+	randBytePos := rand.Int63n(size)
+	data[start+randBytePos] ^= 1
+
+	if err := Verify(&buf, bytes.NewReader(data), &tree, dataSize, start, size, root); err == nil {
+		t.Errorf("Verification succeeded for modified data")
+	}
+}

From 327a3014c4548b03b26ef669f8fe811fc28228bf Mon Sep 17 00:00:00 2001
From: Nicolas Lacasse <nlacasse@google.com>
Date: Mon, 17 Aug 2020 16:28:19 -0700
Subject: [PATCH 011/211] Stop masking the IO error in handleIOError.

PiperOrigin-RevId: 327123331
---
 pkg/sentry/syscalls/linux/error.go | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/pkg/sentry/syscalls/linux/error.go b/pkg/sentry/syscalls/linux/error.go
index 64de56ac58..46060f6f50 100644
--- a/pkg/sentry/syscalls/linux/error.go
+++ b/pkg/sentry/syscalls/linux/error.go
@@ -36,8 +36,8 @@ var (
 // errors, we may consume the error and return only the partial read/write.
 //
 // op and f are used only for panics.
-func HandleIOErrorVFS2(t *kernel.Task, partialResult bool, err, intr error, op string, f *vfs.FileDescription) error {
-	known, err := handleIOErrorImpl(t, partialResult, err, intr, op)
+func HandleIOErrorVFS2(t *kernel.Task, partialResult bool, ioerr, intr error, op string, f *vfs.FileDescription) error {
+	known, err := handleIOErrorImpl(t, partialResult, ioerr, intr, op)
 	if err != nil {
 		return err
 	}
@@ -46,7 +46,7 @@ func HandleIOErrorVFS2(t *kernel.Task, partialResult bool, err, intr error, op s
 		fs := f.Mount().Filesystem().VirtualFilesystem()
 		root := vfs.RootFromContext(t)
 		name, _ := fs.PathnameWithDeleted(t, root, f.VirtualDentry())
-		log.Traceback("Invalid request partialResult %v and err (type %T) %v for %s operation on %q", partialResult, err, err, op, name)
+		log.Traceback("Invalid request partialResult %v and err (type %T) %v for %s operation on %q", partialResult, ioerr, ioerr, op, name)
 		partialResultOnce.Do(partialResultMetric.Increment)
 	}
 	return nil
@@ -56,15 +56,15 @@ func HandleIOErrorVFS2(t *kernel.Task, partialResult bool, err, intr error, op s
 // errors, we may consume the error and return only the partial read/write.
 //
 // op and f are used only for panics.
-func handleIOError(t *kernel.Task, partialResult bool, err, intr error, op string, f *fs.File) error {
-	known, err := handleIOErrorImpl(t, partialResult, err, intr, op)
+func handleIOError(t *kernel.Task, partialResult bool, ioerr, intr error, op string, f *fs.File) error {
+	known, err := handleIOErrorImpl(t, partialResult, ioerr, intr, op)
 	if err != nil {
 		return err
 	}
 	if !known {
 		// An unknown error is encountered with a partial read/write.
 		name, _ := f.Dirent.FullName(nil /* ignore chroot */)
-		log.Traceback("Invalid request partialResult %v and err (type %T) %v for %s operation on %q, %T", partialResult, err, err, op, name, f.FileOperations)
+		log.Traceback("Invalid request partialResult %v and err (type %T) %v for %s operation on %q, %T", partialResult, ioerr, ioerr, op, name, f.FileOperations)
 		partialResultOnce.Do(partialResultMetric.Increment)
 	}
 	return nil

From 4d571b4bf21147c132ea827fd19a0462a004688d Mon Sep 17 00:00:00 2001
From: gVisor bot <gvisor-bot@google.com>
Date: Mon, 17 Aug 2020 16:29:10 -0700
Subject: [PATCH 012/211] Add a skeleton for verity file system

PiperOrigin-RevId: 327123477
---
 pkg/sentry/fsimpl/verity/BUILD         |  23 ++
 pkg/sentry/fsimpl/verity/filesystem.go | 333 +++++++++++++++++++++++
 pkg/sentry/fsimpl/verity/verity.go     | 355 +++++++++++++++++++++++++
 3 files changed, 711 insertions(+)
 create mode 100644 pkg/sentry/fsimpl/verity/BUILD
 create mode 100644 pkg/sentry/fsimpl/verity/filesystem.go
 create mode 100644 pkg/sentry/fsimpl/verity/verity.go

diff --git a/pkg/sentry/fsimpl/verity/BUILD b/pkg/sentry/fsimpl/verity/BUILD
new file mode 100644
index 0000000000..28d2a4bcba
--- /dev/null
+++ b/pkg/sentry/fsimpl/verity/BUILD
@@ -0,0 +1,23 @@
+load("//tools:defs.bzl", "go_library")
+
+licenses(["notice"])
+
+go_library(
+    name = "verity",
+    srcs = [
+        "filesystem.go",
+        "verity.go",
+    ],
+    visibility = ["//pkg/sentry:internal"],
+    deps = [
+        "//pkg/abi/linux",
+        "//pkg/context",
+        "//pkg/fspath",
+        "//pkg/sentry/fs/lock",
+        "//pkg/sentry/kernel/auth",
+        "//pkg/sentry/socket/unix/transport",
+        "//pkg/sentry/vfs",
+        "//pkg/sync",
+        "//pkg/syserror",
+    ],
+)
diff --git a/pkg/sentry/fsimpl/verity/filesystem.go b/pkg/sentry/fsimpl/verity/filesystem.go
new file mode 100644
index 0000000000..78c6074bd0
--- /dev/null
+++ b/pkg/sentry/fsimpl/verity/filesystem.go
@@ -0,0 +1,333 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package verity
+
+import (
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/fspath"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+	"gvisor.dev/gvisor/pkg/sync"
+	"gvisor.dev/gvisor/pkg/syserror"
+)
+
+// Sync implements vfs.FilesystemImpl.Sync.
+func (fs *filesystem) Sync(ctx context.Context) error {
+	// All files should be read-only.
+	return nil
+}
+
+var dentrySlicePool = sync.Pool{
+	New: func() interface{} {
+		ds := make([]*dentry, 0, 4) // arbitrary non-zero initial capacity
+		return &ds
+	},
+}
+
+func appendDentry(ds *[]*dentry, d *dentry) *[]*dentry {
+	if ds == nil {
+		ds = dentrySlicePool.Get().(*[]*dentry)
+	}
+	*ds = append(*ds, d)
+	return ds
+}
+
+// Preconditions: ds != nil.
+func putDentrySlice(ds *[]*dentry) {
+	// Allow dentries to be GC'd.
+	for i := range *ds {
+		(*ds)[i] = nil
+	}
+	*ds = (*ds)[:0]
+	dentrySlicePool.Put(ds)
+}
+
+// renameMuRUnlockAndCheckDrop calls fs.renameMu.RUnlock(), then calls
+// dentry.checkDropLocked on all dentries in *ds with fs.renameMu locked for
+// writing.
+//
+// ds is a pointer-to-pointer since defer evaluates its arguments immediately,
+// but dentry slices are allocated lazily, and it's much easier to say "defer
+// fs.renameMuRUnlockAndCheckDrop(&ds)" than "defer func() {
+// fs.renameMuRUnlockAndCheckDrop(ds) }()" to work around this.
+func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) {
+	fs.renameMu.RUnlock()
+	if *ds == nil {
+		return
+	}
+	if len(**ds) != 0 {
+		fs.renameMu.Lock()
+		for _, d := range **ds {
+			d.checkDropLocked(ctx)
+		}
+		fs.renameMu.Unlock()
+	}
+	putDentrySlice(*ds)
+}
+
+func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) {
+	if *ds == nil {
+		fs.renameMu.Unlock()
+		return
+	}
+	for _, d := range **ds {
+		d.checkDropLocked(ctx)
+	}
+	fs.renameMu.Unlock()
+	putDentrySlice(*ds)
+}
+
+// resolveLocked resolves rp to an existing file.
+func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath, ds **[]*dentry) (*dentry, error) {
+	// TODO(b/159261227): Implement resolveLocked.
+	return nil, nil
+}
+
+// walkParentDirLocked resolves all but the last path component of rp to an
+// existing directory, starting from the given directory (which is usually
+// rp.Start().Impl().(*dentry)). It does not check that the returned directory
+// is searchable by the provider of rp.
+//
+// Preconditions: fs.renameMu must be locked. !rp.Done().
+func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) {
+	// TODO(b/159261227): Implement walkParentDirLocked.
+	return nil, nil
+}
+
+// AccessAt implements vfs.FilesystemImpl.AccessAt.
+func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error {
+	// Verity file system is read-only.
+	if ats&vfs.MayWrite != 0 {
+		return syserror.EROFS
+	}
+	var ds *[]*dentry
+	fs.renameMu.RLock()
+	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	d, err := fs.resolveLocked(ctx, rp, &ds)
+	if err != nil {
+		return err
+	}
+	return d.checkPermissions(creds, ats)
+}
+
+// GetDentryAt implements vfs.FilesystemImpl.GetDentryAt.
+func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) {
+	var ds *[]*dentry
+	fs.renameMu.RLock()
+	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	d, err := fs.resolveLocked(ctx, rp, &ds)
+	if err != nil {
+		return nil, err
+	}
+	if opts.CheckSearchable {
+		if !d.isDir() {
+			return nil, syserror.ENOTDIR
+		}
+		if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
+			return nil, err
+		}
+	}
+	d.IncRef()
+	return &d.vfsd, nil
+}
+
+// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt.
+func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) {
+	var ds *[]*dentry
+	fs.renameMu.RLock()
+	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	start := rp.Start().Impl().(*dentry)
+	d, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
+	if err != nil {
+		return nil, err
+	}
+	d.IncRef()
+	return &d.vfsd, nil
+}
+
+// LinkAt implements vfs.FilesystemImpl.LinkAt.
+func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
+	// Verity file system is read-only.
+	return syserror.EROFS
+}
+
+// MkdirAt implements vfs.FilesystemImpl.MkdirAt.
+func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
+	// Verity file system is read-only.
+	return syserror.EROFS
+}
+
+// MknodAt implements vfs.FilesystemImpl.MknodAt.
+func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
+	// Verity file system is read-only.
+	return syserror.EROFS
+}
+
+// OpenAt implements vfs.FilesystemImpl.OpenAt.
+func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+	// TODO(b/159261227): Implement OpenAt.
+	return nil, nil
+}
+
+// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
+func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
+	var ds *[]*dentry
+	fs.renameMu.RLock()
+	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	d, err := fs.resolveLocked(ctx, rp, &ds)
+	if err != nil {
+		return "", err
+	}
+	// TODO(b/162787271): Provide integrity check for ReadlinkAt.
+	return fs.vfsfs.VirtualFilesystem().ReadlinkAt(ctx, d.fs.creds, &vfs.PathOperation{
+		Root:  d.lowerVD,
+		Start: d.lowerVD,
+	})
+}
+
+// RenameAt implements vfs.FilesystemImpl.RenameAt.
+func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
+	// Verity file system is read-only.
+	return syserror.EROFS
+}
+
+// RmdirAt implements vfs.FilesystemImpl.RmdirAt.
+func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
+	// Verity file system is read-only.
+	return syserror.EROFS
+}
+
+// SetStatAt implements vfs.FilesystemImpl.SetStatAt.
+func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
+	// Verity file system is read-only.
+	return syserror.EROFS
+}
+
+// StatAt implements vfs.FilesystemImpl.StatAt.
+func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
+	var ds *[]*dentry
+	fs.renameMu.RLock()
+	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	d, err := fs.resolveLocked(ctx, rp, &ds)
+	if err != nil {
+		return linux.Statx{}, err
+	}
+
+	var stat linux.Statx
+	stat, err = fs.vfsfs.VirtualFilesystem().StatAt(ctx, fs.creds, &vfs.PathOperation{
+		Root:  d.lowerVD,
+		Start: d.lowerVD,
+	}, &opts)
+	if err != nil {
+		return linux.Statx{}, err
+	}
+	return stat, nil
+}
+
+// StatFSAt implements vfs.FilesystemImpl.StatFSAt.
+func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) {
+	// TODO(b/159261227): Implement StatFSAt.
+	return linux.Statfs{}, nil
+}
+
+// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
+func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error {
+	// Verity file system is read-only.
+	return syserror.EROFS
+}
+
+// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt.
+func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error {
+	// Verity file system is read-only.
+	return syserror.EROFS
+}
+
+// BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt.
+func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) {
+	var ds *[]*dentry
+	fs.renameMu.RLock()
+	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	if _, err := fs.resolveLocked(ctx, rp, &ds); err != nil {
+		return nil, err
+	}
+	return nil, syserror.ECONNREFUSED
+}
+
+// ListxattrAt implements vfs.FilesystemImpl.ListxattrAt.
+func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
+	var ds *[]*dentry
+	fs.renameMu.RLock()
+	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	d, err := fs.resolveLocked(ctx, rp, &ds)
+	if err != nil {
+		return nil, err
+	}
+	lowerVD := d.lowerVD
+	return fs.vfsfs.VirtualFilesystem().ListxattrAt(ctx, d.fs.creds, &vfs.PathOperation{
+		Root:  lowerVD,
+		Start: lowerVD,
+	}, size)
+}
+
+// GetxattrAt implements vfs.FilesystemImpl.GetxattrAt.
+func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetxattrOptions) (string, error) {
+	var ds *[]*dentry
+	fs.renameMu.RLock()
+	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	d, err := fs.resolveLocked(ctx, rp, &ds)
+	if err != nil {
+		return "", err
+	}
+	lowerVD := d.lowerVD
+	return fs.vfsfs.VirtualFilesystem().GetxattrAt(ctx, d.fs.creds, &vfs.PathOperation{
+		Root:  lowerVD,
+		Start: lowerVD,
+	}, &opts)
+}
+
+// SetxattrAt implements vfs.FilesystemImpl.SetxattrAt.
+func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error {
+	// Verity file system is read-only.
+	return syserror.EROFS
+}
+
+// RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt.
+func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
+	// Verity file system is read-only.
+	return syserror.EROFS
+}
+
+// PrependPath implements vfs.FilesystemImpl.PrependPath.
+func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
+	fs.renameMu.RLock()
+	defer fs.renameMu.RUnlock()
+	mnt := vd.Mount()
+	d := vd.Dentry().Impl().(*dentry)
+	for {
+		if mnt == vfsroot.Mount() && &d.vfsd == vfsroot.Dentry() {
+			return vfs.PrependPathAtVFSRootError{}
+		}
+		if &d.vfsd == mnt.Root() {
+			return nil
+		}
+		if d.parent == nil {
+			return vfs.PrependPathAtNonMountRootError{}
+		}
+		b.PrependComponent(d.name)
+		d = d.parent
+	}
+}
diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go
new file mode 100644
index 0000000000..cb29d33a58
--- /dev/null
+++ b/pkg/sentry/fsimpl/verity/verity.go
@@ -0,0 +1,355 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package verity provides a filesystem implementation that is a wrapper of
+// another file system.
+// The verity file system provides integrity checking for the underlying file
+// system by verifying path traversals and each read.
+// The verity file system is read-only, except for one case: when
+// allowRuntimeEnable is true, additional Merkle files can be generated using
+// the FS_IOC_ENABLE_VERITY ioctl.
+package verity
+
+import (
+	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
+	fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+	"gvisor.dev/gvisor/pkg/sync"
+	"gvisor.dev/gvisor/pkg/syserror"
+)
+
+// Name is the default filesystem name.
+const Name = "verity"
+
+// testOnlyDebugging allows verity file system to return error instead of
+// crashing the application when a malicious action is detected. This should
+// only be set for tests.
+var testOnlyDebugging bool
+
+// FilesystemType implements vfs.FilesystemType.
+type FilesystemType struct{}
+
+// filesystem implements vfs.FilesystemImpl.
+type filesystem struct {
+	vfsfs vfs.Filesystem
+
+	// creds is a copy of the filesystem's creator's credentials, which are
+	// used for accesses to the underlying file system. creds is immutable.
+	creds *auth.Credentials
+
+	// allowRuntimeEnable is true if using ioctl with FS_IOC_ENABLE_VERITY
+	// to build Merkle trees in the verity file system is allowed. If this
+	// is false, no new Merkle trees can be built, and only the files that
+	// had Merkle trees before startup (e.g. from a host filesystem mounted
+	// with gofer fs) can be verified.
+	allowRuntimeEnable bool
+
+	// lowerMount is the underlying file system mount.
+	lowerMount *vfs.Mount
+
+	// rootDentry is the mount root Dentry for this file system, which
+	// stores the root hash of the whole file system in bytes.
+	rootDentry *dentry
+
+	// renameMu synchronizes renaming with non-renaming operations in order
+	// to ensure consistent lock ordering between dentry.dirMu in different
+	// dentries.
+	renameMu sync.RWMutex
+}
+
+// InternalFilesystemOptions may be passed as
+// vfs.GetFilesystemOptions.InternalData to FilesystemType.GetFilesystem.
+type InternalFilesystemOptions struct {
+	// RootMerkleFileName is the name of the verity root Merkle tree file.
+	RootMerkleFileName string
+
+	// LowerName is the name of the filesystem wrapped by verity fs.
+	LowerName string
+
+	// RootHash is the root hash of the overall verity file system.
+	RootHash []byte
+
+	// AllowRuntimeEnable specifies whether the verity file system allows
+	// enabling verification for files (i.e. building Merkle trees) during
+	// runtime.
+	AllowRuntimeEnable bool
+
+	// LowerGetFSOptions is the file system option for the lower layer file
+	// system wrapped by verity file system.
+	LowerGetFSOptions vfs.GetFilesystemOptions
+
+	// TestOnlyDebugging allows verity file system to return error instead
+	// of crashing the application when a malicious action is detected. This
+	// should only be set for tests.
+	TestOnlyDebugging bool
+}
+
+// Name implements vfs.FilesystemType.Name.
+func (FilesystemType) Name() string {
+	return Name
+}
+
+// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
+func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
+	// TODO(b/159261227): Implement GetFilesystem.
+	return nil, nil, nil
+}
+
+// Release implements vfs.FilesystemImpl.Release.
+func (fs *filesystem) Release(ctx context.Context) {
+	fs.lowerMount.DecRef(ctx)
+}
+
+// dentry implements vfs.DentryImpl.
+type dentry struct {
+	vfsd vfs.Dentry
+
+	refs int64
+
+	// fs is the owning filesystem. fs is immutable.
+	fs *filesystem
+
+	// mode, uid and gid are the file mode, owner, and group of the file in
+	// the underlying file system.
+	mode uint32
+	uid  uint32
+	gid  uint32
+
+	// parent is the dentry corresponding to this dentry's parent directory.
+	// name is this dentry's name in parent. If this dentry is a filesystem
+	// root, parent is nil and name is the empty string. parent and name are
+	// protected by fs.renameMu.
+	parent *dentry
+	name   string
+
+	// If this dentry represents a directory, children maps the names of
+	// children for which dentries have been instantiated to those dentries.
+	// A child removes itself from its parent's map in destroyLocked unless
+	// its vfs.Dentry is already dead. children is protected by dirMu, which
+	// must be held while the map is read or modified.
+	dirMu    sync.Mutex
+	children map[string]*dentry
+
+	// lowerVD is the VirtualDentry in the underlying file system.
+	lowerVD vfs.VirtualDentry
+
+	// lowerMerkleVD is the VirtualDentry of the corresponding Merkle tree
+	// in the underlying file system.
+	lowerMerkleVD vfs.VirtualDentry
+
+	// rootHash is the rootHash for the current file or directory.
+	rootHash []byte
+}
+
+// newDentry creates a new dentry representing the given verity file. The
+// dentry initially has no references; it is the caller's responsibility to set
+// the dentry's reference count and/or call dentry.destroy() as appropriate.
+// The dentry is initially invalid in that its lowerVD and lowerMerkleVD are
+// unset; the caller is responsible for setting them.
+func (fs *filesystem) newDentry() *dentry {
+	d := &dentry{
+		fs: fs,
+	}
+	d.vfsd.Init(d)
+	return d
+}
+
+// IncRef implements vfs.DentryImpl.IncRef.
+func (d *dentry) IncRef() {
+	atomic.AddInt64(&d.refs, 1)
+}
+
+// TryIncRef implements vfs.DentryImpl.TryIncRef.
+func (d *dentry) TryIncRef() bool {
+	for {
+		refs := atomic.LoadInt64(&d.refs)
+		if refs <= 0 {
+			return false
+		}
+		if atomic.CompareAndSwapInt64(&d.refs, refs, refs+1) {
+			return true
+		}
+	}
+}
+
+// DecRef implements vfs.DentryImpl.DecRef.
+func (d *dentry) DecRef(ctx context.Context) {
+	if refs := atomic.AddInt64(&d.refs, -1); refs == 0 {
+		d.fs.renameMu.Lock()
+		d.checkDropLocked(ctx)
+		d.fs.renameMu.Unlock()
+	} else if refs < 0 {
+		panic("verity.dentry.DecRef() called without holding a reference")
+	}
+}
+
+// checkDropLocked should be called after d's reference count becomes 0 or it
+// becomes deleted.
+func (d *dentry) checkDropLocked(ctx context.Context) {
+	// Dentries with a positive reference count must be retained. Dentries
+	// with a negative reference count have already been destroyed.
+	if atomic.LoadInt64(&d.refs) != 0 {
+		return
+	}
+	// Refs is still zero; destroy it.
+	d.destroyLocked(ctx)
+	return
+}
+
+// destroyLocked destroys the dentry.
+//
+// Preconditions: d.fs.renameMu must be locked for writing. d.refs == 0.
+func (d *dentry) destroyLocked(ctx context.Context) {
+	switch atomic.LoadInt64(&d.refs) {
+	case 0:
+		// Mark the dentry destroyed.
+		atomic.StoreInt64(&d.refs, -1)
+	case -1:
+		panic("verity.dentry.destroyLocked() called on already destroyed dentry")
+	default:
+		panic("verity.dentry.destroyLocked() called with references on the dentry")
+	}
+
+	if d.lowerVD.Ok() {
+		d.lowerVD.DecRef(ctx)
+	}
+
+	if d.lowerMerkleVD.Ok() {
+		d.lowerMerkleVD.DecRef(ctx)
+	}
+
+	if d.parent != nil {
+		d.parent.dirMu.Lock()
+		if !d.vfsd.IsDead() {
+			delete(d.parent.children, d.name)
+		}
+		d.parent.dirMu.Unlock()
+		if refs := atomic.AddInt64(&d.parent.refs, -1); refs == 0 {
+			d.parent.checkDropLocked(ctx)
+		} else if refs < 0 {
+			panic("verity.dentry.DecRef() called without holding a reference")
+		}
+	}
+}
+
+// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent.
+func (d *dentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et vfs.EventType) {
+	//TODO(b/159261227): Implement InotifyWithParent.
+}
+
+// Watches implements vfs.DentryImpl.Watches.
+func (d *dentry) Watches() *vfs.Watches {
+	//TODO(b/159261227): Implement Watches.
+	return nil
+}
+
+// OnZeroWatches implements vfs.DentryImpl.OnZeroWatches.
+func (d *dentry) OnZeroWatches(context.Context) {
+	//TODO(b/159261227): Implement OnZeroWatches.
+}
+
+func (d *dentry) isSymlink() bool {
+	return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFLNK
+}
+
+func (d *dentry) isDir() bool {
+	return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFDIR
+}
+
+func (d *dentry) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes) error {
+	return vfs.GenericCheckPermissions(creds, ats, linux.FileMode(atomic.LoadUint32(&d.mode)), auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid)))
+}
+
+func (d *dentry) readlink(ctx context.Context) (string, error) {
+	return d.fs.vfsfs.VirtualFilesystem().ReadlinkAt(ctx, d.fs.creds, &vfs.PathOperation{
+		Root:  d.lowerVD,
+		Start: d.lowerVD,
+	})
+}
+
+// fileDescription implements vfs.FileDescriptionImpl for verity fds.
+// fileDescription is a wrapper of the underlying lowerFD, with support to build
+// Merkle trees through the Linux fs-verity API to verify contents read from
+// lowerFD.
+type fileDescription struct {
+	vfsfd vfs.FileDescription
+	vfs.FileDescriptionDefaultImpl
+	vfs.LockFD
+
+	// d is the corresponding dentry to the fileDescription.
+	d *dentry
+
+	// isDir specifies whether the fileDescription points to a directory.
+	isDir bool
+
+	// lowerFD is the FileDescription corresponding to the file in the
+	// underlying file system.
+	lowerFD *vfs.FileDescription
+
+	// merkleReader is the read-only FileDescription corresponding to the
+	// Merkle tree file in the underlying file system.
+	merkleReader *vfs.FileDescription
+
+	// merkleWriter is the FileDescription corresponding to the Merkle tree
+	// file in the underlying file system for writing. This should only be
+	// used when allowRuntimeEnable is set to true.
+	merkleWriter *vfs.FileDescription
+
+	// parentMerkleWriter is the FileDescription of the Merkle tree for the
+	// directory that contains the current file/directory. This is only used
+	// if allowRuntimeEnable is set to true.
+	parentMerkleWriter *vfs.FileDescription
+}
+
+// Release implements vfs.FileDescriptionImpl.Release.
+func (fd *fileDescription) Release(ctx context.Context) {
+	fd.lowerFD.DecRef(ctx)
+	fd.merkleReader.DecRef(ctx)
+	if fd.merkleWriter != nil {
+		fd.merkleWriter.DecRef(ctx)
+	}
+	if fd.parentMerkleWriter != nil {
+		fd.parentMerkleWriter.DecRef(ctx)
+	}
+}
+
+// Stat implements vfs.FileDescriptionImpl.Stat.
+func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
+	// TODO(b/162788573): Add integrity check for metadata.
+	stat, err := fd.lowerFD.Stat(ctx, opts)
+	if err != nil {
+		return linux.Statx{}, err
+	}
+	return stat, nil
+}
+
+// SetStat implements vfs.FileDescriptionImpl.SetStat.
+func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
+	// Verity files are read-only.
+	return syserror.EPERM
+}
+
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+	return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (fd *fileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+	return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
+}

From f3f29d8d6748ed67ce78789cf45f062f3d136d3d Mon Sep 17 00:00:00 2001
From: Zach Koopmans <zkoopmans@google.com>
Date: Tue, 18 Aug 2020 09:52:53 -0700
Subject: [PATCH 013/211] Fix return value for MMap Tests in OpenSource

Some systems return 128 + errno instead of just errno, which is the case
here.

PiperOrigin-RevId: 327247836
---
 test/syscalls/linux/mmap.cc | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/test/syscalls/linux/mmap.cc b/test/syscalls/linux/mmap.cc
index 6d3227ab63..e52c9cbcbe 100644
--- a/test/syscalls/linux/mmap.cc
+++ b/test/syscalls/linux/mmap.cc
@@ -43,6 +43,8 @@
 #include "test/util/temp_path.h"
 #include "test/util/test_util.h"
 
+using ::testing::AnyOf;
+using ::testing::Eq;
 using ::testing::Gt;
 
 namespace gvisor {
@@ -296,7 +298,8 @@ TEST_F(MMapTest, MapDevZeroSegfaultAfterUnmap) {
   };
 
   EXPECT_THAT(InForkedProcess(rest),
-              IsPosixErrorOkAndHolds(W_EXITCODE(0, SIGSEGV)));
+              IsPosixErrorOkAndHolds(AnyOf(Eq(W_EXITCODE(0, SIGSEGV)),
+                                           Eq(W_EXITCODE(0, 128 + SIGSEGV)))));
 }
 
 TEST_F(MMapTest, MapDevZeroUnaligned) {

From 65e64145082d7800fd18e07d199565452db0442f Mon Sep 17 00:00:00 2001
From: Zach Koopmans <zkoopmans@google.com>
Date: Tue, 18 Aug 2020 09:53:47 -0700
Subject: [PATCH 014/211] Fix readahead test for opensource.

Skip InvalidOffset and InvalidSize on Linux, as these tests are invalid for
later kernel versions.

Add UnsupportedFile test as this check is in all kernel versions.

PiperOrigin-RevId: 327248035
---
 test/syscalls/linux/BUILD        |  1 +
 test/syscalls/linux/readahead.cc | 11 +++++++++++
 2 files changed, 12 insertions(+)

diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index 66a31cd28d..6299870bc0 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -1862,6 +1862,7 @@ cc_binary(
     srcs = ["readahead.cc"],
     linkstatic = 1,
     deps = [
+        ":socket_test_util",
         "//test/util:file_descriptor",
         gtest,
         "//test/util:temp_path",
diff --git a/test/syscalls/linux/readahead.cc b/test/syscalls/linux/readahead.cc
index 09703b5c1f..71073bb3cc 100644
--- a/test/syscalls/linux/readahead.cc
+++ b/test/syscalls/linux/readahead.cc
@@ -16,6 +16,7 @@
 #include <fcntl.h>
 
 #include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
 #include "test/util/file_descriptor.h"
 #include "test/util/temp_path.h"
 #include "test/util/test_util.h"
@@ -29,7 +30,15 @@ TEST(ReadaheadTest, InvalidFD) {
   EXPECT_THAT(readahead(-1, 1, 1), SyscallFailsWithErrno(EBADF));
 }
 
+TEST(ReadaheadTest, UnsupportedFile) {
+  FileDescriptor sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_STREAM, 0));
+  ASSERT_THAT(readahead(sock.get(), 1, 1), SyscallFailsWithErrno(EINVAL));
+}
+
 TEST(ReadaheadTest, InvalidOffset) {
+  // This test is not valid for some Linux Kernels.
+  SKIP_IF(!IsRunningOnGvisor());
   const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
   const FileDescriptor fd =
       ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR));
@@ -79,6 +88,8 @@ TEST(ReadaheadTest, WriteOnly) {
 }
 
 TEST(ReadaheadTest, InvalidSize) {
+  // This test is not valid on some Linux kernels.
+  SKIP_IF(!IsRunningOnGvisor());
   const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
   const FileDescriptor fd =
       ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR));

From 8b5e9dbae85d0877a60112055aa304665d5e39fa Mon Sep 17 00:00:00 2001
From: Ayush Ranjan <ayushranjan@google.com>
Date: Tue, 18 Aug 2020 10:20:17 -0700
Subject: [PATCH 015/211] [vfs2] Implement /proc/sys/net/ipv4/tcp_rmem and
 /proc/sys/net/ipv4/tcp_wmem.

Updates #1035

PiperOrigin-RevId: 327253907
---
 pkg/sentry/fsimpl/proc/BUILD        |   1 +
 pkg/sentry/fsimpl/proc/tasks_sys.go | 110 ++++++++++++++++++++++++++--
 2 files changed, 106 insertions(+), 5 deletions(-)

diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD
index 6014138ffb..14ecfd3005 100644
--- a/pkg/sentry/fsimpl/proc/BUILD
+++ b/pkg/sentry/fsimpl/proc/BUILD
@@ -36,6 +36,7 @@ go_library(
         "//pkg/sentry/socket/unix/transport",
         "//pkg/sentry/usage",
         "//pkg/sentry/vfs",
+        "//pkg/sync",
         "//pkg/syserror",
         "//pkg/tcpip/header",
         "//pkg/usermem",
diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go
index b717781284..6435385efe 100644
--- a/pkg/sentry/fsimpl/proc/tasks_sys.go
+++ b/pkg/sentry/fsimpl/proc/tasks_sys.go
@@ -25,10 +25,18 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
+	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
 )
 
+type tcpMemDir int
+
+const (
+	tcpRMem tcpMemDir = iota
+	tcpWMem
+)
+
 // newSysDir returns the dentry corresponding to /proc/sys directory.
 func (fs *filesystem) newSysDir(root *auth.Credentials, k *kernel.Kernel) *kernfs.Dentry {
 	return kernfs.NewStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
@@ -56,7 +64,9 @@ func (fs *filesystem) newSysNetDir(root *auth.Credentials, k *kernel.Kernel) *ke
 		contents = map[string]*kernfs.Dentry{
 			"ipv4": kernfs.NewStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
 				"tcp_recovery": fs.newDentry(root, fs.NextIno(), 0644, &tcpRecoveryData{stack: stack}),
+				"tcp_rmem":     fs.newDentry(root, fs.NextIno(), 0644, &tcpMemData{stack: stack, dir: tcpRMem}),
 				"tcp_sack":     fs.newDentry(root, fs.NextIno(), 0644, &tcpSackData{stack: stack}),
+				"tcp_wmem":     fs.newDentry(root, fs.NextIno(), 0644, &tcpMemData{stack: stack, dir: tcpWMem}),
 
 				// The following files are simple stubs until they are implemented in
 				// netstack, most of these files are configuration related. We use the
@@ -181,10 +191,11 @@ func (d *tcpSackData) Generate(ctx context.Context, buf *bytes.Buffer) error {
 		// Tough luck.
 		val = "1\n"
 	}
-	buf.WriteString(val)
-	return nil
+	_, err := buf.WriteString(val)
+	return err
 }
 
+// Write implements vfs.WritableDynamicBytesSource.Write.
 func (d *tcpSackData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
 	if offset != 0 {
 		// No need to handle partial writes thus far.
@@ -200,7 +211,7 @@ func (d *tcpSackData) Write(ctx context.Context, src usermem.IOSequence, offset
 	var v int32
 	n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
 	if err != nil {
-		return n, err
+		return 0, err
 	}
 	if d.enabled == nil {
 		d.enabled = new(bool)
@@ -228,10 +239,11 @@ func (d *tcpRecoveryData) Generate(ctx context.Context, buf *bytes.Buffer) error
 		return err
 	}
 
-	buf.WriteString(fmt.Sprintf("%d\n", recovery))
-	return nil
+	_, err = buf.WriteString(fmt.Sprintf("%d\n", recovery))
+	return err
 }
 
+// Write implements vfs.WritableDynamicBytesSource.Write.
 func (d *tcpRecoveryData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
 	if offset != 0 {
 		// No need to handle partial writes thus far.
@@ -254,3 +266,91 @@ func (d *tcpRecoveryData) Write(ctx context.Context, src usermem.IOSequence, off
 	}
 	return n, nil
 }
+
+// tcpMemData implements vfs.WritableDynamicBytesSource for
+// /proc/sys/net/ipv4/tcp_rmem and /proc/sys/net/ipv4/tcp_wmem.
+//
+// +stateify savable
+type tcpMemData struct {
+	kernfs.DynamicBytesFile
+
+	dir   tcpMemDir
+	stack inet.Stack `state:"wait"`
+
+	// mu protects against concurrent reads/writes to FDs based on the dentry
+	// backing this byte source.
+	mu sync.Mutex `state:"nosave"`
+}
+
+var _ vfs.WritableDynamicBytesSource = (*tcpMemData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.
+func (d *tcpMemData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	d.mu.Lock()
+	defer d.mu.Unlock()
+
+	size, err := d.readSizeLocked()
+	if err != nil {
+		return err
+	}
+	_, err = buf.WriteString(fmt.Sprintf("%d\t%d\t%d\n", size.Min, size.Default, size.Max))
+	return err
+}
+
+// Write implements vfs.WritableDynamicBytesSource.Write.
+func (d *tcpMemData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
+	if offset != 0 {
+		// No need to handle partial writes thus far.
+		return 0, syserror.EINVAL
+	}
+	if src.NumBytes() == 0 {
+		return 0, nil
+	}
+	d.mu.Lock()
+	defer d.mu.Unlock()
+
+	// Limit the amount of memory allocated.
+	src = src.TakeFirst(usermem.PageSize - 1)
+	size, err := d.readSizeLocked()
+	if err != nil {
+		return 0, err
+	}
+	buf := []int32{int32(size.Min), int32(size.Default), int32(size.Max)}
+	n, err := usermem.CopyInt32StringsInVec(ctx, src.IO, src.Addrs, buf, src.Opts)
+	if err != nil {
+		return 0, err
+	}
+	newSize := inet.TCPBufferSize{
+		Min:     int(buf[0]),
+		Default: int(buf[1]),
+		Max:     int(buf[2]),
+	}
+	if err := d.writeSizeLocked(newSize); err != nil {
+		return 0, err
+	}
+	return n, nil
+}
+
+// Precondition: d.mu must be locked.
+func (d *tcpMemData) readSizeLocked() (inet.TCPBufferSize, error) {
+	switch d.dir {
+	case tcpRMem:
+		return d.stack.TCPReceiveBufferSize()
+	case tcpWMem:
+		return d.stack.TCPSendBufferSize()
+	default:
+		panic(fmt.Sprintf("unknown tcpMemFile type: %v", d.dir))
+	}
+}
+
+// Precondition: d.mu must be locked.
+func (d *tcpMemData) writeSizeLocked(size inet.TCPBufferSize) error {
+	switch d.dir {
+	case tcpRMem:
+		return d.stack.SetTCPReceiveBufferSize(size)
+	case tcpWMem:
+		return d.stack.SetTCPSendBufferSize(size)
+	default:
+		panic(fmt.Sprintf("unknown tcpMemFile type: %v", d.dir))
+	}
+}

From 596ba8e719eeb13bd8c8645ad3083a1ccc941d97 Mon Sep 17 00:00:00 2001
From: Jamie Liu <jamieliu@google.com>
Date: Tue, 18 Aug 2020 12:30:10 -0700
Subject: [PATCH 016/211] Wait for all p9 handlers to complete before server
 shutdown.

... including those invoked via flipcall.

PiperOrigin-RevId: 327283194
---
 pkg/p9/server.go | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/pkg/p9/server.go b/pkg/p9/server.go
index 60cf94fa1b..b9f15e4ed4 100644
--- a/pkg/p9/server.go
+++ b/pkg/p9/server.go
@@ -87,6 +87,9 @@ type connState struct {
 	// version 0 implies 9P2000.L.
 	version uint32
 
+	// pendingWg counts requests that are still being handled.
+	pendingWg sync.WaitGroup
+
 	// -- below relates to the legacy handler --
 
 	// recvOkay indicates that a receive may start.
@@ -479,7 +482,9 @@ func (cs *connState) lookupChannel(id uint32) *channel {
 
 // handle handles a single message.
 func (cs *connState) handle(m message) (r message) {
+	cs.pendingWg.Add(1)
 	defer func() {
+		cs.pendingWg.Done()
 		if r == nil {
 			// Don't allow a panic to propagate.
 			err := recover()
@@ -568,6 +573,11 @@ func (cs *connState) handleRequests() {
 }
 
 func (cs *connState) stop() {
+	// Wait for completion of all inflight requests. This is mostly so that if
+	// a request is stuck, the sandbox supervisor has the opportunity to kill
+	// us with SIGABRT to get a stack dump of the offending handler.
+	cs.pendingWg.Wait()
+
 	// Close all channels.
 	close(cs.recvOkay)
 	close(cs.recvDone)
@@ -606,11 +616,6 @@ func (cs *connState) stop() {
 
 // service services requests concurrently.
 func (cs *connState) service() error {
-	// Pending is the number of handlers that have finished receiving but
-	// not finished processing requests. These must be waiting on properly
-	// below. See the next comment for an explanation of the loop.
-	pending := 0
-
 	// Start the first request handler.
 	go cs.handleRequests() // S/R-SAFE: Irrelevant.
 	cs.recvOkay <- true
@@ -622,16 +627,9 @@ func (cs *connState) service() error {
 		select {
 		case err := <-cs.recvDone:
 			if err != nil {
-				// Wait for pending handlers.
-				for i := 0; i < pending; i++ {
-					<-cs.sendDone
-				}
-				return nil
+				return err
 			}
 
-			// This handler is now pending.
-			pending++
-
 			// Kick the next receiver, or start a new handler
 			// if no receiver is currently waiting.
 			select {
@@ -642,9 +640,6 @@ func (cs *connState) service() error {
 			}
 
 		case <-cs.sendDone:
-			// This handler is finished.
-			pending--
-
 			// Error sending a response? Nothing can be done.
 			//
 			// We don't terminate on a send error though, since

From d03004dea56f4c9b1bf47c23f28c31a1d9634191 Mon Sep 17 00:00:00 2001
From: Zach Koopmans <zkoopmans@google.com>
Date: Tue, 18 Aug 2020 12:55:06 -0700
Subject: [PATCH 017/211] Fix timeval for Socket test

The tv_usec field should be a multiple of 4000 to pass
in open source on Linux/native, so make it one.

PiperOrigin-RevId: 327288405
---
 test/syscalls/linux/socket_generic.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/syscalls/linux/socket_generic.cc b/test/syscalls/linux/socket_generic.cc
index a6182f0aca..5d39e6fbd0 100644
--- a/test/syscalls/linux/socket_generic.cc
+++ b/test/syscalls/linux/socket_generic.cc
@@ -463,7 +463,7 @@ TEST_P(AllSocketPairTest, SetGetSendTimeout) {
   auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
 
   // tv_usec should be a multiple of 4000 to work on most systems.
-  timeval tv = {.tv_sec = 89, .tv_usec = 42000};
+  timeval tv = {.tv_sec = 89, .tv_usec = 44000};
   EXPECT_THAT(
       setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)),
       SyscallSucceeds());

From 8de9b4b65851feeec103c4742486e90f36345352 Mon Sep 17 00:00:00 2001
From: Fabricio Voznika <fvoznika@google.com>
Date: Tue, 18 Aug 2020 13:57:03 -0700
Subject: [PATCH 018/211] Return EROFS if mount is read-only

PiperOrigin-RevId: 327300635
---
 runsc/fsgofer/fsgofer.go      | 104 +++++++++++++++-------------------
 runsc/fsgofer/fsgofer_test.go |  84 ++++++++++++++++++++++-----
 2 files changed, 117 insertions(+), 71 deletions(-)

diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index c6694c2788..639de9ca15 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -46,6 +46,8 @@ const (
 	invalidMode = p9.OpenFlags(math.MaxUint32)
 
 	openFlags = syscall.O_NOFOLLOW | syscall.O_CLOEXEC
+
+	allowedOpenFlags = unix.O_TRUNC
 )
 
 // Config sets configuration options for each attach point.
@@ -357,10 +359,16 @@ func (l *localFile) Open(flags p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) {
 	if l.isOpen() {
 		panic(fmt.Sprintf("attempting to open already opened file: %q", l.hostPath))
 	}
+	mode := flags & p9.OpenFlagsModeMask
+	if mode == p9.WriteOnly || mode == p9.ReadWrite || flags&p9.OpenTruncate != 0 {
+		if err := l.checkROMount(); err != nil {
+			return nil, p9.QID{}, 0, err
+		}
+	}
 
 	// Check if control file can be used or if a new open must be created.
 	var newFile *fd.FD
-	if flags == p9.ReadOnly && l.controlReadable {
+	if mode == p9.ReadOnly && l.controlReadable && flags.OSFlags()&allowedOpenFlags == 0 {
 		log.Debugf("Open reusing control file, flags: %v, %q", flags, l.hostPath)
 		newFile = l.file
 	} else {
@@ -369,8 +377,8 @@ func (l *localFile) Open(flags p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) {
 		// name_to_handle_at and open_by_handle_at aren't supported by overlay2.
 		log.Debugf("Open reopening file, flags: %v, %q", flags, l.hostPath)
 		var err error
-		// Constrain open flags to the open mode and O_TRUNC.
-		newFile, err = reopenProcFd(l.file, openFlags|(flags.OSFlags()&(syscall.O_ACCMODE|syscall.O_TRUNC)))
+		osFlags := flags.OSFlags() & (syscall.O_ACCMODE | allowedOpenFlags)
+		newFile, err = reopenProcFd(l.file, openFlags|osFlags)
 		if err != nil {
 			return nil, p9.QID{}, 0, extractErrno(err)
 		}
@@ -389,31 +397,31 @@ func (l *localFile) Open(flags p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) {
 		}
 		l.file = newFile
 	}
-	l.mode = flags & p9.OpenFlagsModeMask
+	l.mode = mode
 	return fd, l.qid, 0, nil
 }
 
 // Create implements p9.File.
-func (l *localFile) Create(name string, mode p9.OpenFlags, perm p9.FileMode, uid p9.UID, gid p9.GID) (*fd.FD, p9.File, p9.QID, uint32, error) {
-	conf := l.attachPoint.conf
-	if conf.ROMount {
-		if conf.PanicOnWrite {
-			panic("attempt to write to RO mount")
-		}
-		return nil, nil, p9.QID{}, 0, syscall.EBADF
+func (l *localFile) Create(name string, p9Flags p9.OpenFlags, perm p9.FileMode, uid p9.UID, gid p9.GID) (*fd.FD, p9.File, p9.QID, uint32, error) {
+	if err := l.checkROMount(); err != nil {
+		return nil, nil, p9.QID{}, 0, err
 	}
 
+	// Set file creation flags, plus allowed open flags from caller.
+	osFlags := openFlags | syscall.O_CREAT | syscall.O_EXCL
+	osFlags |= p9Flags.OSFlags() & allowedOpenFlags
+
 	// 'file' may be used for other operations (e.g. Walk), so read access is
 	// always added to flags. Note that resulting file might have a wider mode
 	// than needed for each particular case.
-	flags := openFlags | syscall.O_CREAT | syscall.O_EXCL
+	mode := p9Flags & p9.OpenFlagsModeMask
 	if mode == p9.WriteOnly {
-		flags |= syscall.O_RDWR
+		osFlags |= syscall.O_RDWR
 	} else {
-		flags |= mode.OSFlags()
+		osFlags |= mode.OSFlags() & unix.O_ACCMODE
 	}
 
-	child, err := fd.OpenAt(l.file, name, flags, uint32(perm.Permissions()))
+	child, err := fd.OpenAt(l.file, name, osFlags, uint32(perm.Permissions()))
 	if err != nil {
 		return nil, nil, p9.QID{}, 0, extractErrno(err)
 	}
@@ -449,12 +457,8 @@ func (l *localFile) Create(name string, mode p9.OpenFlags, perm p9.FileMode, uid
 
 // Mkdir implements p9.File.
 func (l *localFile) Mkdir(name string, perm p9.FileMode, uid p9.UID, gid p9.GID) (p9.QID, error) {
-	conf := l.attachPoint.conf
-	if conf.ROMount {
-		if conf.PanicOnWrite {
-			panic("attempt to write to RO mount")
-		}
-		return p9.QID{}, syscall.EBADF
+	if err := l.checkROMount(); err != nil {
+		return p9.QID{}, err
 	}
 
 	if err := syscall.Mkdirat(l.file.FD(), name, uint32(perm.Permissions())); err != nil {
@@ -637,12 +641,8 @@ func (l *localFile) fillAttr(stat syscall.Stat_t) (p9.AttrMask, p9.Attr) {
 // cannot be changed atomically and user may see partial changes when
 // an error happens.
 func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
-	conf := l.attachPoint.conf
-	if conf.ROMount {
-		if conf.PanicOnWrite {
-			panic("attempt to write to RO mount")
-		}
-		return syscall.EBADF
+	if err := l.checkROMount(); err != nil {
+		return err
 	}
 
 	allowed := p9.SetAttrMask{
@@ -804,12 +804,8 @@ func (*localFile) Rename(p9.File, string) error {
 
 // RenameAt implements p9.File.RenameAt.
 func (l *localFile) RenameAt(oldName string, directory p9.File, newName string) error {
-	conf := l.attachPoint.conf
-	if conf.ROMount {
-		if conf.PanicOnWrite {
-			panic("attempt to write to RO mount")
-		}
-		return syscall.EBADF
+	if err := l.checkROMount(); err != nil {
+		return err
 	}
 
 	newParent := directory.(*localFile)
@@ -855,12 +851,8 @@ func (l *localFile) WriteAt(p []byte, offset uint64) (int, error) {
 
 // Symlink implements p9.File.
 func (l *localFile) Symlink(target, newName string, uid p9.UID, gid p9.GID) (p9.QID, error) {
-	conf := l.attachPoint.conf
-	if conf.ROMount {
-		if conf.PanicOnWrite {
-			panic("attempt to write to RO mount")
-		}
-		return p9.QID{}, syscall.EBADF
+	if err := l.checkROMount(); err != nil {
+		return p9.QID{}, err
 	}
 
 	if err := unix.Symlinkat(target, l.file.FD(), newName); err != nil {
@@ -895,12 +887,8 @@ func (l *localFile) Symlink(target, newName string, uid p9.UID, gid p9.GID) (p9.
 
 // Link implements p9.File.
 func (l *localFile) Link(target p9.File, newName string) error {
-	conf := l.attachPoint.conf
-	if conf.ROMount {
-		if conf.PanicOnWrite {
-			panic("attempt to write to RO mount")
-		}
-		return syscall.EBADF
+	if err := l.checkROMount(); err != nil {
+		return err
 	}
 
 	targetFile := target.(*localFile)
@@ -912,12 +900,8 @@ func (l *localFile) Link(target p9.File, newName string) error {
 
 // Mknod implements p9.File.
 func (l *localFile) Mknod(name string, mode p9.FileMode, _ uint32, _ uint32, _ p9.UID, _ p9.GID) (p9.QID, error) {
-	conf := l.attachPoint.conf
-	if conf.ROMount {
-		if conf.PanicOnWrite {
-			panic("attempt to write to RO mount")
-		}
-		return p9.QID{}, syscall.EROFS
+	if err := l.checkROMount(); err != nil {
+		return p9.QID{}, err
 	}
 
 	hostPath := path.Join(l.hostPath, name)
@@ -948,12 +932,8 @@ func (l *localFile) Mknod(name string, mode p9.FileMode, _ uint32, _ uint32, _ p
 
 // UnlinkAt implements p9.File.
 func (l *localFile) UnlinkAt(name string, flags uint32) error {
-	conf := l.attachPoint.conf
-	if conf.ROMount {
-		if conf.PanicOnWrite {
-			panic("attempt to write to RO mount")
-		}
-		return syscall.EBADF
+	if err := l.checkROMount(); err != nil {
+		return err
 	}
 
 	if err := unix.Unlinkat(l.file.FD(), name, int(flags)); err != nil {
@@ -1178,3 +1158,13 @@ func extractErrno(err error) syscall.Errno {
 	log.Debugf("Unknown error: %v, defaulting to EIO", err)
 	return syscall.EIO
 }
+
+func (l *localFile) checkROMount() error {
+	if conf := l.attachPoint.conf; conf.ROMount {
+		if conf.PanicOnWrite {
+			panic("attempt to write to RO mount")
+		}
+		return syscall.EROFS
+	}
+	return nil
+}
diff --git a/runsc/fsgofer/fsgofer_test.go b/runsc/fsgofer/fsgofer_test.go
index 94f167417b..8ed703584a 100644
--- a/runsc/fsgofer/fsgofer_test.go
+++ b/runsc/fsgofer/fsgofer_test.go
@@ -491,30 +491,50 @@ func TestLink(t *testing.T) {
 }
 
 func TestROMountChecks(t *testing.T) {
+	const want = syscall.EROFS
 	runCustom(t, allTypes, roConfs, func(t *testing.T, s state) {
-		if _, _, _, _, err := s.file.Create("some_file", p9.ReadWrite, 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid())); err != syscall.EBADF {
-			t.Errorf("%v: Create() should have failed, got: %v, expected: syscall.EBADF", s, err)
+		if s.fileType != syscall.S_IFLNK {
+			if _, _, _, err := s.file.Open(p9.WriteOnly); err != want {
+				t.Errorf("Open() should have failed, got: %v, expected: %v", err, want)
+			}
+			if _, _, _, err := s.file.Open(p9.ReadWrite); err != want {
+				t.Errorf("Open() should have failed, got: %v, expected: %v", err, want)
+			}
+			if _, _, _, err := s.file.Open(p9.ReadOnly | p9.OpenTruncate); err != want {
+				t.Errorf("Open() should have failed, got: %v, expected: %v", err, want)
+			}
+			f, _, _, err := s.file.Open(p9.ReadOnly)
+			if err != nil {
+				t.Errorf("Open() failed: %v", err)
+			}
+			if f != nil {
+				_ = f.Close()
+			}
+		}
+
+		if _, _, _, _, err := s.file.Create("some_file", p9.ReadWrite, 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid())); err != want {
+			t.Errorf("Create() should have failed, got: %v, expected: %v", err, want)
 		}
-		if _, err := s.file.Mkdir("some_dir", 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid())); err != syscall.EBADF {
-			t.Errorf("%v: MkDir() should have failed, got: %v, expected: syscall.EBADF", s, err)
+		if _, err := s.file.Mkdir("some_dir", 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid())); err != want {
+			t.Errorf("MkDir() should have failed, got: %v, expected: %v", err, want)
 		}
-		if err := s.file.RenameAt("some_file", s.file, "other_file"); err != syscall.EBADF {
-			t.Errorf("%v: Rename() should have failed, got: %v, expected: syscall.EBADF", s, err)
+		if err := s.file.RenameAt("some_file", s.file, "other_file"); err != want {
+			t.Errorf("Rename() should have failed, got: %v, expected: %v", err, want)
 		}
-		if _, err := s.file.Symlink("some_place", "some_symlink", p9.UID(os.Getuid()), p9.GID(os.Getgid())); err != syscall.EBADF {
-			t.Errorf("%v: Symlink() should have failed, got: %v, expected: syscall.EBADF", s, err)
+		if _, err := s.file.Symlink("some_place", "some_symlink", p9.UID(os.Getuid()), p9.GID(os.Getgid())); err != want {
+			t.Errorf("Symlink() should have failed, got: %v, expected: %v", err, want)
 		}
-		if err := s.file.UnlinkAt("some_file", 0); err != syscall.EBADF {
-			t.Errorf("%v: UnlinkAt() should have failed, got: %v, expected: syscall.EBADF", s, err)
+		if err := s.file.UnlinkAt("some_file", 0); err != want {
+			t.Errorf("UnlinkAt() should have failed, got: %v, expected: %v", err, want)
 		}
-		if err := s.file.Link(s.file, "some_link"); err != syscall.EBADF {
-			t.Errorf("%v: Link() should have failed, got: %v, expected: syscall.EBADF", s, err)
+		if err := s.file.Link(s.file, "some_link"); err != want {
+			t.Errorf("Link() should have failed, got: %v, expected: %v", err, want)
 		}
 
 		valid := p9.SetAttrMask{Size: true}
 		attr := p9.SetAttr{Size: 0}
-		if err := s.file.SetAttr(valid, attr); err != syscall.EBADF {
-			t.Errorf("%v: SetAttr() should have failed, got: %v, expected: syscall.EBADF", s, err)
+		if err := s.file.SetAttr(valid, attr); err != want {
+			t.Errorf("SetAttr() should have failed, got: %v, expected: %v", err, want)
 		}
 	})
 }
@@ -522,6 +542,9 @@ func TestROMountChecks(t *testing.T) {
 func TestROMountPanics(t *testing.T) {
 	conf := Config{ROMount: true, PanicOnWrite: true}
 	runCustom(t, allTypes, []Config{conf}, func(t *testing.T, s state) {
+		if s.fileType != syscall.S_IFLNK {
+			assertPanic(t, func() { s.file.Open(p9.WriteOnly) })
+		}
 		assertPanic(t, func() { s.file.Create("some_file", p9.ReadWrite, 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid())) })
 		assertPanic(t, func() { s.file.Mkdir("some_dir", 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid())) })
 		assertPanic(t, func() { s.file.RenameAt("some_file", s.file, "other_file") })
@@ -741,3 +764,44 @@ func TestDoubleAttachError(t *testing.T) {
 		t.Fatalf("Attach should have failed, got %v want non-nil", err)
 	}
 }
+
+// TestTruncate verifies that opening an existing file with p9.OpenTruncate
+// resets its size to zero.
+func TestTruncate(t *testing.T) {
+	runCustom(t, []uint32{syscall.S_IFDIR}, rwConfs, func(t *testing.T, s state) {
+		// Create a file with non-empty contents so that truncation is observable.
+		child, err := createFile(s.file, "test")
+		if err != nil {
+			t.Fatalf("createFile() failed, err: %v", err)
+		}
+		defer child.Close()
+		want := []byte("foobar")
+		w, err := child.WriteAt(want, 0)
+		if err != nil {
+			t.Fatalf("Write() failed, err: %v", err)
+		}
+		if w != len(want) {
+			t.Fatalf("Write() was partial, got: %d, expected: %d", w, len(want))
+		}
+
+		// Walk to the file again and open the new reference with OpenTruncate.
+		_, l, err := s.file.Walk([]string{"test"})
+		if err != nil {
+			t.Fatalf("Walk(%s) failed, err: %v", "test", err)
+		}
+		defer l.Close()
+		if _, _, _, err := l.Open(p9.ReadOnly | p9.OpenTruncate); err != nil {
+			t.Fatalf("Open() failed, err: %v", err)
+		}
+		_, mask, attr, err := l.GetAttr(p9.AttrMask{Size: true})
+		if err != nil {
+			t.Fatalf("GetAttr() failed, err: %v", err)
+		}
+		if !mask.Size {
+			t.Fatalf("GetAttr() didn't return size: %+v", mask)
+		}
+		if attr.Size != 0 {
+			t.Fatalf("truncate didn't work, want: 0, got: %d", attr.Size)
+		}
+	})
+}

From 79d819a62c1db623ee8cb8f7df07c2d4702fd016 Mon Sep 17 00:00:00 2001
From: Jamie Liu <jamieliu@google.com>
Date: Tue, 18 Aug 2020 14:34:15 -0700
Subject: [PATCH 019/211] Avoid holding locks when opening files in VFS2.

Fixes #3243, #3521

PiperOrigin-RevId: 327308890
---
 pkg/sentry/fsimpl/gofer/filesystem.go   | 32 +++++++++++++------
 pkg/sentry/fsimpl/kernfs/filesystem.go  | 28 +++++++++++++++--
 pkg/sentry/fsimpl/overlay/filesystem.go | 42 ++++++++++++++++++-------
 pkg/sentry/fsimpl/tmpfs/filesystem.go   | 23 ++++++++++++--
 4 files changed, 100 insertions(+), 25 deletions(-)

diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index a3903db338..9a90351e5b 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -834,7 +834,14 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
 
 	var ds *[]*dentry
 	fs.renameMu.RLock()
-	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
+	unlocked := false
+	unlock := func() {
+		if !unlocked {
+			fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
+			unlocked = true
+		}
+	}
+	defer unlock()
 
 	start := rp.Start().Impl().(*dentry)
 	if !start.cachedMetadataAuthoritative() {
@@ -851,7 +858,10 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
 		if mustCreate {
 			return nil, syserror.EEXIST
 		}
-		return start.openLocked(ctx, rp, &opts)
+		start.IncRef()
+		defer start.DecRef(ctx)
+		unlock()
+		return start.open(ctx, rp, &opts)
 	}
 
 afterTrailingSymlink:
@@ -901,11 +911,15 @@ afterTrailingSymlink:
 	if rp.MustBeDir() && !child.isDir() {
 		return nil, syserror.ENOTDIR
 	}
-	return child.openLocked(ctx, rp, &opts)
+	child.IncRef()
+	defer child.DecRef(ctx)
+	unlock()
+	return child.open(ctx, rp, &opts)
 }
 
-// Preconditions: fs.renameMu must be locked.
-func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
+// Preconditions: The caller must hold no locks (since opening pipes may block
+// indefinitely).
+func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
 	ats := vfs.AccessTypesForOpenFlags(opts)
 	if err := d.checkPermissions(rp.Credentials(), ats); err != nil {
 		return nil, err
@@ -968,7 +982,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
 			return nil, syserror.ENXIO
 		}
 		if d.fs.iopts.OpenSocketsByConnecting {
-			return d.connectSocketLocked(ctx, opts)
+			return d.openSocketByConnecting(ctx, opts)
 		}
 	case linux.S_IFIFO:
 		if d.isSynthetic() {
@@ -977,7 +991,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
 	}
 
 	if vfd == nil {
-		if vfd, err = d.openSpecialFileLocked(ctx, mnt, opts); err != nil {
+		if vfd, err = d.openSpecialFile(ctx, mnt, opts); err != nil {
 			return nil, err
 		}
 	}
@@ -996,7 +1010,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
 	return vfd, err
 }
 
-func (d *dentry) connectSocketLocked(ctx context.Context, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
+func (d *dentry) openSocketByConnecting(ctx context.Context, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
 	if opts.Flags&linux.O_DIRECT != 0 {
 		return nil, syserror.EINVAL
 	}
@@ -1016,7 +1030,7 @@ func (d *dentry) connectSocketLocked(ctx context.Context, opts *vfs.OpenOptions)
 	return fd, nil
 }
 
-func (d *dentry) openSpecialFileLocked(ctx context.Context, mnt *vfs.Mount, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
+func (d *dentry) openSpecialFile(ctx context.Context, mnt *vfs.Mount, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
 	ats := vfs.AccessTypesForOpenFlags(opts)
 	if opts.Flags&linux.O_DIRECT != 0 {
 		return nil, syserror.EINVAL
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index d7edb63425..3e5192edd4 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -397,15 +397,21 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
 	// Do not create new file.
 	if opts.Flags&linux.O_CREAT == 0 {
 		fs.mu.RLock()
-		defer fs.processDeferredDecRefs(ctx)
-		defer fs.mu.RUnlock()
 		vfsd, inode, err := fs.walkExistingLocked(ctx, rp)
 		if err != nil {
+			fs.mu.RUnlock()
+			fs.processDeferredDecRefs(ctx)
 			return nil, err
 		}
 		if err := inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
+			fs.mu.RUnlock()
+			fs.processDeferredDecRefs(ctx)
 			return nil, err
 		}
+		inode.IncRef()
+		defer inode.DecRef(ctx)
+		fs.mu.RUnlock()
+		fs.processDeferredDecRefs(ctx)
 		return inode.Open(ctx, rp, vfsd, opts)
 	}
 
@@ -414,7 +420,14 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
 	vfsd := rp.Start()
 	inode := vfsd.Impl().(*Dentry).inode
 	fs.mu.Lock()
-	defer fs.mu.Unlock()
+	unlocked := false
+	unlock := func() {
+		if !unlocked {
+			fs.mu.Unlock()
+			unlocked = true
+		}
+	}
+	defer unlock()
 	if rp.Done() {
 		if rp.MustBeDir() {
 			return nil, syserror.EISDIR
@@ -425,6 +438,9 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
 		if err := inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
 			return nil, err
 		}
+		inode.IncRef()
+		defer inode.DecRef(ctx)
+		unlock()
 		return inode.Open(ctx, rp, vfsd, opts)
 	}
 afterTrailingSymlink:
@@ -466,6 +482,9 @@ afterTrailingSymlink:
 		}
 		child := childVFSD.Impl().(*Dentry)
 		parentVFSD.Impl().(*Dentry).InsertChild(pc, child)
+		child.inode.IncRef()
+		defer child.inode.DecRef(ctx)
+		unlock()
 		return child.inode.Open(ctx, rp, childVFSD, opts)
 	}
 	if err != nil {
@@ -499,6 +518,9 @@ afterTrailingSymlink:
 	if err := child.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
 		return nil, err
 	}
+	child.inode.IncRef()
+	defer child.inode.DecRef(ctx)
+	unlock()
 	return child.inode.Open(ctx, rp, &child.vfsd, opts)
 }
 
diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go
index 986b36ead4..86d0164b4e 100644
--- a/pkg/sentry/fsimpl/overlay/filesystem.go
+++ b/pkg/sentry/fsimpl/overlay/filesystem.go
@@ -717,17 +717,33 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
 func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
 	mayCreate := opts.Flags&linux.O_CREAT != 0
 	mustCreate := opts.Flags&(linux.O_CREAT|linux.O_EXCL) == (linux.O_CREAT | linux.O_EXCL)
+	mayWrite := vfs.AccessTypesForOpenFlags(&opts).MayWrite()
 
 	var ds *[]*dentry
 	fs.renameMu.RLock()
-	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	unlocked := false
+	unlock := func() {
+		if !unlocked {
+			fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+			unlocked = true
+		}
+	}
+	defer unlock()
 
 	start := rp.Start().Impl().(*dentry)
 	if rp.Done() {
 		if mustCreate {
 			return nil, syserror.EEXIST
 		}
-		return start.openLocked(ctx, rp, &opts)
+		if mayWrite {
+			if err := start.copyUpLocked(ctx); err != nil {
+				return nil, err
+			}
+		}
+		start.IncRef()
+		defer start.DecRef(ctx)
+		unlock()
+		return start.openCopiedUp(ctx, rp, &opts)
 	}
 
 afterTrailingSymlink:
@@ -767,20 +783,24 @@ afterTrailingSymlink:
 		start = parent
 		goto afterTrailingSymlink
 	}
-	return child.openLocked(ctx, rp, &opts)
+	if mayWrite {
+		if err := child.copyUpLocked(ctx); err != nil {
+			return nil, err
+		}
+	}
+	child.IncRef()
+	defer child.DecRef(ctx)
+	unlock()
+	return child.openCopiedUp(ctx, rp, &opts)
 }
 
-// Preconditions: fs.renameMu must be locked.
-func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
+// Preconditions: If vfs.AccessTypesForOpenFlags(opts).MayWrite(), then d has
+// been copied up.
+func (d *dentry) openCopiedUp(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
 	ats := vfs.AccessTypesForOpenFlags(opts)
 	if err := d.checkPermissions(rp.Credentials(), ats); err != nil {
 		return nil, err
 	}
-	if ats.MayWrite() {
-		if err := d.copyUpLocked(ctx); err != nil {
-			return nil, err
-		}
-	}
 	mnt := rp.Mount()
 
 	// Directory FDs open FDs from each layer when directory entries are read,
@@ -792,7 +812,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
 			return nil, syserror.EISDIR
 		}
 		// Can't open directories writably.
-		if ats&vfs.MayWrite != 0 {
+		if ats.MayWrite() {
 			return nil, syserror.EISDIR
 		}
 		if opts.Flags&linux.O_DIRECT != 0 {
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index cb8b2d9440..b0ec177e69 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -307,18 +307,28 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
 	// don't need fs.mu for writing.
 	if opts.Flags&linux.O_CREAT == 0 {
 		fs.mu.RLock()
-		defer fs.mu.RUnlock()
 		d, err := resolveLocked(ctx, rp)
 		if err != nil {
+			fs.mu.RUnlock()
 			return nil, err
 		}
+		d.IncRef()
+		defer d.DecRef(ctx)
+		fs.mu.RUnlock()
 		return d.open(ctx, rp, &opts, false /* afterCreate */)
 	}
 
 	mustCreate := opts.Flags&linux.O_EXCL != 0
 	start := rp.Start().Impl().(*dentry)
 	fs.mu.Lock()
-	defer fs.mu.Unlock()
+	unlocked := false
+	unlock := func() {
+		if !unlocked {
+			fs.mu.Unlock()
+			unlocked = true
+		}
+	}
+	defer unlock()
 	if rp.Done() {
 		// Reject attempts to open mount root directory with O_CREAT.
 		if rp.MustBeDir() {
@@ -327,6 +337,9 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
 		if mustCreate {
 			return nil, syserror.EEXIST
 		}
+		start.IncRef()
+		defer start.DecRef(ctx)
+		unlock()
 		return start.open(ctx, rp, &opts, false /* afterCreate */)
 	}
 afterTrailingSymlink:
@@ -364,6 +377,11 @@ afterTrailingSymlink:
 		creds := rp.Credentials()
 		child := fs.newDentry(fs.newRegularFile(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode))
 		parentDir.insertChildLocked(child, name)
+		// Hold a reference on child while fs.mu is dropped, matching the other
+		// open paths in this function.
+		child.IncRef()
+		defer child.DecRef(ctx)
+		unlock()
 		fd, err := child.open(ctx, rp, &opts, true)
 		if err != nil {
 			return nil, err
@@ -392,9 +410,14 @@ afterTrailingSymlink:
 	if rp.MustBeDir() && !child.inode.isDir() {
 		return nil, syserror.ENOTDIR
 	}
+	child.IncRef()
+	defer child.DecRef(ctx)
+	unlock()
 	return child.open(ctx, rp, &opts, false)
 }
 
+// Preconditions: The caller must hold no locks (since opening pipes may block
+// indefinitely).
 func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions, afterCreate bool) (*vfs.FileDescription, error) {
 	ats := vfs.AccessTypesForOpenFlags(opts)
 	if !afterCreate {

From 3e6d607ee45b817d146c6a5b791a64608c1e9d0c Mon Sep 17 00:00:00 2001
From: Nayana Bidari <nybidari@google.com>
Date: Tue, 18 Aug 2020 15:57:48 -0700
Subject: [PATCH 020/211] RACK: Create a new list for segments.

RACK requires the segments to be in the order of their transmission
or retransmission times. This cl creates a new list and moves the
retransmitted segments to the end of the list.

PiperOrigin-RevId: 327325153
---
 pkg/tcpip/transport/tcp/BUILD       | 17 +++++++++++-
 pkg/tcpip/transport/tcp/connect.go  | 11 ++++++++
 pkg/tcpip/transport/tcp/endpoint.go |  4 +--
 pkg/tcpip/transport/tcp/segment.go  | 23 +++++++++++-----
 pkg/tcpip/transport/tcp/snd.go      | 41 ++++++++++++++++++-----------
 5 files changed, 72 insertions(+), 24 deletions(-)

diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD
index 234fb95ce2..bde071f2a6 100644
--- a/pkg/tcpip/transport/tcp/BUILD
+++ b/pkg/tcpip/transport/tcp/BUILD
@@ -11,7 +11,8 @@ go_template_instance(
     template = "//pkg/ilist:generic_list",
     types = {
         "Element": "*segment",
-        "Linker": "*segment",
+        "ElementMapper": "segmentMapper",
+        "Linker": "*segmentEntry",
     },
 )
 
@@ -27,6 +28,19 @@ go_template_instance(
     },
 )
 
+go_template_instance(
+    name = "tcp_rack_segment_list",
+    out = "tcp_rack_segment_list.go",
+    package = "tcp",
+    prefix = "rackSegment",
+    template = "//pkg/ilist:generic_list",
+    types = {
+        "Element": "*segment",
+        "ElementMapper": "rackSegmentMapper",
+        "Linker": "*rackSegmentEntry",
+    },
+)
+
 go_library(
     name = "tcp",
     srcs = [
@@ -55,6 +69,7 @@ go_library(
         "snd.go",
         "snd_state.go",
         "tcp_endpoint_list.go",
+        "tcp_rack_segment_list.go",
         "tcp_segment_list.go",
         "timer.go",
     ],
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go
index 290172ac98..87980c0a19 100644
--- a/pkg/tcpip/transport/tcp/connect.go
+++ b/pkg/tcpip/transport/tcp/connect.go
@@ -924,7 +924,18 @@ func (e *endpoint) handleWrite() *tcpip.Error {
 
 	first := e.sndQueue.Front()
 	if first != nil {
+		lastSeg := e.snd.writeList.Back()
 		e.snd.writeList.PushBackList(&e.sndQueue)
+		if lastSeg == nil {
+			lastSeg = e.snd.writeList.Front()
+		} else {
+			lastSeg = lastSeg.segEntry.Next()
+		}
+		// Add new segments to rcList, as rcList and writeList should
+		// be consistent.
+		for seg := lastSeg; seg != nil; seg = seg.segEntry.Next() {
+			e.snd.rcList.PushBack(seg)
+		}
 		e.sndBufInQueue = 0
 	}
 
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 1ccedebcca..21a4b6e2f4 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -1428,7 +1428,7 @@ func (e *endpoint) Peek(vec [][]byte) (int64, tcpip.ControlMessages, *tcpip.Erro
 	vec = append([][]byte(nil), vec...)
 
 	var num int64
-	for s := e.rcvList.Front(); s != nil; s = s.Next() {
+	for s := e.rcvList.Front(); s != nil; s = s.segEntry.Next() {
 		views := s.data.Views()
 
 		for i := s.viewToDeliver; i < len(views); i++ {
@@ -2249,7 +2249,7 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc
 	if !handshake {
 		e.segmentQueue.mu.Lock()
 		for _, l := range []segmentList{e.segmentQueue.list, e.sndQueue, e.snd.writeList} {
-			for s := l.Front(); s != nil; s = s.Next() {
+			for s := l.Front(); s != nil; s = s.segEntry.Next() {
 				s.id = e.ID
 				s.route = r.Clone()
 				e.sndWaker.Assert()
diff --git a/pkg/tcpip/transport/tcp/segment.go b/pkg/tcpip/transport/tcp/segment.go
index 94307d31a8..a20755f785 100644
--- a/pkg/tcpip/transport/tcp/segment.go
+++ b/pkg/tcpip/transport/tcp/segment.go
@@ -30,12 +30,13 @@ import (
 //
 // +stateify savable
 type segment struct {
-	segmentEntry
-	refCnt int32
-	id     stack.TransportEndpointID `state:"manual"`
-	route  stack.Route               `state:"manual"`
-	data   buffer.VectorisedView     `state:".(buffer.VectorisedView)"`
-	hdr    header.TCP
+	segEntry     segmentEntry
+	rackSegEntry rackSegmentEntry
+	refCnt       int32
+	id           stack.TransportEndpointID `state:"manual"`
+	route        stack.Route               `state:"manual"`
+	data         buffer.VectorisedView     `state:".(buffer.VectorisedView)"`
+	hdr          header.TCP
 	// views is used as buffer for data when its length is large
 	// enough to store a VectorisedView.
 	views [8]buffer.View `state:"nosave"`
@@ -61,6 +62,16 @@ type segment struct {
 	xmitCount uint32
 }
 
+// segmentMapper is the ElementMapper for the writeList.
+type segmentMapper struct{}
+
+func (segmentMapper) linkerFor(seg *segment) *segmentEntry { return &seg.segEntry }
+
+// rackSegmentMapper is the ElementMapper for the rcList.
+type rackSegmentMapper struct{}
+
+func (rackSegmentMapper) linkerFor(seg *segment) *rackSegmentEntry { return &seg.rackSegEntry }
+
 func newSegment(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) *segment {
 	s := &segment{
 		refCnt: 1,
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index c55589c459..31151f23d4 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -154,6 +154,7 @@ type sender struct {
 	closed      bool
 	writeNext   *segment
 	writeList   segmentList
+	rcList      rackSegmentList
 	resendTimer timer       `state:"nosave"`
 	resendWaker sleep.Waker `state:"nosave"`
 
@@ -367,7 +368,7 @@ func (s *sender) updateMaxPayloadSize(mtu, count int) {
 
 	// Rewind writeNext to the first segment exceeding the MTU. Do nothing
 	// if it is already before such a packet.
-	for seg := s.writeList.Front(); seg != nil; seg = seg.Next() {
+	for seg := s.writeList.Front(); seg != nil; seg = seg.segEntry.Next() {
 		if seg == s.writeNext {
 			// We got to writeNext before we could find a segment
 			// exceeding the MTU.
@@ -622,6 +623,7 @@ func (s *sender) splitSeg(seg *segment, size int) {
 	nSeg.data.TrimFront(size)
 	nSeg.sequenceNumber.UpdateForward(seqnum.Size(size))
 	s.writeList.InsertAfter(seg, nSeg)
+	s.rcList.InsertAfter(seg, nSeg)
 
 	// The segment being split does not carry PUSH flag because it is
 	// followed by the newly split segment.
@@ -653,7 +655,7 @@ func (s *sender) NextSeg(nextSegHint *segment) (nextSeg, hint *segment, rescueRt
 	var s3 *segment
 	var s4 *segment
 	// Step 1.
-	for seg := nextSegHint; seg != nil; seg = seg.Next() {
+	for seg := nextSegHint; seg != nil; seg = seg.segEntry.Next() {
 		// Stop iteration if we hit a segment that has never been
 		// transmitted (i.e. either it has no assigned sequence number
 		// or if it does have one, it's >= the next sequence number
@@ -683,7 +685,7 @@ func (s *sender) NextSeg(nextSegHint *segment) (nextSeg, hint *segment, rescueRt
 				// NextSeg():
 				//     (1.c) IsLost(S2) returns true.
 				if s.ep.scoreboard.IsLost(segSeq) {
-					return seg, seg.Next(), false
+					return seg, seg.segEntry.Next(), false
 				}
 
 				// NextSeg():
@@ -697,7 +699,7 @@ func (s *sender) NextSeg(nextSegHint *segment) (nextSeg, hint *segment, rescueRt
 				// SHOULD be returned.
 				if s3 == nil {
 					s3 = seg
-					hint = seg.Next()
+					hint = seg.segEntry.Next()
 				}
 			}
 			// NextSeg():
@@ -731,7 +733,7 @@ func (s *sender) NextSeg(nextSegHint *segment) (nextSeg, hint *segment, rescueRt
 	// range of one segment of up to SMSS octets of
 	// previously unsent data starting with sequence number
 	// HighData+1 MUST be returned."
-	for seg := s.writeNext; seg != nil; seg = seg.Next() {
+	for seg := s.writeNext; seg != nil; seg = seg.segEntry.Next() {
 		if s.isAssignedSequenceNumber(seg) && seg.sequenceNumber.LessThan(s.sndNxt) {
 			continue
 		}
@@ -773,15 +775,16 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se
 			// triggering bugs in poorly written DNS
 			// implementations.
 			var nextTooBig bool
-			for seg.Next() != nil && seg.Next().data.Size() != 0 {
-				if seg.data.Size()+seg.Next().data.Size() > available {
+			for nSeg := seg.segEntry.Next(); nSeg != nil && nSeg.data.Size() != 0; nSeg = seg.segEntry.Next() {
+				if seg.data.Size()+nSeg.data.Size() > available {
 					nextTooBig = true
 					break
 				}
-				seg.data.Append(seg.Next().data)
+				seg.data.Append(nSeg.data)
 
 				// Consume the segment that we just merged in.
-				s.writeList.Remove(seg.Next())
+				s.writeList.Remove(nSeg)
+				s.rcList.Remove(nSeg) // rcList order can differ from writeList; remove nSeg itself.
 			}
 			}
 			if !nextTooBig && seg.data.Size() < available {
 				// Segment is not full.
@@ -948,7 +951,7 @@ func (s *sender) handleSACKRecovery(limit int, end seqnum.Value) (dataSent bool)
 			}
 			dataSent = true
 			s.outstanding++
-			s.writeNext = nextSeg.Next()
+			s.writeNext = nextSeg.segEntry.Next()
 			continue
 		}
 
@@ -961,6 +964,7 @@ func (s *sender) handleSACKRecovery(limit int, end seqnum.Value) (dataSent bool)
 		// transmitted in (C.1)."
 		s.outstanding++
 		dataSent = true
+
 		s.sendSegment(nextSeg)
 
 		segEnd := nextSeg.sequenceNumber.Add(nextSeg.logicalLen())
@@ -1035,7 +1039,7 @@ func (s *sender) sendData() {
 	if s.fr.active && s.ep.sackPermitted {
 		dataSent = s.handleSACKRecovery(s.maxPayloadSize, end)
 	} else {
-		for seg := s.writeNext; seg != nil && s.outstanding < s.sndCwnd; seg = seg.Next() {
+		for seg := s.writeNext; seg != nil && s.outstanding < s.sndCwnd; seg = seg.segEntry.Next() {
 			cwndLimit := (s.sndCwnd - s.outstanding) * s.maxPayloadSize
 			if cwndLimit < limit {
 				limit = cwndLimit
@@ -1043,7 +1047,7 @@ func (s *sender) sendData() {
 			if s.isAssignedSequenceNumber(seg) && s.ep.sackPermitted && s.ep.scoreboard.IsSACKED(seg.sackBlock()) {
 				// Move writeNext along so that we don't try and scan data that
 				// has already been SACKED.
-				s.writeNext = seg.Next()
+				s.writeNext = seg.segEntry.Next()
 				continue
 			}
 			if sent := s.maybeSendSegment(seg, limit, end); !sent {
@@ -1051,7 +1055,7 @@ func (s *sender) sendData() {
 			}
 			dataSent = true
 			s.outstanding += s.pCount(seg)
-			s.writeNext = seg.Next()
+			s.writeNext = seg.segEntry.Next()
 		}
 	}
 
@@ -1182,7 +1186,7 @@ func (s *sender) SetPipe() {
 	}
 	pipe := 0
 	smss := seqnum.Size(s.ep.scoreboard.SMSS())
-	for s1 := s.writeList.Front(); s1 != nil && s1.data.Size() != 0 && s.isAssignedSequenceNumber(s1); s1 = s1.Next() {
+	for s1 := s.writeList.Front(); s1 != nil && s1.data.Size() != 0 && s.isAssignedSequenceNumber(s1); s1 = s1.segEntry.Next() {
 		// With GSO each segment can be much larger than SMSS. So check the segment
 		// in SMSS sized ranges.
 		segEnd := s1.sequenceNumber.Add(seqnum.Size(s1.data.Size()))
@@ -1384,7 +1388,7 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
 			}
 
 			if s.writeNext == seg {
-				s.writeNext = seg.Next()
+				s.writeNext = seg.segEntry.Next()
 			}
 
 			// Update the RACK fields if SACK is enabled.
@@ -1393,6 +1397,7 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
 			}
 
 			s.writeList.Remove(seg)
+			s.rcList.Remove(seg)
 
 			// if SACK is enabled then Only reduce outstanding if
 			// the segment was not previously SACKED as these have
@@ -1460,6 +1465,16 @@ func (s *sender) sendSegment(seg *segment) *tcpip.Error {
 		if s.sndCwnd < s.sndSsthresh {
 			s.ep.stack.Stats().TCP.SlowStartRetransmits.Increment()
 		}
+
+		// Move the segment which has to be retransmitted to the end of the list, as
+		// RACK requires the segments in the order of their transmission times.
+		// See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-09#section-6.2
+		// Step 5
+		//
+		// A retransmitted segment is expected to already be linked into rcList
+		// (it is added alongside writeList), so unlink it before appending it.
+		s.rcList.Remove(seg)
+		s.rcList.PushBack(seg)
 	}
 	seg.xmitTime = time.Now()
 	seg.xmitCount++

From e38e559d23ddc92cf9f94ad4207e2ea48bf4c8df Mon Sep 17 00:00:00 2001
From: Zach Koopmans <zkoopmans@google.com>
Date: Tue, 18 Aug 2020 16:02:52 -0700
Subject: [PATCH 021/211] Fix return value in shm_test for opensource

Some machines return 128 + signal for failures. Accept that
as a valid result.

PiperOrigin-RevId: 327326113
---
 test/syscalls/linux/shm.cc | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/test/syscalls/linux/shm.cc b/test/syscalls/linux/shm.cc
index c7fdbb9246..d6e8b3e594 100644
--- a/test/syscalls/linux/shm.cc
+++ b/test/syscalls/linux/shm.cc
@@ -29,6 +29,8 @@ namespace testing {
 namespace {
 
 using ::testing::_;
+using ::testing::AnyOf;
+using ::testing::Eq;
 
 const uint64_t kAllocSize = kPageSize * 128ULL;
 
@@ -394,7 +396,8 @@ TEST(ShmDeathTest, SegmentNotAccessibleAfterDetach) {
   };
 
   EXPECT_THAT(InForkedProcess(rest),
-              IsPosixErrorOkAndHolds(W_EXITCODE(0, SIGSEGV)));
+              IsPosixErrorOkAndHolds(AnyOf(Eq(W_EXITCODE(0, SIGSEGV)),
+                                           Eq(W_EXITCODE(0, 128 + SIGSEGV)))));
 }
 
 TEST(ShmTest, RequestingSegmentSmallerThanSHMMINFails) {

From cf38ac1c6c32b4c514bb56fc70073788835f3766 Mon Sep 17 00:00:00 2001
From: Fabricio Voznika <fvoznika@google.com>
Date: Tue, 18 Aug 2020 18:50:24 -0700
Subject: [PATCH 022/211] Add more information to panic when device IDs don't
 match

PiperOrigin-RevId: 327351357
---
 pkg/sentry/device/device.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/sentry/device/device.go b/pkg/sentry/device/device.go
index f45b2bd2b6..6ca9dc79f2 100644
--- a/pkg/sentry/device/device.go
+++ b/pkg/sentry/device/device.go
@@ -256,7 +256,7 @@ func (m *MultiDevice) Load(key MultiDeviceKey, value uint64) bool {
 	}
 	if k, exists := m.rcache[value]; exists && k != key {
 		// Should never happen.
-		panic("MultiDevice's caches are inconsistent")
+		panic(fmt.Sprintf("MultiDevice's caches are inconsistent, current: %+v, previous: %+v", key, k))
 	}
 
 	// Cache value at key.

From dbade2ec35aa836e8e3b02b0c145b658662728b3 Mon Sep 17 00:00:00 2001
From: Fabricio Voznika <fvoznika@google.com>
Date: Tue, 18 Aug 2020 18:51:19 -0700
Subject: [PATCH 023/211] Don't set atime if mount is readonly

Updates #1035

PiperOrigin-RevId: 327351475
---
 pkg/sentry/fsimpl/gofer/time.go |  2 +-
 pkg/sentry/vfs/mount.go         | 11 ++++++-----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/pkg/sentry/fsimpl/gofer/time.go b/pkg/sentry/fsimpl/gofer/time.go
index 2cb8191b9f..e59d07e900 100644
--- a/pkg/sentry/fsimpl/gofer/time.go
+++ b/pkg/sentry/fsimpl/gofer/time.go
@@ -38,7 +38,7 @@ func statxTimestampFromDentry(ns int64) linux.StatxTimestamp {
 
 // Preconditions: d.cachedMetadataAuthoritative() == true.
 func (d *dentry) touchAtime(mnt *vfs.Mount) {
-	if mnt.Flags.NoATime {
+	if mnt.Flags.NoATime || mnt.ReadOnly() {
 		return
 	}
 	if err := mnt.CheckBeginWrite(); err != nil {
diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go
index d1d29d0cdd..67dfba9867 100644
--- a/pkg/sentry/vfs/mount.go
+++ b/pkg/sentry/vfs/mount.go
@@ -114,7 +114,7 @@ func (mnt *Mount) Options() MountOptions {
 	defer mnt.vfs.mountMu.Unlock()
 	return MountOptions{
 		Flags:    mnt.Flags,
-		ReadOnly: mnt.readOnly(),
+		ReadOnly: mnt.ReadOnly(),
 	}
 }
 
@@ -688,7 +688,8 @@ func (mnt *Mount) setReadOnlyLocked(ro bool) error {
 	return nil
 }
 
-func (mnt *Mount) readOnly() bool {
+// ReadOnly returns true if mount is readonly.
+func (mnt *Mount) ReadOnly() bool {
 	return atomic.LoadInt64(&mnt.writers) < 0
 }
 
@@ -756,7 +757,7 @@ func (vfs *VirtualFilesystem) GenerateProcMounts(ctx context.Context, taskRootDi
 		}
 
 		opts := "rw"
-		if mnt.readOnly() {
+		if mnt.ReadOnly() {
 			opts = "ro"
 		}
 		if mnt.Flags.NoATime {
@@ -844,7 +845,7 @@ func (vfs *VirtualFilesystem) GenerateProcMountInfo(ctx context.Context, taskRoo
 
 		// (6) Mount options.
 		opts := "rw"
-		if mnt.readOnly() {
+		if mnt.ReadOnly() {
 			opts = "ro"
 		}
 		if mnt.Flags.NoATime {
@@ -883,7 +884,7 @@ func superBlockOpts(mountPath string, mnt *Mount) string {
 	// gVisor doesn't (yet) have a concept of super block options, so we
 	// use the ro/rw bit from the mount flag.
 	opts := "rw"
-	if mnt.readOnly() {
+	if mnt.ReadOnly() {
 		opts = "ro"
 	}
 

From b99fce30936ea42bf00e2c7270dc4ca797f766eb Mon Sep 17 00:00:00 2001
From: Dean Deng <deandeng@google.com>
Date: Tue, 18 Aug 2020 19:26:55 -0700
Subject: [PATCH 024/211] Move ERESTART* error definitions to syserror package.

This is needed to avoid circular dependencies between the vfs and kernel
packages.

PiperOrigin-RevId: 327355524
---
 pkg/sentry/fs/host/tty.go                    |  2 +-
 pkg/sentry/fsimpl/host/tty.go                |  2 +-
 pkg/sentry/kernel/task_run.go                |  5 +-
 pkg/sentry/kernel/task_signals.go            |  8 +--
 pkg/sentry/kernel/task_syscall.go            | 69 +-------------------
 pkg/sentry/syscalls/linux/error.go           |  2 +-
 pkg/sentry/syscalls/linux/sys_file.go        |  4 +-
 pkg/sentry/syscalls/linux/sys_futex.go       |  8 +--
 pkg/sentry/syscalls/linux/sys_getdents.go    |  2 +-
 pkg/sentry/syscalls/linux/sys_lseek.go       |  2 +-
 pkg/sentry/syscalls/linux/sys_mmap.go        |  2 +-
 pkg/sentry/syscalls/linux/sys_poll.go        |  8 +--
 pkg/sentry/syscalls/linux/sys_read.go        | 12 ++--
 pkg/sentry/syscalls/linux/sys_signal.go      |  4 +-
 pkg/sentry/syscalls/linux/sys_socket.go      | 14 ++--
 pkg/sentry/syscalls/linux/sys_splice.go      |  6 +-
 pkg/sentry/syscalls/linux/sys_sync.go        |  6 +-
 pkg/sentry/syscalls/linux/sys_thread.go      |  2 +-
 pkg/sentry/syscalls/linux/sys_time.go        |  4 +-
 pkg/sentry/syscalls/linux/sys_write.go       | 12 ++--
 pkg/sentry/syscalls/linux/vfs2/poll.go       |  8 +--
 pkg/sentry/syscalls/linux/vfs2/read_write.go | 20 +++---
 pkg/sentry/syscalls/linux/vfs2/socket.go     | 14 ++--
 pkg/sentry/syscalls/linux/vfs2/sync.go       |  2 +-
 pkg/syserror/syserror.go                     | 67 +++++++++++++++++++
 25 files changed, 143 insertions(+), 142 deletions(-)

diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go
index b5229098cc..e29ae00f24 100644
--- a/pkg/sentry/fs/host/tty.go
+++ b/pkg/sentry/fs/host/tty.go
@@ -358,7 +358,7 @@ func (t *TTYFileOperations) checkChange(ctx context.Context, sig linux.Signal) e
 	//
 	// Linux ignores the result of kill_pgrp().
 	_ = pg.SendSignal(kernel.SignalInfoPriv(sig))
-	return kernel.ERESTARTSYS
+	return syserror.ERESTARTSYS
 }
 
 // LINT.ThenChange(../../fsimpl/host/tty.go)
diff --git a/pkg/sentry/fsimpl/host/tty.go b/pkg/sentry/fsimpl/host/tty.go
index d372c60cb9..27cbd30599 100644
--- a/pkg/sentry/fsimpl/host/tty.go
+++ b/pkg/sentry/fsimpl/host/tty.go
@@ -376,7 +376,7 @@ func (t *TTYFileDescription) checkChange(ctx context.Context, sig linux.Signal)
 	//
 	// Linux ignores the result of kill_pgrp().
 	_ = pg.SendSignal(kernel.SignalInfoPriv(sig))
-	return kernel.ERESTARTSYS
+	return syserror.ERESTARTSYS
 }
 
 // LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
diff --git a/pkg/sentry/kernel/task_run.go b/pkg/sentry/kernel/task_run.go
index abaf292164..aa3a573c00 100644
--- a/pkg/sentry/kernel/task_run.go
+++ b/pkg/sentry/kernel/task_run.go
@@ -26,6 +26,7 @@ import (
 	ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
 	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/sentry/platform"
+	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
 )
 
@@ -189,8 +190,8 @@ func (app *runApp) execute(t *Task) taskRunState {
 	// a pending signal, causing another interruption, but that signal should
 	// not interact with the interrupted syscall.)
 	if t.haveSyscallReturn {
-		if sre, ok := SyscallRestartErrnoFromReturn(t.Arch().Return()); ok {
-			if sre == ERESTART_RESTARTBLOCK {
+		if sre, ok := syserror.SyscallRestartErrnoFromReturn(t.Arch().Return()); ok {
+			if sre == syserror.ERESTART_RESTARTBLOCK {
 				t.Debugf("Restarting syscall %d with restart block after errno %d: not interrupted by handled signal", t.Arch().SyscallNo(), sre)
 				t.Arch().RestartSyscallWithRestartBlock()
 			} else {
diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go
index cff2a83653..d6a2040bc9 100644
--- a/pkg/sentry/kernel/task_signals.go
+++ b/pkg/sentry/kernel/task_signals.go
@@ -159,7 +159,7 @@ func (t *Task) deliverSignal(info *arch.SignalInfo, act arch.SignalAct) taskRunS
 	sigact := computeAction(linux.Signal(info.Signo), act)
 
 	if t.haveSyscallReturn {
-		if sre, ok := SyscallRestartErrnoFromReturn(t.Arch().Return()); ok {
+		if sre, ok := syserror.SyscallRestartErrnoFromReturn(t.Arch().Return()); ok {
 			// Signals that are ignored, cause a thread group stop, or
 			// terminate the thread group do not interact with interrupted
 			// syscalls; in Linux terms, they are never returned to the signal
@@ -168,11 +168,11 @@ func (t *Task) deliverSignal(info *arch.SignalInfo, act arch.SignalAct) taskRunS
 			// signal that is actually handled (by userspace).
 			if sigact == SignalActionHandler {
 				switch {
-				case sre == ERESTARTNOHAND:
+				case sre == syserror.ERESTARTNOHAND:
 					fallthrough
-				case sre == ERESTART_RESTARTBLOCK:
+				case sre == syserror.ERESTART_RESTARTBLOCK:
 					fallthrough
-				case (sre == ERESTARTSYS && !act.IsRestart()):
+				case (sre == syserror.ERESTARTSYS && !act.IsRestart()):
 					t.Debugf("Not restarting syscall %d after errno %d: interrupted by signal %d", t.Arch().SyscallNo(), sre, info.Signo)
 					t.Arch().SetReturn(uintptr(-ExtractErrno(syserror.EINTR, -1)))
 				default:
diff --git a/pkg/sentry/kernel/task_syscall.go b/pkg/sentry/kernel/task_syscall.go
index a5903b0b58..2dbf865471 100644
--- a/pkg/sentry/kernel/task_syscall.go
+++ b/pkg/sentry/kernel/task_syscall.go
@@ -29,75 +29,8 @@ import (
 	"gvisor.dev/gvisor/pkg/usermem"
 )
 
-// SyscallRestartErrno represents a ERESTART* errno defined in the Linux's kernel
-// include/linux/errno.h. These errnos are never returned to userspace
-// directly, but are used to communicate the expected behavior of an
-// interrupted syscall from the syscall to signal handling.
-type SyscallRestartErrno int
-
-// These numeric values are significant because ptrace syscall exit tracing can
-// observe them.
-//
-// For all of the following errnos, if the syscall is not interrupted by a
-// signal delivered to a user handler, the syscall is restarted.
-const (
-	// ERESTARTSYS is returned by an interrupted syscall to indicate that it
-	// should be converted to EINTR if interrupted by a signal delivered to a
-	// user handler without SA_RESTART set, and restarted otherwise.
-	ERESTARTSYS = SyscallRestartErrno(512)
-
-	// ERESTARTNOINTR is returned by an interrupted syscall to indicate that it
-	// should always be restarted.
-	ERESTARTNOINTR = SyscallRestartErrno(513)
-
-	// ERESTARTNOHAND is returned by an interrupted syscall to indicate that it
-	// should be converted to EINTR if interrupted by a signal delivered to a
-	// user handler, and restarted otherwise.
-	ERESTARTNOHAND = SyscallRestartErrno(514)
-
-	// ERESTART_RESTARTBLOCK is returned by an interrupted syscall to indicate
-	// that it should be restarted using a custom function. The interrupted
-	// syscall must register a custom restart function by calling
-	// Task.SetRestartSyscallFn.
-	ERESTART_RESTARTBLOCK = SyscallRestartErrno(516)
-)
-
 var vsyscallCount = metric.MustCreateNewUint64Metric("/kernel/vsyscall_count", false /* sync */, "Number of times vsyscalls were invoked by the application")
 
-// Error implements error.Error.
-func (e SyscallRestartErrno) Error() string {
-	// Descriptions are borrowed from strace.
-	switch e {
-	case ERESTARTSYS:
-		return "to be restarted if SA_RESTART is set"
-	case ERESTARTNOINTR:
-		return "to be restarted"
-	case ERESTARTNOHAND:
-		return "to be restarted if no handler"
-	case ERESTART_RESTARTBLOCK:
-		return "interrupted by signal"
-	default:
-		return "(unknown interrupt error)"
-	}
-}
-
-// SyscallRestartErrnoFromReturn returns the SyscallRestartErrno represented by
-// rv, the value in a syscall return register.
-func SyscallRestartErrnoFromReturn(rv uintptr) (SyscallRestartErrno, bool) {
-	switch int(rv) {
-	case -int(ERESTARTSYS):
-		return ERESTARTSYS, true
-	case -int(ERESTARTNOINTR):
-		return ERESTARTNOINTR, true
-	case -int(ERESTARTNOHAND):
-		return ERESTARTNOHAND, true
-	case -int(ERESTART_RESTARTBLOCK):
-		return ERESTART_RESTARTBLOCK, true
-	default:
-		return 0, false
-	}
-}
-
 // SyscallRestartBlock represents the restart block for a syscall restartable
 // with a custom function. It encapsulates the state required to restart a
 // syscall across a S/R.
@@ -447,7 +380,7 @@ func ExtractErrno(err error, sysno int) int {
 		return 0
 	case syscall.Errno:
 		return int(err)
-	case SyscallRestartErrno:
+	case syserror.SyscallRestartErrno:
 		return int(err)
 	case *memmap.BusError:
 		// Bus errors may generate SIGBUS, but for syscalls they still
diff --git a/pkg/sentry/syscalls/linux/error.go b/pkg/sentry/syscalls/linux/error.go
index 46060f6f50..dab6207c0d 100644
--- a/pkg/sentry/syscalls/linux/error.go
+++ b/pkg/sentry/syscalls/linux/error.go
@@ -147,7 +147,7 @@ func handleIOErrorImpl(t *kernel.Task, partialResult bool, err, intr error, op s
 	}
 
 	switch err.(type) {
-	case kernel.SyscallRestartErrno:
+	case syserror.SyscallRestartErrno:
 		// Identical to the EINTR case.
 		return true, nil
 	}
diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go
index 1bc9b184e2..2564226892 100644
--- a/pkg/sentry/syscalls/linux/sys_file.go
+++ b/pkg/sentry/syscalls/linux/sys_file.go
@@ -184,7 +184,7 @@ func openAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint) (fd uint
 
 		file, err := d.Inode.GetFile(t, d, fileFlags)
 		if err != nil {
-			return syserror.ConvertIntr(err, kernel.ERESTARTSYS)
+			return syserror.ConvertIntr(err, syserror.ERESTARTSYS)
 		}
 		defer file.DecRef(t)
 
@@ -414,7 +414,7 @@ func createAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint, mode l
 			// Create a new fs.File.
 			newFile, err = found.Inode.GetFile(t, found, fileFlags)
 			if err != nil {
-				return syserror.ConvertIntr(err, kernel.ERESTARTSYS)
+				return syserror.ConvertIntr(err, syserror.ERESTARTSYS)
 			}
 			defer newFile.DecRef(t)
 		case syserror.ENOENT:
diff --git a/pkg/sentry/syscalls/linux/sys_futex.go b/pkg/sentry/syscalls/linux/sys_futex.go
index 9d1b2edb15..12b2fa6902 100644
--- a/pkg/sentry/syscalls/linux/sys_futex.go
+++ b/pkg/sentry/syscalls/linux/sys_futex.go
@@ -74,7 +74,7 @@ func futexWaitAbsolute(t *kernel.Task, clockRealtime bool, ts linux.Timespec, fo
 	}
 
 	t.Futex().WaitComplete(w, t)
-	return 0, syserror.ConvertIntr(err, kernel.ERESTARTSYS)
+	return 0, syserror.ConvertIntr(err, syserror.ERESTARTSYS)
 }
 
 // futexWaitDuration performs a FUTEX_WAIT, blocking until the wait is
@@ -110,7 +110,7 @@ func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, add
 
 	// The wait duration was absolute, restart with the original arguments.
 	if forever {
-		return 0, kernel.ERESTARTSYS
+		return 0, syserror.ERESTARTSYS
 	}
 
 	// The wait duration was relative, restart with the remaining duration.
@@ -121,7 +121,7 @@ func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, add
 		val:      val,
 		mask:     mask,
 	})
-	return 0, kernel.ERESTART_RESTARTBLOCK
+	return 0, syserror.ERESTART_RESTARTBLOCK
 }
 
 func futexLockPI(t *kernel.Task, ts linux.Timespec, forever bool, addr usermem.Addr, private bool) error {
@@ -149,7 +149,7 @@ func futexLockPI(t *kernel.Task, ts linux.Timespec, forever bool, addr usermem.A
 	}
 
 	t.Futex().WaitComplete(w, t)
-	return syserror.ConvertIntr(err, kernel.ERESTARTSYS)
+	return syserror.ConvertIntr(err, syserror.ERESTARTSYS)
 }
 
 func tryLockPI(t *kernel.Task, addr usermem.Addr, private bool) error {
diff --git a/pkg/sentry/syscalls/linux/sys_getdents.go b/pkg/sentry/syscalls/linux/sys_getdents.go
index f5699e55d2..59004cefed 100644
--- a/pkg/sentry/syscalls/linux/sys_getdents.go
+++ b/pkg/sentry/syscalls/linux/sys_getdents.go
@@ -82,7 +82,7 @@ func getdents(t *kernel.Task, fd int32, addr usermem.Addr, size int, f func(*dir
 	ds := newDirentSerializer(f, w, t.Arch(), size)
 	rerr := dir.Readdir(t, ds)
 
-	switch err := handleIOError(t, ds.Written() > 0, rerr, kernel.ERESTARTSYS, "getdents", dir); err {
+	switch err := handleIOError(t, ds.Written() > 0, rerr, syserror.ERESTARTSYS, "getdents", dir); err {
 	case nil:
 		dir.Dirent.InotifyEvent(linux.IN_ACCESS, 0)
 		return uintptr(ds.Written()), nil
diff --git a/pkg/sentry/syscalls/linux/sys_lseek.go b/pkg/sentry/syscalls/linux/sys_lseek.go
index 1c38f8f4f2..0046347cb6 100644
--- a/pkg/sentry/syscalls/linux/sys_lseek.go
+++ b/pkg/sentry/syscalls/linux/sys_lseek.go
@@ -48,7 +48,7 @@ func Lseek(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 	}
 
 	offset, serr := file.Seek(t, sw, offset)
-	err := handleIOError(t, false /* partialResult */, serr, kernel.ERESTARTSYS, "lseek", file)
+	err := handleIOError(t, false /* partialResult */, serr, syserror.ERESTARTSYS, "lseek", file)
 	if err != nil {
 		return 0, nil, err
 	}
diff --git a/pkg/sentry/syscalls/linux/sys_mmap.go b/pkg/sentry/syscalls/linux/sys_mmap.go
index 72786b0325..d0109baa4f 100644
--- a/pkg/sentry/syscalls/linux/sys_mmap.go
+++ b/pkg/sentry/syscalls/linux/sys_mmap.go
@@ -267,7 +267,7 @@ func Msync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 	})
 	// MSync calls fsync, the same interrupt conversion rules apply, see
 	// mm/msync.c, fsync POSIX.1-2008.
-	return 0, nil, syserror.ConvertIntr(err, kernel.ERESTARTSYS)
+	return 0, nil, syserror.ConvertIntr(err, syserror.ERESTARTSYS)
 }
 
 // Mlock implements linux syscall mlock(2).
diff --git a/pkg/sentry/syscalls/linux/sys_poll.go b/pkg/sentry/syscalls/linux/sys_poll.go
index 3435bdf77d..789e2ed5bc 100644
--- a/pkg/sentry/syscalls/linux/sys_poll.go
+++ b/pkg/sentry/syscalls/linux/sys_poll.go
@@ -410,7 +410,7 @@ func poll(t *kernel.Task, pfdAddr usermem.Addr, nfds uint, timeout time.Duration
 			nfds:    nfds,
 			timeout: remainingTimeout,
 		})
-		return 0, kernel.ERESTART_RESTARTBLOCK
+		return 0, syserror.ERESTART_RESTARTBLOCK
 	}
 	return n, err
 }
@@ -464,7 +464,7 @@ func Ppoll(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 	// Note that this means that if err is nil but copyErr is not, copyErr is
 	// ignored. This is consistent with Linux.
 	if err == syserror.EINTR && copyErr == nil {
-		err = kernel.ERESTARTNOHAND
+		err = syserror.ERESTARTNOHAND
 	}
 	return n, nil, err
 }
@@ -494,7 +494,7 @@ func Select(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
 	copyErr := copyOutTimevalRemaining(t, startNs, timeout, timevalAddr)
 	// See comment in Ppoll.
 	if err == syserror.EINTR && copyErr == nil {
-		err = kernel.ERESTARTNOHAND
+		err = syserror.ERESTARTNOHAND
 	}
 	return n, nil, err
 }
@@ -539,7 +539,7 @@ func Pselect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
 	copyErr := copyOutTimespecRemaining(t, startNs, timeout, timespecAddr)
 	// See comment in Ppoll.
 	if err == syserror.EINTR && copyErr == nil {
-		err = kernel.ERESTARTNOHAND
+		err = syserror.ERESTARTNOHAND
 	}
 	return n, nil, err
 }
diff --git a/pkg/sentry/syscalls/linux/sys_read.go b/pkg/sentry/syscalls/linux/sys_read.go
index 3bbc3fa4b0..f655d3db1b 100644
--- a/pkg/sentry/syscalls/linux/sys_read.go
+++ b/pkg/sentry/syscalls/linux/sys_read.go
@@ -71,7 +71,7 @@ func Read(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
 
 	n, err := readv(t, file, dst)
 	t.IOUsage().AccountReadSyscall(n)
-	return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "read", file)
+	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "read", file)
 }
 
 // Readahead implements readahead(2).
@@ -151,7 +151,7 @@ func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
 
 	n, err := preadv(t, file, dst, offset)
 	t.IOUsage().AccountReadSyscall(n)
-	return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "pread64", file)
+	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pread64", file)
 }
 
 // Readv implements linux syscall readv(2).
@@ -181,7 +181,7 @@ func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 
 	n, err := readv(t, file, dst)
 	t.IOUsage().AccountReadSyscall(n)
-	return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "readv", file)
+	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "readv", file)
 }
 
 // Preadv implements linux syscall preadv(2).
@@ -222,7 +222,7 @@ func Preadv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
 
 	n, err := preadv(t, file, dst, offset)
 	t.IOUsage().AccountReadSyscall(n)
-	return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "preadv", file)
+	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "preadv", file)
 }
 
 // Preadv2 implements linux syscall preadv2(2).
@@ -280,12 +280,12 @@ func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
 	if offset == -1 {
 		n, err := readv(t, file, dst)
 		t.IOUsage().AccountReadSyscall(n)
-		return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "preadv2", file)
+		return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "preadv2", file)
 	}
 
 	n, err := preadv(t, file, dst, offset)
 	t.IOUsage().AccountReadSyscall(n)
-	return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "preadv2", file)
+	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "preadv2", file)
 }
 
 func readv(t *kernel.Task, f *fs.File, dst usermem.IOSequence) (int64, error) {
diff --git a/pkg/sentry/syscalls/linux/sys_signal.go b/pkg/sentry/syscalls/linux/sys_signal.go
index 20cb1a5cbe..e748d33d88 100644
--- a/pkg/sentry/syscalls/linux/sys_signal.go
+++ b/pkg/sentry/syscalls/linux/sys_signal.go
@@ -348,7 +348,7 @@ func Sigaltstack(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
 
 // Pause implements linux syscall pause(2).
 func Pause(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
-	return 0, nil, syserror.ConvertIntr(t.Block(nil), kernel.ERESTARTNOHAND)
+	return 0, nil, syserror.ConvertIntr(t.Block(nil), syserror.ERESTARTNOHAND)
 }
 
 // RtSigpending implements linux syscall rt_sigpending(2).
@@ -496,7 +496,7 @@ func RtSigsuspend(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.
 	t.SetSavedSignalMask(oldmask)
 
 	// Perform the wait.
-	return 0, nil, syserror.ConvertIntr(t.Block(nil), kernel.ERESTARTNOHAND)
+	return 0, nil, syserror.ConvertIntr(t.Block(nil), syserror.ERESTARTNOHAND)
 }
 
 // RestartSyscall implements the linux syscall restart_syscall(2).
diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go
index fec1c19749..38f573c14b 100644
--- a/pkg/sentry/syscalls/linux/sys_socket.go
+++ b/pkg/sentry/syscalls/linux/sys_socket.go
@@ -285,7 +285,7 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
 	}
 
 	blocking := !file.Flags().NonBlocking
-	return 0, nil, syserror.ConvertIntr(s.Connect(t, a, blocking).ToError(), kernel.ERESTARTSYS)
+	return 0, nil, syserror.ConvertIntr(s.Connect(t, a, blocking).ToError(), syserror.ERESTARTSYS)
 }
 
 // accept is the implementation of the accept syscall. It is called by accept
@@ -316,7 +316,7 @@ func accept(t *kernel.Task, fd int32, addr usermem.Addr, addrLen usermem.Addr, f
 	peerRequested := addrLen != 0
 	nfd, peer, peerLen, e := s.Accept(t, peerRequested, flags, blocking)
 	if e != nil {
-		return 0, syserror.ConvertIntr(e.ToError(), kernel.ERESTARTSYS)
+		return 0, syserror.ConvertIntr(e.ToError(), syserror.ERESTARTSYS)
 	}
 	if peerRequested {
 		// NOTE(magi): Linux does not give you an error if it can't
@@ -771,7 +771,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags i
 	if msg.ControlLen == 0 && msg.NameLen == 0 {
 		n, mflags, _, _, cms, err := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, false, 0)
 		if err != nil {
-			return 0, syserror.ConvertIntr(err.ToError(), kernel.ERESTARTSYS)
+			return 0, syserror.ConvertIntr(err.ToError(), syserror.ERESTARTSYS)
 		}
 		if !cms.Unix.Empty() {
 			mflags |= linux.MSG_CTRUNC
@@ -793,7 +793,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags i
 	}
 	n, mflags, sender, senderLen, cms, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, msg.NameLen != 0, msg.ControlLen)
 	if e != nil {
-		return 0, syserror.ConvertIntr(e.ToError(), kernel.ERESTARTSYS)
+		return 0, syserror.ConvertIntr(e.ToError(), syserror.ERESTARTSYS)
 	}
 	defer cms.Release(t)
 
@@ -882,7 +882,7 @@ func recvFrom(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flag
 	n, _, sender, senderLen, cm, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, nameLenPtr != 0, 0)
 	cm.Release(t)
 	if e != nil {
-		return 0, syserror.ConvertIntr(e.ToError(), kernel.ERESTARTSYS)
+		return 0, syserror.ConvertIntr(e.ToError(), syserror.ERESTARTSYS)
 	}
 
 	// Copy the address to the caller.
@@ -1064,7 +1064,7 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr userme
 
 	// Call the syscall implementation.
 	n, e := s.SendMsg(t, src, to, int(flags), haveDeadline, deadline, controlMessages)
-	err = handleIOError(t, n != 0, e.ToError(), kernel.ERESTARTSYS, "sendmsg", file)
+	err = handleIOError(t, n != 0, e.ToError(), syserror.ERESTARTSYS, "sendmsg", file)
 	if err != nil {
 		controlMessages.Release(t)
 	}
@@ -1124,7 +1124,7 @@ func sendTo(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags
 
 	// Call the syscall implementation.
 	n, e := s.SendMsg(t, src, to, int(flags), haveDeadline, deadline, socket.ControlMessages{Unix: control.New(t, s, nil)})
-	return uintptr(n), handleIOError(t, n != 0, e.ToError(), kernel.ERESTARTSYS, "sendto", file)
+	return uintptr(n), handleIOError(t, n != 0, e.ToError(), syserror.ERESTARTSYS, "sendto", file)
 }
 
 // SendTo implements the linux syscall sendto(2).
diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go
index b8846a10ac..c69941feb9 100644
--- a/pkg/sentry/syscalls/linux/sys_splice.go
+++ b/pkg/sentry/syscalls/linux/sys_splice.go
@@ -170,7 +170,7 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
 
 	// We can only pass a single file to handleIOError, so pick inFile
 	// arbitrarily. This is used only for debugging purposes.
-	return uintptr(n), nil, handleIOError(t, false, err, kernel.ERESTARTSYS, "sendfile", inFile)
+	return uintptr(n), nil, handleIOError(t, false, err, syserror.ERESTARTSYS, "sendfile", inFile)
 }
 
 // Splice implements splice(2).
@@ -280,7 +280,7 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
 	}
 
 	// See above; inFile is chosen arbitrarily here.
-	return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "splice", inFile)
+	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "splice", inFile)
 }
 
 // Tee imlements tee(2).
@@ -333,5 +333,5 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo
 	}
 
 	// See above; inFile is chosen arbitrarily here.
-	return uintptr(n), nil, handleIOError(t, false, err, kernel.ERESTARTSYS, "tee", inFile)
+	return uintptr(n), nil, handleIOError(t, false, err, syserror.ERESTARTSYS, "tee", inFile)
 }
diff --git a/pkg/sentry/syscalls/linux/sys_sync.go b/pkg/sentry/syscalls/linux/sys_sync.go
index f2c0e5069d..048a21c6e7 100644
--- a/pkg/sentry/syscalls/linux/sys_sync.go
+++ b/pkg/sentry/syscalls/linux/sys_sync.go
@@ -57,7 +57,7 @@ func Fsync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 	defer file.DecRef(t)
 
 	err := file.Fsync(t, 0, fs.FileMaxOffset, fs.SyncAll)
-	return 0, nil, syserror.ConvertIntr(err, kernel.ERESTARTSYS)
+	return 0, nil, syserror.ConvertIntr(err, syserror.ERESTARTSYS)
 }
 
 // Fdatasync implements linux syscall fdatasync(2).
@@ -73,7 +73,7 @@ func Fdatasync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
 	defer file.DecRef(t)
 
 	err := file.Fsync(t, 0, fs.FileMaxOffset, fs.SyncData)
-	return 0, nil, syserror.ConvertIntr(err, kernel.ERESTARTSYS)
+	return 0, nil, syserror.ConvertIntr(err, syserror.ERESTARTSYS)
 }
 
 // SyncFileRange implements linux syscall sync_file_rage(2)
@@ -135,7 +135,7 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel
 		err = file.Fsync(t, offset, fs.FileMaxOffset, fs.SyncData)
 	}
 
-	return 0, nil, syserror.ConvertIntr(err, kernel.ERESTARTSYS)
+	return 0, nil, syserror.ConvertIntr(err, syserror.ERESTARTSYS)
 }
 
 // LINT.ThenChange(vfs2/sync.go)
diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go
index 2d16e49332..1010960383 100644
--- a/pkg/sentry/syscalls/linux/sys_thread.go
+++ b/pkg/sentry/syscalls/linux/sys_thread.go
@@ -262,7 +262,7 @@ func parseCommonWaitOptions(wopts *kernel.WaitOptions, options int) error {
 		wopts.Events |= kernel.EventGroupContinue
 	}
 	if options&linux.WNOHANG == 0 {
-		wopts.BlockInterruptErr = kernel.ERESTARTSYS
+		wopts.BlockInterruptErr = syserror.ERESTARTSYS
 	}
 	if options&linux.WNOTHREAD == 0 {
 		wopts.SiblingChildren = true
diff --git a/pkg/sentry/syscalls/linux/sys_time.go b/pkg/sentry/syscalls/linux/sys_time.go
index 2d2aa08199..a2a24a027a 100644
--- a/pkg/sentry/syscalls/linux/sys_time.go
+++ b/pkg/sentry/syscalls/linux/sys_time.go
@@ -213,7 +213,7 @@ func clockNanosleepUntil(t *kernel.Task, c ktime.Clock, ts linux.Timespec) error
 		return nil
 	}
 
-	return syserror.ConvertIntr(err, kernel.ERESTARTNOHAND)
+	return syserror.ConvertIntr(err, syserror.ERESTARTNOHAND)
 }
 
 // clockNanosleepFor blocks for a specified duration.
@@ -254,7 +254,7 @@ func clockNanosleepFor(t *kernel.Task, c ktime.Clock, dur time.Duration, rem use
 			duration: remaining,
 			rem:      rem,
 		})
-		return kernel.ERESTART_RESTARTBLOCK
+		return syserror.ERESTART_RESTARTBLOCK
 	default:
 		panic(fmt.Sprintf("Impossible BlockWithTimer error %v", err))
 	}
diff --git a/pkg/sentry/syscalls/linux/sys_write.go b/pkg/sentry/syscalls/linux/sys_write.go
index 485526e281..95bfe66062 100644
--- a/pkg/sentry/syscalls/linux/sys_write.go
+++ b/pkg/sentry/syscalls/linux/sys_write.go
@@ -71,7 +71,7 @@ func Write(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 
 	n, err := writev(t, file, src)
 	t.IOUsage().AccountWriteSyscall(n)
-	return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "write", file)
+	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "write", file)
 }
 
 // Pwrite64 implements linux syscall pwrite64(2).
@@ -118,7 +118,7 @@ func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
 
 	n, err := pwritev(t, file, src, offset)
 	t.IOUsage().AccountWriteSyscall(n)
-	return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "pwrite64", file)
+	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pwrite64", file)
 }
 
 // Writev implements linux syscall writev(2).
@@ -148,7 +148,7 @@ func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
 
 	n, err := writev(t, file, src)
 	t.IOUsage().AccountWriteSyscall(n)
-	return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "writev", file)
+	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "writev", file)
 }
 
 // Pwritev implements linux syscall pwritev(2).
@@ -189,7 +189,7 @@ func Pwritev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
 
 	n, err := pwritev(t, file, src, offset)
 	t.IOUsage().AccountWriteSyscall(n)
-	return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "pwritev", file)
+	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pwritev", file)
 }
 
 // Pwritev2 implements linux syscall pwritev2(2).
@@ -250,12 +250,12 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
 	if offset == -1 {
 		n, err := writev(t, file, src)
 		t.IOUsage().AccountWriteSyscall(n)
-		return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "pwritev2", file)
+		return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pwritev2", file)
 	}
 
 	n, err := pwritev(t, file, src, offset)
 	t.IOUsage().AccountWriteSyscall(n)
-	return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "pwritev2", file)
+	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pwritev2", file)
 }
 
 func writev(t *kernel.Task, f *fs.File, src usermem.IOSequence) (int64, error) {
diff --git a/pkg/sentry/syscalls/linux/vfs2/poll.go b/pkg/sentry/syscalls/linux/vfs2/poll.go
index 7b9d5e18ae..79ad640398 100644
--- a/pkg/sentry/syscalls/linux/vfs2/poll.go
+++ b/pkg/sentry/syscalls/linux/vfs2/poll.go
@@ -415,7 +415,7 @@ func poll(t *kernel.Task, pfdAddr usermem.Addr, nfds uint, timeout time.Duration
 			nfds:    nfds,
 			timeout: remainingTimeout,
 		})
-		return 0, kernel.ERESTART_RESTARTBLOCK
+		return 0, syserror.ERESTART_RESTARTBLOCK
 	}
 	return n, err
 }
@@ -462,7 +462,7 @@ func Ppoll(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 	// Note that this means that if err is nil but copyErr is not, copyErr is
 	// ignored. This is consistent with Linux.
 	if err == syserror.EINTR && copyErr == nil {
-		err = kernel.ERESTARTNOHAND
+		err = syserror.ERESTARTNOHAND
 	}
 	return n, nil, err
 }
@@ -492,7 +492,7 @@ func Select(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
 	copyErr := copyOutTimevalRemaining(t, startNs, timeout, timevalAddr)
 	// See comment in Ppoll.
 	if err == syserror.EINTR && copyErr == nil {
-		err = kernel.ERESTARTNOHAND
+		err = syserror.ERESTARTNOHAND
 	}
 	return n, nil, err
 }
@@ -533,7 +533,7 @@ func Pselect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
 	copyErr := copyOutTimespecRemaining(t, startNs, timeout, timespecAddr)
 	// See comment in Ppoll.
 	if err == syserror.EINTR && copyErr == nil {
-		err = kernel.ERESTARTNOHAND
+		err = syserror.ERESTARTNOHAND
 	}
 	return n, nil, err
 }
diff --git a/pkg/sentry/syscalls/linux/vfs2/read_write.go b/pkg/sentry/syscalls/linux/vfs2/read_write.go
index a905dae0a4..b77b29dcca 100644
--- a/pkg/sentry/syscalls/linux/vfs2/read_write.go
+++ b/pkg/sentry/syscalls/linux/vfs2/read_write.go
@@ -62,7 +62,7 @@ func Read(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
 
 	n, err := read(t, file, dst, vfs.ReadOptions{})
 	t.IOUsage().AccountReadSyscall(n)
-	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, kernel.ERESTARTSYS, "read", file)
+	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "read", file)
 }
 
 // Readv implements Linux syscall readv(2).
@@ -87,7 +87,7 @@ func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 
 	n, err := read(t, file, dst, vfs.ReadOptions{})
 	t.IOUsage().AccountReadSyscall(n)
-	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, kernel.ERESTARTSYS, "readv", file)
+	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "readv", file)
 }
 
 func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
@@ -174,7 +174,7 @@ func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
 
 	n, err := pread(t, file, dst, offset, vfs.ReadOptions{})
 	t.IOUsage().AccountReadSyscall(n)
-	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, kernel.ERESTARTSYS, "pread64", file)
+	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "pread64", file)
 }
 
 // Preadv implements Linux syscall preadv(2).
@@ -205,7 +205,7 @@ func Preadv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
 
 	n, err := pread(t, file, dst, offset, vfs.ReadOptions{})
 	t.IOUsage().AccountReadSyscall(n)
-	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, kernel.ERESTARTSYS, "preadv", file)
+	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "preadv", file)
 }
 
 // Preadv2 implements Linux syscall preadv2(2).
@@ -251,7 +251,7 @@ func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
 		n, err = pread(t, file, dst, offset, opts)
 	}
 	t.IOUsage().AccountReadSyscall(n)
-	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, kernel.ERESTARTSYS, "preadv2", file)
+	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "preadv2", file)
 }
 
 func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
@@ -332,7 +332,7 @@ func Write(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 
 	n, err := write(t, file, src, vfs.WriteOptions{})
 	t.IOUsage().AccountWriteSyscall(n)
-	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, kernel.ERESTARTSYS, "write", file)
+	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "write", file)
 }
 
 // Writev implements Linux syscall writev(2).
@@ -357,7 +357,7 @@ func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
 
 	n, err := write(t, file, src, vfs.WriteOptions{})
 	t.IOUsage().AccountWriteSyscall(n)
-	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, kernel.ERESTARTSYS, "writev", file)
+	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "writev", file)
 }
 
 func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
@@ -444,7 +444,7 @@ func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
 
 	n, err := pwrite(t, file, src, offset, vfs.WriteOptions{})
 	t.IOUsage().AccountWriteSyscall(n)
-	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, kernel.ERESTARTSYS, "pwrite64", file)
+	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "pwrite64", file)
 }
 
 // Pwritev implements Linux syscall pwritev(2).
@@ -475,7 +475,7 @@ func Pwritev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
 
 	n, err := pwrite(t, file, src, offset, vfs.WriteOptions{})
 	t.IOUsage().AccountReadSyscall(n)
-	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, kernel.ERESTARTSYS, "pwritev", file)
+	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "pwritev", file)
 }
 
 // Pwritev2 implements Linux syscall pwritev2(2).
@@ -521,7 +521,7 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
 		n, err = pwrite(t, file, src, offset, opts)
 	}
 	t.IOUsage().AccountWriteSyscall(n)
-	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, kernel.ERESTARTSYS, "pwritev2", file)
+	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "pwritev2", file)
 }
 
 func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
diff --git a/pkg/sentry/syscalls/linux/vfs2/socket.go b/pkg/sentry/syscalls/linux/vfs2/socket.go
index 4a68c64f32..a5032657a0 100644
--- a/pkg/sentry/syscalls/linux/vfs2/socket.go
+++ b/pkg/sentry/syscalls/linux/vfs2/socket.go
@@ -288,7 +288,7 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
 	}
 
 	blocking := (file.StatusFlags() & linux.SOCK_NONBLOCK) == 0
-	return 0, nil, syserror.ConvertIntr(s.Connect(t, a, blocking).ToError(), kernel.ERESTARTSYS)
+	return 0, nil, syserror.ConvertIntr(s.Connect(t, a, blocking).ToError(), syserror.ERESTARTSYS)
 }
 
 // accept is the implementation of the accept syscall. It is called by accept
@@ -319,7 +319,7 @@ func accept(t *kernel.Task, fd int32, addr usermem.Addr, addrLen usermem.Addr, f
 	peerRequested := addrLen != 0
 	nfd, peer, peerLen, e := s.Accept(t, peerRequested, flags, blocking)
 	if e != nil {
-		return 0, syserror.ConvertIntr(e.ToError(), kernel.ERESTARTSYS)
+		return 0, syserror.ConvertIntr(e.ToError(), syserror.ERESTARTSYS)
 	}
 	if peerRequested {
 		// NOTE(magi): Linux does not give you an error if it can't
@@ -774,7 +774,7 @@ func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr usermem.Addr, fla
 	if msg.ControlLen == 0 && msg.NameLen == 0 {
 		n, mflags, _, _, cms, err := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, false, 0)
 		if err != nil {
-			return 0, syserror.ConvertIntr(err.ToError(), kernel.ERESTARTSYS)
+			return 0, syserror.ConvertIntr(err.ToError(), syserror.ERESTARTSYS)
 		}
 		if !cms.Unix.Empty() {
 			mflags |= linux.MSG_CTRUNC
@@ -796,7 +796,7 @@ func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr usermem.Addr, fla
 	}
 	n, mflags, sender, senderLen, cms, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, msg.NameLen != 0, msg.ControlLen)
 	if e != nil {
-		return 0, syserror.ConvertIntr(e.ToError(), kernel.ERESTARTSYS)
+		return 0, syserror.ConvertIntr(e.ToError(), syserror.ERESTARTSYS)
 	}
 	defer cms.Release(t)
 
@@ -885,7 +885,7 @@ func recvFrom(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flag
 	n, _, sender, senderLen, cm, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, nameLenPtr != 0, 0)
 	cm.Release(t)
 	if e != nil {
-		return 0, syserror.ConvertIntr(e.ToError(), kernel.ERESTARTSYS)
+		return 0, syserror.ConvertIntr(e.ToError(), syserror.ERESTARTSYS)
 	}
 
 	// Copy the address to the caller.
@@ -1067,7 +1067,7 @@ func sendSingleMsg(t *kernel.Task, s socket.SocketVFS2, file *vfs.FileDescriptio
 
 	// Call the syscall implementation.
 	n, e := s.SendMsg(t, src, to, int(flags), haveDeadline, deadline, controlMessages)
-	err = slinux.HandleIOErrorVFS2(t, n != 0, e.ToError(), kernel.ERESTARTSYS, "sendmsg", file)
+	err = slinux.HandleIOErrorVFS2(t, n != 0, e.ToError(), syserror.ERESTARTSYS, "sendmsg", file)
 	if err != nil {
 		controlMessages.Release(t)
 	}
@@ -1127,7 +1127,7 @@ func sendTo(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags
 
 	// Call the syscall implementation.
 	n, e := s.SendMsg(t, src, to, int(flags), haveDeadline, deadline, socket.ControlMessages{Unix: control.New(t, s, nil)})
-	return uintptr(n), slinux.HandleIOErrorVFS2(t, n != 0, e.ToError(), kernel.ERESTARTSYS, "sendto", file)
+	return uintptr(n), slinux.HandleIOErrorVFS2(t, n != 0, e.ToError(), syserror.ERESTARTSYS, "sendto", file)
 }
 
 // SendTo implements the linux syscall sendto(2).
diff --git a/pkg/sentry/syscalls/linux/vfs2/sync.go b/pkg/sentry/syscalls/linux/vfs2/sync.go
index a6491ac37c..6e9b599e2b 100644
--- a/pkg/sentry/syscalls/linux/vfs2/sync.go
+++ b/pkg/sentry/syscalls/linux/vfs2/sync.go
@@ -108,7 +108,7 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel
 
 	if flags&linux.SYNC_FILE_RANGE_WAIT_AFTER != 0 {
 		if err := file.Sync(t); err != nil {
-			return 0, nil, syserror.ConvertIntr(err, kernel.ERESTARTSYS)
+			return 0, nil, syserror.ConvertIntr(err, syserror.ERESTARTSYS)
 		}
 	}
 	return 0, nil, nil
diff --git a/pkg/syserror/syserror.go b/pkg/syserror/syserror.go
index 798e07b01e..fe9f50169f 100644
--- a/pkg/syserror/syserror.go
+++ b/pkg/syserror/syserror.go
@@ -153,6 +153,73 @@ func ConvertIntr(err, intr error) error {
 	return err
 }
 
+// SyscallRestartErrno represents an ERESTART* errno defined in the Linux
+// kernel's include/linux/errno.h. These errnos are never returned to userspace
+// directly, but are used to communicate the expected behavior of an
+// interrupted syscall from the syscall to signal handling.
+type SyscallRestartErrno int
+
+// These numeric values are significant because ptrace syscall exit tracing can
+// observe them.
+//
+// For all of the following errnos, if the syscall is not interrupted by a
+// signal delivered to a user handler, the syscall is restarted.
+const (
+	// ERESTARTSYS is returned by an interrupted syscall to indicate that it
+	// should be converted to EINTR if interrupted by a signal delivered to a
+	// user handler without SA_RESTART set, and restarted otherwise.
+	ERESTARTSYS = SyscallRestartErrno(512)
+
+	// ERESTARTNOINTR is returned by an interrupted syscall to indicate that it
+	// should always be restarted.
+	ERESTARTNOINTR = SyscallRestartErrno(513)
+
+	// ERESTARTNOHAND is returned by an interrupted syscall to indicate that it
+	// should be converted to EINTR if interrupted by a signal delivered to a
+	// user handler, and restarted otherwise.
+	ERESTARTNOHAND = SyscallRestartErrno(514)
+
+	// ERESTART_RESTARTBLOCK is returned by an interrupted syscall to indicate
+	// that it should be restarted using a custom function. The interrupted
+	// syscall must register a custom restart function by calling
+	// Task.SetRestartSyscallFn.
+	ERESTART_RESTARTBLOCK = SyscallRestartErrno(516)
+)
+
+// Error implements error.Error.
+func (e SyscallRestartErrno) Error() string {
+	// Descriptions are borrowed from strace.
+	switch e {
+	case ERESTARTSYS:
+		return "to be restarted if SA_RESTART is set"
+	case ERESTARTNOINTR:
+		return "to be restarted"
+	case ERESTARTNOHAND:
+		return "to be restarted if no handler"
+	case ERESTART_RESTARTBLOCK:
+		return "interrupted by signal"
+	default:
+		return "(unknown interrupt error)"
+	}
+}
+
+// SyscallRestartErrnoFromReturn returns the SyscallRestartErrno represented by
+// rv, the value in a syscall return register.
+func SyscallRestartErrnoFromReturn(rv uintptr) (SyscallRestartErrno, bool) {
+	switch int(rv) {
+	case -int(ERESTARTSYS):
+		return ERESTARTSYS, true
+	case -int(ERESTARTNOINTR):
+		return ERESTARTNOINTR, true
+	case -int(ERESTARTNOHAND):
+		return ERESTARTNOHAND, true
+	case -int(ERESTART_RESTARTBLOCK):
+		return ERESTART_RESTARTBLOCK, true
+	default:
+		return 0, false
+	}
+}
+
 func init() {
 	AddErrorTranslation(ErrWouldBlock, syscall.EWOULDBLOCK)
 	AddErrorTranslation(ErrInterrupted, syscall.EINTR)

From ab98a35a9adb2df0359478b8898e78337e2d0392 Mon Sep 17 00:00:00 2001
From: Dean Deng <deandeng@google.com>
Date: Tue, 18 Aug 2020 21:52:08 -0700
Subject: [PATCH 025/211] Get rid of kernfs.Inode.Destroy.

This interface method is unneeded.

PiperOrigin-RevId: 327370325
---
 pkg/sentry/fsimpl/host/host.go              | 19 ++++++++-----------
 pkg/sentry/fsimpl/kernfs/inode_impl_util.go | 19 ++++++-------------
 pkg/sentry/fsimpl/kernfs/kernfs.go          |  4 ----
 3 files changed, 14 insertions(+), 28 deletions(-)

diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index bd6caba063..56869f59a1 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -432,17 +432,14 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre
 
 // DecRef implements kernfs.Inode.
 func (i *inode) DecRef(ctx context.Context) {
-	i.AtomicRefCount.DecRefWithDestructor(ctx, i.Destroy)
-}
-
-// Destroy implements kernfs.Inode.
-func (i *inode) Destroy(context.Context) {
-	if i.wouldBlock {
-		fdnotifier.RemoveFD(int32(i.hostFD))
-	}
-	if err := unix.Close(i.hostFD); err != nil {
-		log.Warningf("failed to close host fd %d: %v", i.hostFD, err)
-	}
+	i.AtomicRefCount.DecRefWithDestructor(ctx, func(context.Context) {
+		if i.wouldBlock {
+			fdnotifier.RemoveFD(int32(i.hostFD))
+		}
+		if err := unix.Close(i.hostFD); err != nil {
+			log.Warningf("failed to close host fd %d: %v", i.hostFD, err)
+		}
+	})
 }
 
 // Open implements kernfs.Inode.
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index c3efcf3ec6..fe8a1e7100 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -48,10 +48,6 @@ func (InodeNoopRefCount) TryIncRef() bool {
 	return true
 }
 
-// Destroy implements Inode.Destroy.
-func (InodeNoopRefCount) Destroy(context.Context) {
-}
-
 // InodeDirectoryNoNewChildren partially implements the Inode interface.
 // InodeDirectoryNoNewChildren represents a directory inode which does not
 // support creation of new children.
@@ -367,15 +363,12 @@ func (o *OrderedChildren) Init(opts OrderedChildrenOptions) {
 
 // DecRef implements Inode.DecRef.
 func (o *OrderedChildren) DecRef(ctx context.Context) {
-	o.AtomicRefCount.DecRefWithDestructor(ctx, o.Destroy)
-}
-
-// Destroy cleans up resources referenced by this OrderedChildren.
-func (o *OrderedChildren) Destroy(context.Context) {
-	o.mu.Lock()
-	defer o.mu.Unlock()
-	o.order.Reset()
-	o.set = nil
+	o.AtomicRefCount.DecRefWithDestructor(ctx, func(context.Context) {
+		o.mu.Lock()
+		defer o.mu.Unlock()
+		o.order.Reset()
+		o.set = nil
+	})
 }
 
 // Populate inserts children into this OrderedChildren, and d's dentry
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index 0801188414..51dbc050c1 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -328,10 +328,6 @@ type inodeRefs interface {
 	IncRef()
 	DecRef(ctx context.Context)
 	TryIncRef() bool
-	// Destroy is called when the inode reaches zero references. Destroy release
-	// all resources (references) on objects referenced by the inode, including
-	// any child dentries.
-	Destroy(ctx context.Context)
 }
 
 type inodeMetadata interface {

From 01098ad9a23c01bbbd3f8d60242646f88dd42040 Mon Sep 17 00:00:00 2001
From: Ayush Ranjan <ayushranjan@google.com>
Date: Tue, 18 Aug 2020 21:55:16 -0700
Subject: [PATCH 026/211] [vfs] Allow offsets for special files other than
 regular files.

Some character and block devices can be seekable. So allow their FD to maintain
file offset.

PiperOrigin-RevId: 327370684
---
 pkg/sentry/fsimpl/gofer/special_file.go | 35 ++++++++++++++++---------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go
index a6368fdd0b..3c39aa9b71 100644
--- a/pkg/sentry/fsimpl/gofer/special_file.go
+++ b/pkg/sentry/fsimpl/gofer/special_file.go
@@ -39,8 +39,14 @@ type specialFileFD struct {
 	// handle is used for file I/O. handle is immutable.
 	handle handle
 
+	// isRegularFile is true if this FD represents a regular file which is only
+	// possible when filesystemOptions.regularFilesUseSpecialFileFD is in
+	// effect. isRegularFile is immutable.
+	isRegularFile bool
+
 	// seekable is true if this file description represents a file for which
-	// file offset is significant, i.e. a regular file. seekable is immutable.
+	// file offset is significant, i.e. a regular file, character device or
+	// block device. seekable is immutable.
 	seekable bool
 
 	// haveQueue is true if this file description represents a file for which
@@ -55,12 +61,13 @@ type specialFileFD struct {
 
 func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, locks *vfs.FileLocks, flags uint32) (*specialFileFD, error) {
 	ftype := d.fileType()
-	seekable := ftype == linux.S_IFREG
+	seekable := ftype == linux.S_IFREG || ftype == linux.S_IFCHR || ftype == linux.S_IFBLK
 	haveQueue := (ftype == linux.S_IFIFO || ftype == linux.S_IFSOCK) && h.fd >= 0
 	fd := &specialFileFD{
-		handle:    h,
-		seekable:  seekable,
-		haveQueue: haveQueue,
+		handle:        h,
+		isRegularFile: ftype == linux.S_IFREG,
+		seekable:      seekable,
+		haveQueue:     haveQueue,
 	}
 	fd.LockFD.Init(locks)
 	if haveQueue {
@@ -200,13 +207,13 @@ func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off
 	// If the regular file fd was opened with O_APPEND, make sure the file size
 	// is updated. There is a possible race here if size is modified externally
 	// after metadata cache is updated.
-	if fd.seekable && fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 && !d.cachedMetadataAuthoritative() {
+	if fd.isRegularFile && fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 && !d.cachedMetadataAuthoritative() {
 		if err := d.updateFromGetattr(ctx); err != nil {
 			return 0, offset, err
 		}
 	}
 
-	if fd.seekable {
+	if fd.isRegularFile {
 		// We need to hold the metadataMu *while* writing to a regular file.
 		d.metadataMu.Lock()
 		defer d.metadataMu.Unlock()
@@ -236,18 +243,20 @@ func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off
 	if err == syserror.EAGAIN {
 		err = syserror.ErrWouldBlock
 	}
-	finalOff = offset
+	// Update offset if the offset is valid.
+	if offset >= 0 {
+		offset += int64(n)
+	}
 	// Update file size for regular files.
-	if fd.seekable {
-		finalOff += int64(n)
+	if fd.isRegularFile {
 		// d.metadataMu is already locked at this point.
-		if uint64(finalOff) > d.size {
+		if uint64(offset) > d.size {
 			d.dataMu.Lock()
 			defer d.dataMu.Unlock()
-			atomic.StoreUint64(&d.size, uint64(finalOff))
+			atomic.StoreUint64(&d.size, uint64(offset))
 		}
 	}
-	return int64(n), finalOff, err
+	return int64(n), offset, err
 }
 
 // Write implements vfs.FileDescriptionImpl.Write.

From 1c3c12a37e01adffe5f2ed44d094f29baf0fd2a6 Mon Sep 17 00:00:00 2001
From: Dean Deng <deandeng@google.com>
Date: Wed, 19 Aug 2020 08:50:59 -0700
Subject: [PATCH 027/211] Return appropriate errors when file locking is
 unsuccessful.

test_eintr now passes in the Python runtime tests.

Updates #3515.

PiperOrigin-RevId: 327441081
---
 pkg/sentry/vfs/lock.go       | 16 +++++++++--
 test/syscalls/linux/flock.cc | 54 ++++++++++++++++++++++++++++++++++--
 2 files changed, 66 insertions(+), 4 deletions(-)

diff --git a/pkg/sentry/vfs/lock.go b/pkg/sentry/vfs/lock.go
index 6c7583a810..42666eebf3 100644
--- a/pkg/sentry/vfs/lock.go
+++ b/pkg/sentry/vfs/lock.go
@@ -46,7 +46,13 @@ func (fl *FileLocks) LockBSD(uid fslock.UniqueID, t fslock.LockType, block fsloc
 	if fl.bsd.LockRegion(uid, t, fslock.LockRange{0, fslock.LockEOF}, block) {
 		return nil
 	}
-	return syserror.ErrWouldBlock
+
+	// Return an appropriate error for the unsuccessful lock attempt, depending on
+	// whether this is a blocking or non-blocking operation.
+	if block == nil {
+		return syserror.ErrWouldBlock
+	}
+	return syserror.ERESTARTSYS
 }
 
 // UnlockBSD releases a BSD-style lock on the entire file.
@@ -66,7 +72,13 @@ func (fl *FileLocks) LockPOSIX(ctx context.Context, fd *FileDescription, uid fsl
 	if fl.posix.LockRegion(uid, t, rng, block) {
 		return nil
 	}
-	return syserror.ErrWouldBlock
+
+	// Return an appropriate error for the unsuccessful lock attempt, depending on
+	// whether this is a blocking or non-blocking operation.
+	if block == nil {
+		return syserror.ErrWouldBlock
+	}
+	return syserror.ERESTARTSYS
 }
 
 // UnlockPOSIX releases a POSIX-style lock on a file region.
diff --git a/test/syscalls/linux/flock.cc b/test/syscalls/linux/flock.cc
index 638a93979a..549141cbb9 100644
--- a/test/syscalls/linux/flock.cc
+++ b/test/syscalls/linux/flock.cc
@@ -185,7 +185,7 @@ TEST_F(FlockTest, TestMultipleHolderSharedExclusive) {
   ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
 }
 
-TEST_F(FlockTest, TestSharedLockFailExclusiveHolder) {
+TEST_F(FlockTest, TestSharedLockFailExclusiveHolderNonblocking) {
   // This test will verify that a shared lock is denied while
   // someone holds an exclusive lock.
   ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB),
@@ -203,7 +203,33 @@ TEST_F(FlockTest, TestSharedLockFailExclusiveHolder) {
   ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
 }
 
-TEST_F(FlockTest, TestExclusiveLockFailExclusiveHolder) {
+void trivial_handler(int signum) {}
+
+TEST_F(FlockTest, TestSharedLockFailExclusiveHolderBlocking_NoRandomSave) {
+  const DisableSave ds;  // Timing-related.
+
+  // This test will verify that a shared lock is denied while
+  // someone holds an exclusive lock.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB),
+              SyscallSucceedsWithValue(0));
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+  // Register a signal handler for SIGALRM and set an alarm that will go off
+  // while blocking in the subsequent flock() call. This will interrupt flock()
+  // and cause it to return EINTR.
+  struct sigaction act = {};
+  act.sa_handler = trivial_handler;
+  ASSERT_THAT(sigaction(SIGALRM, &act, NULL), SyscallSucceeds());
+  ASSERT_THAT(ualarm(10000, 0), SyscallSucceeds());
+  ASSERT_THAT(flock(fd.get(), LOCK_SH), SyscallFailsWithErrno(EINTR));
+
+  // Unlock
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestExclusiveLockFailExclusiveHolderNonblocking) {
   // This test will verify that an exclusive lock is denied while
   // someone already holds an exclsuive lock.
   ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB),
@@ -221,6 +247,30 @@ TEST_F(FlockTest, TestExclusiveLockFailExclusiveHolder) {
   ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
 }
 
+TEST_F(FlockTest, TestExclusiveLockFailExclusiveHolderBlocking_NoRandomSave) {
+  const DisableSave ds;  // Timing-related.
+
+  // This test will verify that an exclusive lock is denied while
+  // someone already holds an exclusive lock.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB),
+              SyscallSucceedsWithValue(0));
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+  // Register a signal handler for SIGALRM and set an alarm that will go off
+  // while blocking in the subsequent flock() call. This will interrupt flock()
+  // and cause it to return EINTR.
+  struct sigaction act = {};
+  act.sa_handler = trivial_handler;
+  ASSERT_THAT(sigaction(SIGALRM, &act, NULL), SyscallSucceeds());
+  ASSERT_THAT(ualarm(10000, 0), SyscallSucceeds());
+  ASSERT_THAT(flock(fd.get(), LOCK_EX), SyscallFailsWithErrno(EINTR));
+
+  // Unlock
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
 TEST_F(FlockTest, TestMultipleHolderSharedExclusiveUpgrade) {
   // This test will verify that we cannot obtain an exclusive lock while
   // a shared lock is held by another descriptor, then verify that an upgrade

From da96ad6bacd604f535ea21d7e559e8501087d227 Mon Sep 17 00:00:00 2001
From: Zach Koopmans <zkoopmans@google.com>
Date: Wed, 19 Aug 2020 09:48:55 -0700
Subject: [PATCH 028/211] Skip ECN test for native/linux tests.

Skip check for ECN bits in native/linux tests. General advice
for the ECN field is to leave the setting to the kernel, so
behavior of the test is undefined for different kernels.

http://www.masterraghu.com/subjects/np/introduction/unix_network_programming_v1.3/ch07lev1sec6.html

PiperOrigin-RevId: 327451414
---
 test/syscalls/linux/socket_ip_unbound.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test/syscalls/linux/socket_ip_unbound.cc b/test/syscalls/linux/socket_ip_unbound.cc
index 1c7b0cf90a..8f7ccc8681 100644
--- a/test/syscalls/linux/socket_ip_unbound.cc
+++ b/test/syscalls/linux/socket_ip_unbound.cc
@@ -217,6 +217,8 @@ TEST_P(IPUnboundSocketTest, InvalidLargeTOS) {
 }
 
 TEST_P(IPUnboundSocketTest, CheckSkipECN) {
+  // Test is inconsistent on different kernels.
+  SKIP_IF(!IsRunningOnGvisor());
   auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
   int set = 0xFF;
   socklen_t set_sz = sizeof(set);

From 55ad34a05b5b1a36e89d2269c9ca4918f81adf48 Mon Sep 17 00:00:00 2001
From: Fabricio Voznika <fvoznika@google.com>
Date: Wed, 19 Aug 2020 11:43:24 -0700
Subject: [PATCH 029/211] Change runtimeoptions proto handling.

Stolen from cl/327337408 (ascannell is OOO)

PiperOrigin-RevId: 327475423
---
 pkg/shim/v2/runtimeoptions/BUILD              | 16 +++++-
 pkg/shim/v2/runtimeoptions/runtimeoptions.go  |  3 ++
 .../v2/runtimeoptions/runtimeoptions.proto    |  4 +-
 .../v2/runtimeoptions/runtimeoptions_test.go  | 52 +++++++++++++++++++
 4 files changed, 71 insertions(+), 4 deletions(-)
 create mode 100644 pkg/shim/v2/runtimeoptions/runtimeoptions_test.go

diff --git a/pkg/shim/v2/runtimeoptions/BUILD b/pkg/shim/v2/runtimeoptions/BUILD
index 01716034c2..ba2ed1ea7e 100644
--- a/pkg/shim/v2/runtimeoptions/BUILD
+++ b/pkg/shim/v2/runtimeoptions/BUILD
@@ -1,4 +1,4 @@
-load("//tools:defs.bzl", "go_library", "proto_library")
+load("//tools:defs.bzl", "go_library", "go_test", "proto_library")
 
 package(licenses = ["notice"])
 
@@ -14,7 +14,19 @@ go_library(
     srcs = ["runtimeoptions.go"],
     visibility = ["//pkg/shim/v2:__pkg__"],
     deps = [
-        "//pkg/shim/v2/runtimeoptions:api_go_proto",
+        ":api_go_proto",
         "@com_github_gogo_protobuf//proto:go_default_library",
     ],
 )
+
+go_test(
+    name = "runtimeoptions_test",
+    size = "small",
+    srcs = ["runtimeoptions_test.go"],
+    library = ":runtimeoptions",
+    deps = [
+        "@com_github_containerd_containerd//runtime/v1/shim/v1:go_default_library",
+        "@com_github_containerd_typeurl//:go_default_library",
+        "@com_github_golang_protobuf//proto:go_default_library",
+    ],
+)
diff --git a/pkg/shim/v2/runtimeoptions/runtimeoptions.go b/pkg/shim/v2/runtimeoptions/runtimeoptions.go
index 1c1a0c5d13..aaf17b87a7 100644
--- a/pkg/shim/v2/runtimeoptions/runtimeoptions.go
+++ b/pkg/shim/v2/runtimeoptions/runtimeoptions.go
@@ -23,5 +23,8 @@ import (
 type Options = pb.Options
 
 func init() {
+	// The generated proto file auto-registers with the "golang/protobuf/proto"
+	// package. However, typeurl uses "gogo/protobuf/proto", so register the
+	// type there too.
 	proto.RegisterType((*Options)(nil), "cri.runtimeoptions.v1.Options")
 }
diff --git a/pkg/shim/v2/runtimeoptions/runtimeoptions.proto b/pkg/shim/v2/runtimeoptions/runtimeoptions.proto
index edb19020af..057032e348 100644
--- a/pkg/shim/v2/runtimeoptions/runtimeoptions.proto
+++ b/pkg/shim/v2/runtimeoptions/runtimeoptions.proto
@@ -14,11 +14,11 @@
 
 syntax = "proto3";
 
-package runtimeoptions;
+package cri.runtimeoptions.v1;
 
 // This is a version of the runtimeoptions CRI API that is vendored.
 //
-// Imported the full CRI package is a nightmare.
+// Importing the full CRI package is a nightmare.
 message Options {
   string type_url = 1;
   string config_path = 2;
diff --git a/pkg/shim/v2/runtimeoptions/runtimeoptions_test.go b/pkg/shim/v2/runtimeoptions/runtimeoptions_test.go
new file mode 100644
index 0000000000..f4c238a009
--- /dev/null
+++ b/pkg/shim/v2/runtimeoptions/runtimeoptions_test.go
@@ -0,0 +1,52 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package runtimeoptions
+
+import (
+	"testing"
+
+	shim "github.com/containerd/containerd/runtime/v1/shim/v1"
+	"github.com/containerd/typeurl"
+	"github.com/golang/protobuf/proto"
+)
+
+func TestCreateTaskRequest(t *testing.T) {
+	// Serialize the top-level message.
+	const encodedText = `options: <
+  type_url: "cri.runtimeoptions.v1.Options"
+  value: "\n\010type_url\022\013config_path"
+>`
+	got := &shim.CreateTaskRequest{} // Should have raw options.
+	if err := proto.UnmarshalText(encodedText, got); err != nil {
+		t.Fatalf("unable to unmarshal text: %v", err)
+	}
+	t.Logf("got: %s", proto.MarshalTextString(got))
+
+	// Check the options.
+	wantOptions := &Options{}
+	wantOptions.TypeUrl = "type_url"
+	wantOptions.ConfigPath = "config_path"
+	gotMessage, err := typeurl.UnmarshalAny(got.Options)
+	if err != nil {
+		t.Fatalf("unable to unmarshal any: %v", err)
+	}
+	gotOptions, ok := gotMessage.(*Options)
+	if !ok {
+		t.Fatalf("got %v, want %v", gotMessage, wantOptions)
+	}
+	if !proto.Equal(gotOptions, wantOptions) {
+		t.Fatalf("got %v, want %v", gotOptions, wantOptions)
+	}
+}

From 15f7c43b75f34635261df05003a4d58519bbe02e Mon Sep 17 00:00:00 2001
From: Jamie Liu <jamieliu@google.com>
Date: Wed, 19 Aug 2020 11:50:54 -0700
Subject: [PATCH 030/211] Remove use of channels from p9.connState legacy
 transport.

- Remove sendDone, which currently does nothing whatsoever (errors sent to the
  channel are completely unused). Instead, have request handlers log errors
  they get from p9.send() inline.

- Replace recvOkay and recvDone with recvMu/recvIdle/recvShutdown. In addition
  to being slightly clearer (IMO), this eliminates the p9.connState.service()
  goroutine, significantly reducing the overhead involved in passing connection
  receive access between goroutines (from buffered chan send/recv + unbuffered
  chan send/recv to just a mutex unlock/lock).

PiperOrigin-RevId: 327476755
---
 pkg/p9/server.go | 147 ++++++++++++++++++++++-------------------------
 1 file changed, 68 insertions(+), 79 deletions(-)

diff --git a/pkg/p9/server.go b/pkg/p9/server.go
index b9f15e4ed4..3736f12a3b 100644
--- a/pkg/p9/server.go
+++ b/pkg/p9/server.go
@@ -60,12 +60,6 @@ type connState struct {
 	// server is the backing server.
 	server *Server
 
-	// sendMu is the send lock.
-	sendMu sync.Mutex
-
-	// conn is the connection.
-	conn *unet.Socket
-
 	// fids is the set of active FIDs.
 	//
 	// This is used to find FIDs for files.
@@ -92,14 +86,25 @@ type connState struct {
 
 	// -- below relates to the legacy handler --
 
-	// recvOkay indicates that a receive may start.
-	recvOkay chan bool
+	// recvMu serializes receiving from conn.
+	recvMu sync.Mutex
+
+	// recvIdle is the number of goroutines in handleRequests() attempting to
+	// lock recvMu so that they can receive from conn. recvIdle is accessed
+	// using atomic memory operations.
+	recvIdle int32
+
+	// If recvShutdown is true, at least one goroutine has observed a
+	// connection error while receiving from conn, and all goroutines in
+	// handleRequests() should exit immediately. recvShutdown is protected by
+	// recvMu.
+	recvShutdown bool
 
-	// recvDone is signalled when a message is received.
-	recvDone chan error
+	// sendMu serializes sending to conn.
+	sendMu sync.Mutex
 
-	// sendDone is signalled when a send is finished.
-	sendDone chan error
+	// conn is the connection used by the legacy transport.
+	conn *unet.Socket
 
 	// -- below relates to the flipcall handler --
 
@@ -508,11 +513,21 @@ func (cs *connState) handle(m message) (r message) {
 	return
 }
 
-// handleRequest handles a single request.
-//
-// The recvDone channel is signaled when recv is done (with a error if
-// necessary). The sendDone channel is signaled with the result of the send.
-func (cs *connState) handleRequest() {
+// handleRequest handles a single request. It returns true if the caller should
+// continue handling requests and false if it should terminate.
+func (cs *connState) handleRequest() bool {
+	// Obtain the right to receive a message from cs.conn.
+	atomic.AddInt32(&cs.recvIdle, 1)
+	cs.recvMu.Lock()
+	atomic.AddInt32(&cs.recvIdle, -1)
+
+	if cs.recvShutdown {
+		// Another goroutine already detected a connection problem; exit
+		// immediately.
+		cs.recvMu.Unlock()
+		return false
+	}
+
 	messageSize := atomic.LoadUint32(&cs.messageSize)
 	if messageSize == 0 {
 		// Default or not yet negotiated.
@@ -523,12 +538,17 @@ func (cs *connState) handleRequest() {
 	tag, m, err := recv(cs.conn, messageSize, msgRegistry.get)
 	if errSocket, ok := err.(ErrSocket); ok {
 		// Connection problem; stop serving.
-		cs.recvDone <- errSocket.error
-		return
+		log.Debugf("p9.recv: %v", errSocket.error)
+		cs.recvShutdown = true
+		cs.recvMu.Unlock()
+		return false
 	}
 
-	// Signal receive is done.
-	cs.recvDone <- nil
+	// Ensure that another goroutine is available to receive from cs.conn.
+	if atomic.LoadInt32(&cs.recvIdle) == 0 {
+		go cs.handleRequests() // S/R-SAFE: Irrelevant.
+	}
+	cs.recvMu.Unlock()
 
 	// Deal with other errors.
 	if err != nil && err != io.EOF {
@@ -537,16 +557,17 @@ func (cs *connState) handleRequest() {
 		cs.sendMu.Lock()
 		err := send(cs.conn, tag, newErr(err))
 		cs.sendMu.Unlock()
-		cs.sendDone <- err
-		return
+		if err != nil {
+			log.Debugf("p9.send: %v", err)
+		}
+		return true
 	}
 
 	// Try to start the tag.
 	if !cs.StartTag(tag) {
 		// Nothing we can do at this point; client is bogus.
 		log.Debugf("no valid tag [%05d]", tag)
-		cs.sendDone <- ErrNoValidMessage
-		return
+		return true
 	}
 
 	// Handle the message.
@@ -560,15 +581,21 @@ func (cs *connState) handleRequest() {
 	cs.sendMu.Lock()
 	err = send(cs.conn, tag, r)
 	cs.sendMu.Unlock()
-	cs.sendDone <- err
+	if err != nil {
+		log.Debugf("p9.send: %v", err)
+	}
 
 	// Return the message to the cache.
 	msgRegistry.put(m)
+
+	return true
 }
 
 func (cs *connState) handleRequests() {
-	for range cs.recvOkay {
-		cs.handleRequest()
+	for {
+		if !cs.handleRequest() {
+			return
+		}
 	}
 }
 
@@ -578,11 +605,6 @@ func (cs *connState) stop() {
 	// us with SIGABRT to get a stack dump of the offending handler.
 	cs.pendingWg.Wait()
 
-	// Close all channels.
-	close(cs.recvOkay)
-	close(cs.recvDone)
-	close(cs.sendDone)
-
 	// Free the channels.
 	cs.channelMu.Lock()
 	for _, ch := range cs.channels {
@@ -600,6 +622,9 @@ func (cs *connState) stop() {
 		cs.channelAlloc.Destroy()
 	}
 
+	// Ensure the connection is closed.
+	cs.conn.Close()
+
 	// Close all remaining fids.
 	for fid, fidRef := range cs.fids {
 		delete(cs.fids, fid)
@@ -609,59 +634,23 @@ func (cs *connState) stop() {
 		// handlers running via the wait for Pending => 0 below.
 		fidRef.DecRef()
 	}
-
-	// Ensure the connection is closed.
-	cs.conn.Close()
-}
-
-// service services requests concurrently.
-func (cs *connState) service() error {
-	// Start the first request handler.
-	go cs.handleRequests() // S/R-SAFE: Irrelevant.
-	cs.recvOkay <- true
-
-	// We loop and make sure there's always one goroutine waiting for a new
-	// request. We process all the data for a single request in one
-	// goroutine however, to ensure the best turnaround time possible.
-	for {
-		select {
-		case err := <-cs.recvDone:
-			if err != nil {
-				return err
-			}
-
-			// Kick the next receiver, or start a new handler
-			// if no receiver is currently waiting.
-			select {
-			case cs.recvOkay <- true:
-			default:
-				go cs.handleRequests() // S/R-SAFE: Irrelevant.
-				cs.recvOkay <- true
-			}
-
-		case <-cs.sendDone:
-			// Error sending a response? Nothing can be done.
-			//
-			// We don't terminate on a send error though, since
-			// we still have a pending receive. The error would
-			// have been logged above, we just ignore it here.
-		}
-	}
 }
 
 // Handle handles a single connection.
 func (s *Server) Handle(conn *unet.Socket) error {
 	cs := &connState{
-		server:   s,
-		conn:     conn,
-		fids:     make(map[FID]*fidRef),
-		tags:     make(map[Tag]chan struct{}),
-		recvOkay: make(chan bool),
-		recvDone: make(chan error, 10),
-		sendDone: make(chan error, 10),
+		server: s,
+		fids:   make(map[FID]*fidRef),
+		tags:   make(map[Tag]chan struct{}),
+		conn:   conn,
 	}
 	defer cs.stop()
-	return cs.service()
+
+	// Serve requests from conn in the current goroutine; handleRequests() will
+	// create more goroutines as needed.
+	cs.handleRequests()
+
+	return nil
 }
 
 // Serve handles requests from the bound socket.

From 2915cc7f49ed03466badb0e940b765837afe64d2 Mon Sep 17 00:00:00 2001
From: Kevin Krakauer <krakauer@google.com>
Date: Wed, 19 Aug 2020 11:55:21 -0700
Subject: [PATCH 031/211] ip6tables: test initial state

Tests that we have the correct initial (empty) state for ip6tables.

#3549

PiperOrigin-RevId: 327477657
---
 test/syscalls/BUILD              |   4 +
 test/syscalls/linux/BUILD        |  18 ++++
 test/syscalls/linux/ip6tables.cc | 163 +++++++++++++++++++++++++++++++
 test/syscalls/linux/iptables.cc  |   6 +-
 test/syscalls/linux/iptables.h   | 132 ++++++++++++++++++++++---
 5 files changed, 306 insertions(+), 17 deletions(-)
 create mode 100644 test/syscalls/linux/ip6tables.cc

diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD
index 0eadc6b084..d11412c55c 100644
--- a/test/syscalls/BUILD
+++ b/test/syscalls/BUILD
@@ -250,6 +250,10 @@ syscall_test(
     test = "//test/syscalls/linux:iptables_test",
 )
 
+syscall_test(
+    test = "//test/syscalls/linux:ip6tables_test",
+)
+
 syscall_test(
     size = "large",
     shard_count = 5,
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index 6299870bc0..bd1d9584a8 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -1029,6 +1029,24 @@ cc_binary(
     ],
 )
 
+cc_binary(
+    name = "ip6tables_test",
+    testonly = 1,
+    srcs = [
+        "ip6tables.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":iptables_types",
+        ":socket_test_util",
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
 cc_binary(
     name = "itimer_test",
     testonly = 1,
diff --git a/test/syscalls/linux/ip6tables.cc b/test/syscalls/linux/ip6tables.cc
new file mode 100644
index 0000000000..685e513f86
--- /dev/null
+++ b/test/syscalls/linux/ip6tables.cc
@@ -0,0 +1,163 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <linux/capability.h>
+#include <sys/socket.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/iptables.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr char kNatTablename[] = "nat";
+constexpr char kErrorTarget[] = "ERROR";
+constexpr size_t kEmptyStandardEntrySize =
+    sizeof(struct ip6t_entry) + sizeof(struct xt_standard_target);
+constexpr size_t kEmptyErrorEntrySize =
+    sizeof(struct ip6t_entry) + sizeof(struct xt_error_target);
+
+// This tests the initial state of a machine with empty ip6tables via
+// getsockopt(IP6T_SO_GET_INFO). We don't have a guarantee that the iptables are
+// empty when running in native, but we can test that gVisor has the same
+// initial state that a newly-booted Linux machine would have.
+TEST(IP6TablesTest, InitialInfo) {
+  // TODO(gvisor.dev/issue/3549): Enable for ip6tables.
+  SKIP_IF(true);
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  FileDescriptor sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_RAW, IPPROTO_RAW));
+
+  // Get info via sockopt.
+  struct ipt_getinfo info = {};
+  snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename);
+  socklen_t info_size = sizeof(info);
+  ASSERT_THAT(
+      getsockopt(sock.get(), SOL_IPV6, IP6T_SO_GET_INFO, &info, &info_size),
+      SyscallSucceeds());
+
+  // The nat table supports PREROUTING, INPUT, OUTPUT, and POSTROUTING.
+  unsigned int valid_hooks =
+      (1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_OUT) |
+      (1 << NF_IP6_POST_ROUTING) | (1 << NF_IP6_LOCAL_IN);
+  EXPECT_EQ(info.valid_hooks, valid_hooks);
+
+  // Each chain consists of an empty entry with a standard target.
+  EXPECT_EQ(info.hook_entry[NF_IP6_PRE_ROUTING], 0);
+  EXPECT_EQ(info.hook_entry[NF_IP6_LOCAL_IN], kEmptyStandardEntrySize);
+  EXPECT_EQ(info.hook_entry[NF_IP6_LOCAL_OUT], kEmptyStandardEntrySize * 2);
+  EXPECT_EQ(info.hook_entry[NF_IP6_POST_ROUTING], kEmptyStandardEntrySize * 3);
+
+  // The underflow points are the same as the entry points.
+  EXPECT_EQ(info.underflow[NF_IP6_PRE_ROUTING], 0);
+  EXPECT_EQ(info.underflow[NF_IP6_LOCAL_IN], kEmptyStandardEntrySize);
+  EXPECT_EQ(info.underflow[NF_IP6_LOCAL_OUT], kEmptyStandardEntrySize * 2);
+  EXPECT_EQ(info.underflow[NF_IP6_POST_ROUTING], kEmptyStandardEntrySize * 3);
+
+  // One entry for each chain, plus an error entry at the end.
+  EXPECT_EQ(info.num_entries, 5);
+
+  EXPECT_EQ(info.size, 4 * kEmptyStandardEntrySize + kEmptyErrorEntrySize);
+  EXPECT_EQ(strcmp(info.name, kNatTablename), 0);
+}
+
+// This tests the initial state of a machine with empty ip6tables via
+// getsockopt(IP6T_SO_GET_ENTRIES). We don't have a guarantee that the iptables
+// are empty when running in native, but we can test that gVisor has the same
+// initial state that a newly-booted Linux machine would have.
+TEST(IP6TablesTest, InitialEntries) {
+  // TODO(gvisor.dev/issue/3549): Enable for ip6tables.
+  SKIP_IF(true);
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  FileDescriptor sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_RAW, IPPROTO_RAW));
+
+  // Get info via sockopt.
+  struct ipt_getinfo info = {};
+  snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename);
+  socklen_t info_size = sizeof(info);
+  ASSERT_THAT(
+      getsockopt(sock.get(), SOL_IPV6, IP6T_SO_GET_INFO, &info, &info_size),
+      SyscallSucceeds());
+
+  // Use info to get entries.
+  socklen_t entries_size = sizeof(struct ip6t_get_entries) + info.size;
+  struct ip6t_get_entries* entries =
+      static_cast<struct ip6t_get_entries*>(malloc(entries_size));
+  snprintf(entries->name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename);
+  entries->size = info.size;
+  ASSERT_THAT(getsockopt(sock.get(), SOL_IPV6, IP6T_SO_GET_ENTRIES, entries,
+                         &entries_size),
+              SyscallSucceeds());
+
+  // Verify the name and size.
+  ASSERT_EQ(info.size, entries->size);
+  ASSERT_EQ(strcmp(entries->name, kNatTablename), 0);
+
+  // Verify that the entrytable is 4 entries with accept targets and no matches
+  // followed by a single error target.
+  size_t entry_offset = 0;
+  while (entry_offset < entries->size) {
+    struct ip6t_entry* entry = reinterpret_cast<struct ip6t_entry*>(
+        reinterpret_cast<char*>(entries->entrytable) + entry_offset);
+
+    // ipv6 should be zeroed.
+    struct ip6t_ip6 zeroed = {};
+    ASSERT_EQ(memcmp(static_cast<void*>(&zeroed),
+                     static_cast<void*>(&entry->ipv6), sizeof(zeroed)),
+              0);
+
+    // With no matches, the target immediately follows the entry header.
+    EXPECT_EQ(entry->target_offset, sizeof(ip6t_entry));
+
+    if (entry_offset < kEmptyStandardEntrySize * 4) {
+      // The first 4 entries are standard targets
+      struct xt_standard_target* target =
+          reinterpret_cast<struct xt_standard_target*>(entry->elems);
+      EXPECT_EQ(entry->next_offset, kEmptyStandardEntrySize);
+      EXPECT_EQ(target->target.u.user.target_size, sizeof(*target));
+      EXPECT_EQ(strcmp(target->target.u.user.name, ""), 0);
+      EXPECT_EQ(target->target.u.user.revision, 0);
+      // Standard verdicts are encoded as (-verdict - 1), so this is ACCEPT.
+      EXPECT_EQ(target->verdict, -NF_ACCEPT - 1);
+    } else {
+      // The last entry is an error target
+      struct xt_error_target* target =
+          reinterpret_cast<struct xt_error_target*>(entry->elems);
+      EXPECT_EQ(entry->next_offset, kEmptyErrorEntrySize);
+      EXPECT_EQ(target->target.u.user.target_size, sizeof(*target));
+      EXPECT_EQ(strcmp(target->target.u.user.name, kErrorTarget), 0);
+      EXPECT_EQ(target->target.u.user.revision, 0);
+      EXPECT_EQ(strcmp(target->errorname, kErrorTarget), 0);
+    }
+
+    entry_offset += entry->next_offset;
+    break;
+  }
+
+  free(entries);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/iptables.cc b/test/syscalls/linux/iptables.cc
index b8e4ece649..9b338d9707 100644
--- a/test/syscalls/linux/iptables.cc
+++ b/test/syscalls/linux/iptables.cc
@@ -67,7 +67,7 @@ TEST(IPTablesBasic, FailSockoptNonRaw) {
   struct ipt_getinfo info = {};
   snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename);
   socklen_t info_size = sizeof(info);
-  EXPECT_THAT(getsockopt(sock, IPPROTO_IP, SO_GET_INFO, &info, &info_size),
+  EXPECT_THAT(getsockopt(sock, IPPROTO_IP, IPT_SO_GET_INFO, &info, &info_size),
               SyscallFailsWithErrno(ENOPROTOOPT));
 
   ASSERT_THAT(close(sock), SyscallSucceeds());
@@ -112,7 +112,7 @@ TEST_F(IPTablesTest, InitialState) {
   struct ipt_getinfo info = {};
   snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename);
   socklen_t info_size = sizeof(info);
-  ASSERT_THAT(getsockopt(s_, IPPROTO_IP, SO_GET_INFO, &info, &info_size),
+  ASSERT_THAT(getsockopt(s_, IPPROTO_IP, IPT_SO_GET_INFO, &info, &info_size),
               SyscallSucceeds());
 
   // The nat table supports PREROUTING, and OUTPUT.
@@ -148,7 +148,7 @@ TEST_F(IPTablesTest, InitialState) {
   snprintf(entries->name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename);
   entries->size = info.size;
   ASSERT_THAT(
-      getsockopt(s_, IPPROTO_IP, SO_GET_ENTRIES, entries, &entries_size),
+      getsockopt(s_, IPPROTO_IP, IPT_SO_GET_ENTRIES, entries, &entries_size),
       SyscallSucceeds());
 
   // Verify the name and size.
diff --git a/test/syscalls/linux/iptables.h b/test/syscalls/linux/iptables.h
index 0719c60a44..d0fc10feaa 100644
--- a/test/syscalls/linux/iptables.h
+++ b/test/syscalls/linux/iptables.h
@@ -27,27 +27,32 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
 #include <net/if.h>
 #include <netinet/ip.h>
 #include <stdint.h>
 
+//
+// IPv4 ABI.
+//
+
 #define ipt_standard_target xt_standard_target
 #define ipt_entry_target xt_entry_target
 #define ipt_error_target xt_error_target
 
 enum SockOpts {
   // For setsockopt.
-  BASE_CTL = 64,
-  SO_SET_REPLACE = BASE_CTL,
-  SO_SET_ADD_COUNTERS,
-  SO_SET_MAX = SO_SET_ADD_COUNTERS,
+  IPT_BASE_CTL = 64,
+  IPT_SO_SET_REPLACE = IPT_BASE_CTL,
+  IPT_SO_SET_ADD_COUNTERS = IPT_BASE_CTL + 1,
+  IPT_SO_SET_MAX = IPT_SO_SET_ADD_COUNTERS,
 
   // For getsockopt.
-  SO_GET_INFO = BASE_CTL,
-  SO_GET_ENTRIES,
-  SO_GET_REVISION_MATCH,
-  SO_GET_REVISION_TARGET,
-  SO_GET_MAX = SO_GET_REVISION_TARGET
+  IPT_SO_GET_INFO = IPT_BASE_CTL,
+  IPT_SO_GET_ENTRIES = IPT_BASE_CTL + 1,
+  IPT_SO_GET_REVISION_MATCH = IPT_BASE_CTL + 2,
+  IPT_SO_GET_REVISION_TARGET = IPT_BASE_CTL + 3,
+  IPT_SO_GET_MAX = IPT_SO_GET_REVISION_TARGET
 };
 
 // ipt_ip specifies basic matching criteria that can be applied by examining
@@ -115,7 +120,7 @@ struct ipt_entry {
   unsigned char elems[0];
 };
 
-// Passed to getsockopt(SO_GET_INFO).
+// Passed to getsockopt(IPT_SO_GET_INFO).
 struct ipt_getinfo {
   // The name of the table. The user only fills this in, the rest is filled in
   // when returning from getsockopt. Currently "nat" and "mangle" are supported.
@@ -127,7 +132,7 @@ struct ipt_getinfo {
   unsigned int valid_hooks;
 
   // The offset into the entry table for each valid hook. The entry table is
-  // returned by getsockopt(SO_GET_ENTRIES).
+  // returned by getsockopt(IPT_SO_GET_ENTRIES).
   unsigned int hook_entry[NF_IP_NUMHOOKS];
 
   // For each valid hook, the underflow is the offset into the entry table to
@@ -142,14 +147,14 @@ struct ipt_getinfo {
   unsigned int underflow[NF_IP_NUMHOOKS];
 
   // The number of entries in the entry table returned by
-  // getsockopt(SO_GET_ENTRIES).
+  // getsockopt(IPT_SO_GET_ENTRIES).
   unsigned int num_entries;
 
-  // The size of the entry table returned by getsockopt(SO_GET_ENTRIES).
+  // The size of the entry table returned by getsockopt(IPT_SO_GET_ENTRIES).
   unsigned int size;
 };
 
-// Passed to getsockopt(SO_GET_ENTRIES).
+// Passed to getsockopt(IPT_SO_GET_ENTRIES).
 struct ipt_get_entries {
   // The name of the table. The user fills this in. Currently "nat" and "mangle"
   // are supported.
@@ -195,4 +200,103 @@ struct ipt_replace {
   struct ipt_entry entries[0];
 };
 
+//
+// IPv6 ABI.
+//
+
+enum SockOpts6 {
+  // For setsockopt.
+  IP6T_BASE_CTL = 64,
+  IP6T_SO_SET_REPLACE = IP6T_BASE_CTL,
+  IP6T_SO_SET_ADD_COUNTERS = IP6T_BASE_CTL + 1,
+  IP6T_SO_SET_MAX = IP6T_SO_SET_ADD_COUNTERS,
+
+  // For getsockopt.
+  IP6T_SO_GET_INFO = IP6T_BASE_CTL,
+  IP6T_SO_GET_ENTRIES = IP6T_BASE_CTL + 1,
+  IP6T_SO_GET_REVISION_MATCH = IP6T_BASE_CTL + 4,
+  IP6T_SO_GET_REVISION_TARGET = IP6T_BASE_CTL + 5,
+  IP6T_SO_GET_MAX = IP6T_SO_GET_REVISION_TARGET
+};
+
+// ip6t_ip6 specifies basic matching criteria that can be applied by examining
+// only the IP header of a packet.
+struct ip6t_ip6 {
+  // Source IP address.
+  struct in6_addr src;
+
+  // Destination IP address.
+  struct in6_addr dst;
+
+  // Source IP address mask.
+  struct in6_addr smsk;
+
+  // Destination IP address mask.
+  struct in6_addr dmsk;
+
+  // Input interface.
+  char iniface[IFNAMSIZ];
+
+  // Output interface.
+  char outiface[IFNAMSIZ];
+
+  // Input interface mask.
+  unsigned char iniface_mask[IFNAMSIZ];
+
+  // Output interface mask.
+  unsigned char outiface_mask[IFNAMSIZ];
+
+  // Transport protocol.
+  uint16_t proto;
+
+  // TOS.
+  uint8_t tos;
+
+  // Flags.
+  uint8_t flags;
+
+  // Inverse flags.
+  uint8_t invflags;
+};
+
+// ip6t_entry is an ip6tables rule.
+struct ip6t_entry {
+  // Basic matching information used to match a packet's IP header.
+  struct ip6t_ip6 ipv6;
+
+  // A caching field that isn't used by userspace.
+  unsigned int nfcache;
+
+  // The number of bytes between the start of this entry and the rule's target.
+  uint16_t target_offset;
+
+  // The total size of this rule, from the beginning of the entry to the end of
+  // the target.
+  uint16_t next_offset;
+
+  // A return pointer not used by userspace.
+  unsigned int comefrom;
+
+  // Counters for packets and bytes, which we don't yet implement.
+  struct xt_counters counters;
+
+  // The data for all of this rule's matches followed by the target. This runs
+  // beyond the value of sizeof(struct ip6t_entry).
+  unsigned char elems[0];
+};
+
+// Passed to getsockopt(IP6T_SO_GET_ENTRIES).
+struct ip6t_get_entries {
+  // The name of the table.
+  char name[XT_TABLE_MAXNAMELEN];
+
+  // The size of the entry table in bytes. The user fills this in with the value
+  // from struct ipt_getinfo.size.
+  unsigned int size;
+
+  // The entries for the given table. This will run past the size defined by
+  // sizeof(struct ip6t_get_entries).
+  struct ip6t_entry entrytable[0];
+};
+
 #endif  // GVISOR_TEST_SYSCALLS_IPTABLES_TYPES_H_

From 167b2efc94816b0ff823e12c22023c3ccbd16ae9 Mon Sep 17 00:00:00 2001
From: Kevin Krakauer <krakauer@google.com>
Date: Wed, 19 Aug 2020 13:45:20 -0700
Subject: [PATCH 032/211] ip6tables: move ipv4-specific logic into its own file

A later change will introduce the equivalent IPv6 logic.

#3549

PiperOrigin-RevId: 327499064
---
 pkg/sentry/socket/netfilter/BUILD           |   1 +
 pkg/sentry/socket/netfilter/ipv4.go         | 235 ++++++++++++++++++
 pkg/sentry/socket/netfilter/netfilter.go    | 262 +++-----------------
 pkg/sentry/socket/netstack/netstack.go      |  13 +-
 pkg/sentry/socket/netstack/netstack_vfs2.go |  13 +-
 pkg/sentry/strace/socket.go                 |   2 +
 6 files changed, 293 insertions(+), 233 deletions(-)
 create mode 100644 pkg/sentry/socket/netfilter/ipv4.go

diff --git a/pkg/sentry/socket/netfilter/BUILD b/pkg/sentry/socket/netfilter/BUILD
index 721094bbfd..795620589d 100644
--- a/pkg/sentry/socket/netfilter/BUILD
+++ b/pkg/sentry/socket/netfilter/BUILD
@@ -6,6 +6,7 @@ go_library(
     name = "netfilter",
     srcs = [
         "extensions.go",
+        "ipv4.go",
         "netfilter.go",
         "owner_matcher.go",
         "targets.go",
diff --git a/pkg/sentry/socket/netfilter/ipv4.go b/pkg/sentry/socket/netfilter/ipv4.go
new file mode 100644
index 0000000000..4fb887e494
--- /dev/null
+++ b/pkg/sentry/socket/netfilter/ipv4.go
@@ -0,0 +1,235 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package netfilter
+
+import (
+	"bytes"
+	"fmt"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/binary"
+	"gvisor.dev/gvisor/pkg/syserr"
+	"gvisor.dev/gvisor/pkg/tcpip"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
+	"gvisor.dev/gvisor/pkg/tcpip/stack"
+	"gvisor.dev/gvisor/pkg/usermem"
+)
+
+// emptyIPv4Filter is for comparison with a rule's filters to determine whether
+// it is also empty. It is immutable.
+var emptyIPv4Filter = stack.IPHeaderFilter{
+	Dst:     "\x00\x00\x00\x00",
+	DstMask: "\x00\x00\x00\x00",
+	Src:     "\x00\x00\x00\x00",
+	SrcMask: "\x00\x00\x00\x00",
+}
+
+func getEntries4(table stack.Table, info *linux.IPTGetinfo) linux.KernelIPTGetEntries {
+	var entries linux.KernelIPTGetEntries
+	copy(entries.Name[:], info.Name[:])
+
+	for ruleIdx, rule := range table.Rules {
+		nflog("convert to binary: current offset: %d", entries.Size)
+
+		setHooksAndUnderflow(info, table, entries.Size, ruleIdx)
+		// Each rule corresponds to an entry.
+		entry := linux.KernelIPTEntry{
+			Entry: linux.IPTEntry{
+				IP: linux.IPTIP{
+					Protocol: uint16(rule.Filter.Protocol),
+				},
+				NextOffset:   linux.SizeOfIPTEntry,
+				TargetOffset: linux.SizeOfIPTEntry,
+			},
+		}
+		copy(entry.Entry.IP.Dst[:], rule.Filter.Dst)
+		copy(entry.Entry.IP.DstMask[:], rule.Filter.DstMask)
+		copy(entry.Entry.IP.Src[:], rule.Filter.Src)
+		copy(entry.Entry.IP.SrcMask[:], rule.Filter.SrcMask)
+		copy(entry.Entry.IP.OutputInterface[:], rule.Filter.OutputInterface)
+		copy(entry.Entry.IP.OutputInterfaceMask[:], rule.Filter.OutputInterfaceMask)
+		if rule.Filter.DstInvert {
+			entry.Entry.IP.InverseFlags |= linux.IPT_INV_DSTIP
+		}
+		if rule.Filter.SrcInvert {
+			entry.Entry.IP.InverseFlags |= linux.IPT_INV_SRCIP
+		}
+		if rule.Filter.OutputInterfaceInvert {
+			entry.Entry.IP.InverseFlags |= linux.IPT_INV_VIA_OUT
+		}
+
+		for _, matcher := range rule.Matchers {
+			// Serialize the matcher and add it to the
+			// entry.
+			serialized := marshalMatcher(matcher)
+			nflog("convert to binary: matcher serialized as: %v", serialized)
+			if len(serialized)%8 != 0 {
+				panic(fmt.Sprintf("matcher %T is not 64-bit aligned", matcher))
+			}
+			entry.Elems = append(entry.Elems, serialized...)
+			entry.Entry.NextOffset += uint16(len(serialized))
+			entry.Entry.TargetOffset += uint16(len(serialized))
+		}
+
+		// Serialize and append the target.
+		serialized := marshalTarget(rule.Target)
+		if len(serialized)%8 != 0 {
+			panic(fmt.Sprintf("target %T is not 64-bit aligned", rule.Target))
+		}
+		entry.Elems = append(entry.Elems, serialized...)
+		entry.Entry.NextOffset += uint16(len(serialized))
+
+		nflog("convert to binary: adding entry: %+v", entry)
+
+		entries.Size += uint32(entry.Entry.NextOffset)
+		entries.Entrytable = append(entries.Entrytable, entry)
+		info.NumEntries++
+	}
+
+	info.Size = entries.Size
+	nflog("convert to binary: finished with an marshalled size of %d", info.Size)
+	return entries
+}
+
+func modifyEntries4(stk *stack.Stack, optVal []byte, replace *linux.IPTReplace, table *stack.Table) (map[uint32]int, *syserr.Error) {
+	nflog("set entries: setting entries in table %q", replace.Name.String())
+
+	// Convert input into a list of rules and their offsets.
+	var offset uint32
+	// offsets maps rule byte offsets to their position in table.Rules.
+	offsets := map[uint32]int{}
+	for entryIdx := uint32(0); entryIdx < replace.NumEntries; entryIdx++ {
+		nflog("set entries: processing entry at offset %d", offset)
+
+		// Get the struct ipt_entry.
+		if len(optVal) < linux.SizeOfIPTEntry {
+			nflog("optVal has insufficient size for entry %d", len(optVal))
+			return nil, syserr.ErrInvalidArgument
+		}
+		var entry linux.IPTEntry
+		buf := optVal[:linux.SizeOfIPTEntry]
+		binary.Unmarshal(buf, usermem.ByteOrder, &entry)
+		initialOptValLen := len(optVal)
+		optVal = optVal[linux.SizeOfIPTEntry:]
+
+		if entry.TargetOffset < linux.SizeOfIPTEntry {
+			nflog("entry has too-small target offset %d", entry.TargetOffset)
+			return nil, syserr.ErrInvalidArgument
+		}
+
+		// TODO(gvisor.dev/issue/170): We should support more IPTIP
+		// filtering fields.
+		filter, err := filterFromIPTIP(entry.IP)
+		if err != nil {
+			nflog("bad iptip: %v", err)
+			return nil, syserr.ErrInvalidArgument
+		}
+
+		// TODO(gvisor.dev/issue/170): Matchers and targets can specify
+		// that they only work for certain protocols, hooks, tables.
+		// Get matchers.
+		matchersSize := entry.TargetOffset - linux.SizeOfIPTEntry
+		if len(optVal) < int(matchersSize) {
+			nflog("entry doesn't have enough room for its matchers (only %d bytes remain)", len(optVal))
+			return nil, syserr.ErrInvalidArgument
+		}
+		matchers, err := parseMatchers(filter, optVal[:matchersSize])
+		if err != nil {
+			nflog("failed to parse matchers: %v", err)
+			return nil, syserr.ErrInvalidArgument
+		}
+		optVal = optVal[matchersSize:]
+
+		// Get the target of the rule.
+		targetSize := entry.NextOffset - entry.TargetOffset
+		if len(optVal) < int(targetSize) {
+			nflog("entry doesn't have enough room for its target (only %d bytes remain)", len(optVal))
+			return nil, syserr.ErrInvalidArgument
+		}
+		target, err := parseTarget(filter, optVal[:targetSize])
+		if err != nil {
+			nflog("failed to parse target: %v", err)
+			return nil, syserr.ErrInvalidArgument
+		}
+		optVal = optVal[targetSize:]
+
+		table.Rules = append(table.Rules, stack.Rule{
+			Filter:   filter,
+			Target:   target,
+			Matchers: matchers,
+		})
+		offsets[offset] = int(entryIdx)
+		offset += uint32(entry.NextOffset)
+
+		if initialOptValLen-len(optVal) != int(entry.NextOffset) {
+			nflog("entry NextOffset is %d, but entry took up %d bytes", entry.NextOffset, initialOptValLen-len(optVal))
+			return nil, syserr.ErrInvalidArgument
+		}
+	}
+	return offsets, nil
+}
+
+func filterFromIPTIP(iptip linux.IPTIP) (stack.IPHeaderFilter, error) {
+	if containsUnsupportedFields4(iptip) {
+		return stack.IPHeaderFilter{}, fmt.Errorf("unsupported fields in struct iptip: %+v", iptip)
+	}
+	if len(iptip.Dst) != header.IPv4AddressSize || len(iptip.DstMask) != header.IPv4AddressSize {
+		return stack.IPHeaderFilter{}, fmt.Errorf("incorrect length of destination (%d) and/or destination mask (%d) fields", len(iptip.Dst), len(iptip.DstMask))
+	}
+	if len(iptip.Src) != header.IPv4AddressSize || len(iptip.SrcMask) != header.IPv4AddressSize {
+		return stack.IPHeaderFilter{}, fmt.Errorf("incorrect length of source (%d) and/or source mask (%d) fields", len(iptip.Src), len(iptip.SrcMask))
+	}
+
+	n := bytes.IndexByte([]byte(iptip.OutputInterface[:]), 0)
+	if n == -1 {
+		n = len(iptip.OutputInterface)
+	}
+	ifname := string(iptip.OutputInterface[:n])
+
+	n = bytes.IndexByte([]byte(iptip.OutputInterfaceMask[:]), 0)
+	if n == -1 {
+		n = len(iptip.OutputInterfaceMask)
+	}
+	ifnameMask := string(iptip.OutputInterfaceMask[:n])
+
+	return stack.IPHeaderFilter{
+		Protocol:              tcpip.TransportProtocolNumber(iptip.Protocol),
+		Dst:                   tcpip.Address(iptip.Dst[:]),
+		DstMask:               tcpip.Address(iptip.DstMask[:]),
+		DstInvert:             iptip.InverseFlags&linux.IPT_INV_DSTIP != 0,
+		Src:                   tcpip.Address(iptip.Src[:]),
+		SrcMask:               tcpip.Address(iptip.SrcMask[:]),
+		SrcInvert:             iptip.InverseFlags&linux.IPT_INV_SRCIP != 0,
+		OutputInterface:       ifname,
+		OutputInterfaceMask:   ifnameMask,
+		OutputInterfaceInvert: iptip.InverseFlags&linux.IPT_INV_VIA_OUT != 0,
+	}, nil
+}
+
+func containsUnsupportedFields4(iptip linux.IPTIP) bool {
+	// The following features are supported:
+	// - Protocol
+	// - Dst and DstMask
+	// - Src and SrcMask
+	// - The inverse destination and source IP check flags
+	// - OutputInterface, OutputInterfaceMask and its inverse.
+	var emptyInterface = [linux.IFNAMSIZ]byte{}
+	// Disable any supported inverse flags.
+	inverseMask := uint8(linux.IPT_INV_DSTIP) | uint8(linux.IPT_INV_SRCIP) | uint8(linux.IPT_INV_VIA_OUT)
+	return iptip.InputInterface != emptyInterface ||
+		iptip.InputInterfaceMask != emptyInterface ||
+		iptip.Flags != 0 ||
+		iptip.InverseFlags&^inverseMask != 0
+}
diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go
index e91b0624cd..df256676fe 100644
--- a/pkg/sentry/socket/netfilter/netfilter.go
+++ b/pkg/sentry/socket/netfilter/netfilter.go
@@ -17,7 +17,6 @@
 package netfilter
 
 import (
-	"bytes"
 	"errors"
 	"fmt"
 
@@ -26,8 +25,6 @@ import (
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/syserr"
-	"gvisor.dev/gvisor/pkg/tcpip"
-	"gvisor.dev/gvisor/pkg/tcpip/header"
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 	"gvisor.dev/gvisor/pkg/usermem"
 )
@@ -37,15 +34,6 @@ import (
 // developing iptables, but can pollute sentry logs otherwise.
 const enableLogging = false
 
-// emptyFilter is for comparison with a rule's filters to determine whether it
-// is also empty. It is immutable.
-var emptyFilter = stack.IPHeaderFilter{
-	Dst:     "\x00\x00\x00\x00",
-	DstMask: "\x00\x00\x00\x00",
-	Src:     "\x00\x00\x00\x00",
-	SrcMask: "\x00\x00\x00\x00",
-}
-
 // nflog logs messages related to the writing and reading of iptables.
 func nflog(format string, args ...interface{}) {
 	if enableLogging && log.IsLogging(log.Debug) {
@@ -71,9 +59,9 @@ func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr) (linux.IPT
 	return info, nil
 }
 
-// GetEntries returns netstack's iptables rules encoded for the iptables tool.
-func GetEntries(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen int) (linux.KernelIPTGetEntries, *syserr.Error) {
-	// Read in the struct and table name.
+// GetEntries4 returns netstack's iptables rules encoded for the iptables tool.
+func GetEntries4(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen int) (linux.KernelIPTGetEntries, *syserr.Error) {
+	// Read in the ABI struct.
 	var userEntries linux.IPTGetEntries
 	if _, err := userEntries.CopyIn(t, outPtr); err != nil {
 		nflog("couldn't copy in entries %q", userEntries.Name)
@@ -99,108 +87,48 @@ func GetEntries(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen
 // format expected by the iptables tool. Linux stores each table as a binary
 // blob that can only be traversed by parsing a bit, reading some offsets,
 // jumping to those offsets, parsing again, etc.
-func convertNetstackToBinary(stack *stack.Stack, tablename linux.TableName) (linux.KernelIPTGetEntries, linux.IPTGetinfo, error) {
-	table, ok := stack.IPTables().GetTable(tablename.String())
+func convertNetstackToBinary(stk *stack.Stack, tablename linux.TableName) (linux.KernelIPTGetEntries, linux.IPTGetinfo, error) {
+	// The table name has to fit in the struct.
+	if linux.XT_TABLE_MAXNAMELEN < len(tablename) {
+		return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("table name %q too long", tablename)
+	}
+
+	table, ok := stk.IPTables().GetTable(tablename.String())
 	if !ok {
 		return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("couldn't find table %q", tablename)
 	}
 
-	var entries linux.KernelIPTGetEntries
+	// Setup the info struct.
 	var info linux.IPTGetinfo
 	info.ValidHooks = table.ValidHooks()
-
-	// The table name has to fit in the struct.
-	if linux.XT_TABLE_MAXNAMELEN < len(tablename) {
-		return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("table name %q too long", tablename)
-	}
 	copy(info.Name[:], tablename[:])
-	copy(entries.Name[:], tablename[:])
-
-	for ruleIdx, rule := range table.Rules {
-		nflog("convert to binary: current offset: %d", entries.Size)
-
-		// Is this a chain entry point?
-		for hook, hookRuleIdx := range table.BuiltinChains {
-			if hookRuleIdx == ruleIdx {
-				nflog("convert to binary: found hook %d at offset %d", hook, entries.Size)
-				info.HookEntry[hook] = entries.Size
-			}
-		}
-		// Is this a chain underflow point?
-		for underflow, underflowRuleIdx := range table.Underflows {
-			if underflowRuleIdx == ruleIdx {
-				nflog("convert to binary: found underflow %d at offset %d", underflow, entries.Size)
-				info.Underflow[underflow] = entries.Size
-			}
-		}
 
-		// Each rule corresponds to an entry.
-		entry := linux.KernelIPTEntry{
-			Entry: linux.IPTEntry{
-				IP: linux.IPTIP{
-					Protocol: uint16(rule.Filter.Protocol),
-				},
-				NextOffset:   linux.SizeOfIPTEntry,
-				TargetOffset: linux.SizeOfIPTEntry,
-			},
-		}
-		copy(entry.Entry.IP.Dst[:], rule.Filter.Dst)
-		copy(entry.Entry.IP.DstMask[:], rule.Filter.DstMask)
-		copy(entry.Entry.IP.Src[:], rule.Filter.Src)
-		copy(entry.Entry.IP.SrcMask[:], rule.Filter.SrcMask)
-		copy(entry.Entry.IP.OutputInterface[:], rule.Filter.OutputInterface)
-		copy(entry.Entry.IP.OutputInterfaceMask[:], rule.Filter.OutputInterfaceMask)
-		if rule.Filter.DstInvert {
-			entry.Entry.IP.InverseFlags |= linux.IPT_INV_DSTIP
-		}
-		if rule.Filter.SrcInvert {
-			entry.Entry.IP.InverseFlags |= linux.IPT_INV_SRCIP
-		}
-		if rule.Filter.OutputInterfaceInvert {
-			entry.Entry.IP.InverseFlags |= linux.IPT_INV_VIA_OUT
-		}
+	entries := getEntries4(table, &info)
+	return entries, info, nil
+}
 
-		for _, matcher := range rule.Matchers {
-			// Serialize the matcher and add it to the
-			// entry.
-			serialized := marshalMatcher(matcher)
-			nflog("convert to binary: matcher serialized as: %v", serialized)
-			if len(serialized)%8 != 0 {
-				panic(fmt.Sprintf("matcher %T is not 64-bit aligned", matcher))
-			}
-			entry.Elems = append(entry.Elems, serialized...)
-			entry.Entry.NextOffset += uint16(len(serialized))
-			entry.Entry.TargetOffset += uint16(len(serialized))
+// setHooksAndUnderflow checks whether the rule at ruleIdx is a hook entrypoint
+// or underflow, in which case it fills in info.HookEntry and info.Underflow.
+func setHooksAndUnderflow(info *linux.IPTGetinfo, table stack.Table, offset uint32, ruleIdx int) {
+	// Is this a chain entry point?
+	for hook, hookRuleIdx := range table.BuiltinChains {
+		if hookRuleIdx == ruleIdx {
+			nflog("convert to binary: found hook %d at offset %d", hook, offset)
+			info.HookEntry[hook] = offset
 		}
-
-		// Serialize and append the target.
-		serialized := marshalTarget(rule.Target)
-		if len(serialized)%8 != 0 {
-			panic(fmt.Sprintf("target %T is not 64-bit aligned", rule.Target))
+	}
+	// Is this a chain underflow point?
+	for underflow, underflowRuleIdx := range table.Underflows {
+		if underflowRuleIdx == ruleIdx {
+			nflog("convert to binary: found underflow %d at offset %d", underflow, offset)
+			info.Underflow[underflow] = offset
 		}
-		entry.Elems = append(entry.Elems, serialized...)
-		entry.Entry.NextOffset += uint16(len(serialized))
-
-		nflog("convert to binary: adding entry: %+v", entry)
-
-		entries.Size += uint32(entry.Entry.NextOffset)
-		entries.Entrytable = append(entries.Entrytable, entry)
-		info.NumEntries++
 	}
-
-	nflog("convert to binary: finished with an marshalled size of %d", info.Size)
-	info.Size = entries.Size
-	return entries, info, nil
 }
 
 // SetEntries sets iptables rules for a single table. See
 // net/ipv4/netfilter/ip_tables.c:translate_table for reference.
 func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
-	// Get the basic rules data (struct ipt_replace).
-	if len(optVal) < linux.SizeOfIPTReplace {
-		nflog("optVal has insufficient size for replace %d", len(optVal))
-		return syserr.ErrInvalidArgument
-	}
 	var replace linux.IPTReplace
 	replaceBuf := optVal[:linux.SizeOfIPTReplace]
 	optVal = optVal[linux.SizeOfIPTReplace:]
@@ -218,79 +146,9 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
 		return syserr.ErrInvalidArgument
 	}
 
-	nflog("set entries: setting entries in table %q", replace.Name.String())
-
-	// Convert input into a list of rules and their offsets.
-	var offset uint32
-	// offsets maps rule byte offsets to their position in table.Rules.
-	offsets := map[uint32]int{}
-	for entryIdx := uint32(0); entryIdx < replace.NumEntries; entryIdx++ {
-		nflog("set entries: processing entry at offset %d", offset)
-
-		// Get the struct ipt_entry.
-		if len(optVal) < linux.SizeOfIPTEntry {
-			nflog("optVal has insufficient size for entry %d", len(optVal))
-			return syserr.ErrInvalidArgument
-		}
-		var entry linux.IPTEntry
-		buf := optVal[:linux.SizeOfIPTEntry]
-		binary.Unmarshal(buf, usermem.ByteOrder, &entry)
-		initialOptValLen := len(optVal)
-		optVal = optVal[linux.SizeOfIPTEntry:]
-
-		if entry.TargetOffset < linux.SizeOfIPTEntry {
-			nflog("entry has too-small target offset %d", entry.TargetOffset)
-			return syserr.ErrInvalidArgument
-		}
-
-		// TODO(gvisor.dev/issue/170): We should support more IPTIP
-		// filtering fields.
-		filter, err := filterFromIPTIP(entry.IP)
-		if err != nil {
-			nflog("bad iptip: %v", err)
-			return syserr.ErrInvalidArgument
-		}
-
-		// TODO(gvisor.dev/issue/170): Matchers and targets can specify
-		// that they only work for certain protocols, hooks, tables.
-		// Get matchers.
-		matchersSize := entry.TargetOffset - linux.SizeOfIPTEntry
-		if len(optVal) < int(matchersSize) {
-			nflog("entry doesn't have enough room for its matchers (only %d bytes remain)", len(optVal))
-			return syserr.ErrInvalidArgument
-		}
-		matchers, err := parseMatchers(filter, optVal[:matchersSize])
-		if err != nil {
-			nflog("failed to parse matchers: %v", err)
-			return syserr.ErrInvalidArgument
-		}
-		optVal = optVal[matchersSize:]
-
-		// Get the target of the rule.
-		targetSize := entry.NextOffset - entry.TargetOffset
-		if len(optVal) < int(targetSize) {
-			nflog("entry doesn't have enough room for its target (only %d bytes remain)", len(optVal))
-			return syserr.ErrInvalidArgument
-		}
-		target, err := parseTarget(filter, optVal[:targetSize])
-		if err != nil {
-			nflog("failed to parse target: %v", err)
-			return syserr.ErrInvalidArgument
-		}
-		optVal = optVal[targetSize:]
-
-		table.Rules = append(table.Rules, stack.Rule{
-			Filter:   filter,
-			Target:   target,
-			Matchers: matchers,
-		})
-		offsets[offset] = int(entryIdx)
-		offset += uint32(entry.NextOffset)
-
-		if initialOptValLen-len(optVal) != int(entry.NextOffset) {
-			nflog("entry NextOffset is %d, but entry took up %d bytes", entry.NextOffset, initialOptValLen-len(optVal))
-			return syserr.ErrInvalidArgument
-		}
+	offsets, err := modifyEntries4(stk, optVal, &replace, &table)
+	if err != nil {
+		return err
 	}
 
 	// Go through the list of supported hooks for this table and, for each
@@ -323,7 +181,7 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
 		}
 	}
 
-	// Add the user chains.
+	// Check the user chains.
 	for ruleIdx, rule := range table.Rules {
 		if _, ok := rule.Target.(stack.UserChainTarget); !ok {
 			continue
@@ -404,7 +262,6 @@ func parseMatchers(filter stack.IPHeaderFilter, optVal []byte) ([]stack.Matcher,
 
 		// Check some invariants.
 		if match.MatchSize < linux.SizeOfXTEntryMatch {
-
 			return nil, fmt.Errorf("match size is too small, must be at least %d", linux.SizeOfXTEntryMatch)
 		}
 		if len(optVal) < int(match.MatchSize) {
@@ -429,64 +286,11 @@ func parseMatchers(filter stack.IPHeaderFilter, optVal []byte) ([]stack.Matcher,
 	return matchers, nil
 }
 
-func filterFromIPTIP(iptip linux.IPTIP) (stack.IPHeaderFilter, error) {
-	if containsUnsupportedFields(iptip) {
-		return stack.IPHeaderFilter{}, fmt.Errorf("unsupported fields in struct iptip: %+v", iptip)
-	}
-	if len(iptip.Dst) != header.IPv4AddressSize || len(iptip.DstMask) != header.IPv4AddressSize {
-		return stack.IPHeaderFilter{}, fmt.Errorf("incorrect length of destination (%d) and/or destination mask (%d) fields", len(iptip.Dst), len(iptip.DstMask))
-	}
-	if len(iptip.Src) != header.IPv4AddressSize || len(iptip.SrcMask) != header.IPv4AddressSize {
-		return stack.IPHeaderFilter{}, fmt.Errorf("incorrect length of source (%d) and/or source mask (%d) fields", len(iptip.Src), len(iptip.SrcMask))
-	}
-
-	n := bytes.IndexByte([]byte(iptip.OutputInterface[:]), 0)
-	if n == -1 {
-		n = len(iptip.OutputInterface)
-	}
-	ifname := string(iptip.OutputInterface[:n])
-
-	n = bytes.IndexByte([]byte(iptip.OutputInterfaceMask[:]), 0)
-	if n == -1 {
-		n = len(iptip.OutputInterfaceMask)
-	}
-	ifnameMask := string(iptip.OutputInterfaceMask[:n])
-
-	return stack.IPHeaderFilter{
-		Protocol:              tcpip.TransportProtocolNumber(iptip.Protocol),
-		Dst:                   tcpip.Address(iptip.Dst[:]),
-		DstMask:               tcpip.Address(iptip.DstMask[:]),
-		DstInvert:             iptip.InverseFlags&linux.IPT_INV_DSTIP != 0,
-		Src:                   tcpip.Address(iptip.Src[:]),
-		SrcMask:               tcpip.Address(iptip.SrcMask[:]),
-		SrcInvert:             iptip.InverseFlags&linux.IPT_INV_SRCIP != 0,
-		OutputInterface:       ifname,
-		OutputInterfaceMask:   ifnameMask,
-		OutputInterfaceInvert: iptip.InverseFlags&linux.IPT_INV_VIA_OUT != 0,
-	}, nil
-}
-
-func containsUnsupportedFields(iptip linux.IPTIP) bool {
-	// The following features are supported:
-	// - Protocol
-	// - Dst and DstMask
-	// - Src and SrcMask
-	// - The inverse destination IP check flag
-	// - OutputInterface, OutputInterfaceMask and its inverse.
-	var emptyInterface = [linux.IFNAMSIZ]byte{}
-	// Disable any supported inverse flags.
-	inverseMask := uint8(linux.IPT_INV_DSTIP) | uint8(linux.IPT_INV_SRCIP) | uint8(linux.IPT_INV_VIA_OUT)
-	return iptip.InputInterface != emptyInterface ||
-		iptip.InputInterfaceMask != emptyInterface ||
-		iptip.Flags != 0 ||
-		iptip.InverseFlags&^inverseMask != 0
-}
-
 func validUnderflow(rule stack.Rule) bool {
 	if len(rule.Matchers) != 0 {
 		return false
 	}
-	if rule.Filter != emptyFilter {
+	if rule.Filter != emptyIPv4Filter {
 		return false
 	}
 	switch rule.Target.(type) {
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index e4846bc0bc..0e5913b60a 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -949,6 +949,9 @@ func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr us
 			if outLen < linux.SizeOfIPTGetinfo {
 				return nil, syserr.ErrInvalidArgument
 			}
+			if s.family != linux.AF_INET {
+				return nil, syserr.ErrInvalidArgument
+			}
 
 			stack := inet.StackFromContext(t)
 			if stack == nil {
@@ -964,12 +967,15 @@ func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr us
 			if outLen < linux.SizeOfIPTGetEntries {
 				return nil, syserr.ErrInvalidArgument
 			}
+			if s.family != linux.AF_INET {
+				return nil, syserr.ErrInvalidArgument
+			}
 
 			stack := inet.StackFromContext(t)
 			if stack == nil {
 				return nil, syserr.ErrNoDevice
 			}
-			entries, err := netfilter.GetEntries(t, stack.(*Stack).Stack, outPtr, outLen)
+			entries, err := netfilter.GetEntries4(t, stack.(*Stack).Stack, outPtr, outLen)
 			if err != nil {
 				return nil, err
 			}
@@ -1650,12 +1656,15 @@ func (s *SocketOperations) SetSockOpt(t *kernel.Task, level int, name int, optVa
 		return nil
 	}
 
-	if s.skType == linux.SOCK_RAW && level == linux.IPPROTO_IP {
+	if s.skType == linux.SOCK_RAW && level == linux.SOL_IP {
 		switch name {
 		case linux.IPT_SO_SET_REPLACE:
 			if len(optVal) < linux.SizeOfIPTReplace {
 				return syserr.ErrInvalidArgument
 			}
+			if s.family != linux.AF_INET {
+				return syserr.ErrInvalidArgument
+			}
 
 			stack := inet.StackFromContext(t)
 			if stack == nil {
diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go
index 3335e7430a..1db8ae4913 100644
--- a/pkg/sentry/socket/netstack/netstack_vfs2.go
+++ b/pkg/sentry/socket/netstack/netstack_vfs2.go
@@ -239,6 +239,9 @@ func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.
 			if outLen < linux.SizeOfIPTGetinfo {
 				return nil, syserr.ErrInvalidArgument
 			}
+			if s.family != linux.AF_INET {
+				return nil, syserr.ErrInvalidArgument
+			}
 
 			stack := inet.StackFromContext(t)
 			if stack == nil {
@@ -254,12 +257,15 @@ func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.
 			if outLen < linux.SizeOfIPTGetEntries {
 				return nil, syserr.ErrInvalidArgument
 			}
+			if s.family != linux.AF_INET {
+				return nil, syserr.ErrInvalidArgument
+			}
 
 			stack := inet.StackFromContext(t)
 			if stack == nil {
 				return nil, syserr.ErrNoDevice
 			}
-			entries, err := netfilter.GetEntries(t, stack.(*Stack).Stack, outPtr, outLen)
+			entries, err := netfilter.GetEntries4(t, stack.(*Stack).Stack, outPtr, outLen)
 			if err != nil {
 				return nil, err
 			}
@@ -298,12 +304,15 @@ func (s *SocketVFS2) SetSockOpt(t *kernel.Task, level int, name int, optVal []by
 		return nil
 	}
 
-	if s.skType == linux.SOCK_RAW && level == linux.IPPROTO_IP {
+	if s.skType == linux.SOCK_RAW && level == linux.SOL_IP {
 		switch name {
 		case linux.IPT_SO_SET_REPLACE:
 			if len(optVal) < linux.SizeOfIPTReplace {
 				return syserr.ErrInvalidArgument
 			}
+			if s.family != linux.AF_INET {
+				return syserr.ErrInvalidArgument
+			}
 
 			stack := inet.StackFromContext(t)
 			if stack == nil {
diff --git a/pkg/sentry/strace/socket.go b/pkg/sentry/strace/socket.go
index b51c4c9412..08e97e6c4e 100644
--- a/pkg/sentry/strace/socket.go
+++ b/pkg/sentry/strace/socket.go
@@ -632,6 +632,8 @@ var sockOptNames = map[uint64]abi.ValueSet{
 		linux.IPV6_UNICAST_IF:          "IPV6_UNICAST_IF",
 		linux.MCAST_MSFILTER:           "MCAST_MSFILTER",
 		linux.IPV6_ADDRFORM:            "IPV6_ADDRFORM",
+		linux.IP6T_SO_GET_INFO:         "IP6T_SO_GET_INFO",
+		linux.IP6T_SO_GET_ENTRIES:      "IP6T_SO_GET_ENTRIES",
 	},
 	linux.SOL_NETLINK: {
 		linux.NETLINK_BROADCAST_ERROR:  "NETLINK_BROADCAST_ERROR",

From 29fc0a272957105da566d17b1962322088864b6a Mon Sep 17 00:00:00 2001
From: Zach Koopmans <zkoopmans@google.com>
Date: Wed, 19 Aug 2020 14:44:42 -0700
Subject: [PATCH 033/211] Fix return for rseq_test.

Accept 128 + SIGNAL as well as SIGNAL as valid
returns for fork/exec tests.

Also, make changes so that the test compiles in open source. The test
had compile errors on the latest Ubuntu 16.04 image with bazel updated to
3.4.0 (as well as base 2.0) used for Kokoro tests.

PiperOrigin-RevId: 327510310
---
 test/syscalls/linux/BUILD        |  1 +
 test/syscalls/linux/rseq.cc      |  6 ++-
 test/syscalls/linux/rseq/rseq.cc | 87 ++++++++++++++++++--------------
 test/syscalls/linux/rseq/test.h  | 28 +++++-----
 4 files changed, 68 insertions(+), 54 deletions(-)

diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index bd1d9584a8..3009f5cada 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -1972,6 +1972,7 @@ cc_binary(
         gtest,
         "//test/util:logging",
         "//test/util:multiprocess_util",
+        "//test/util:posix_error",
         "//test/util:test_main",
         "//test/util:test_util",
     ],
diff --git a/test/syscalls/linux/rseq.cc b/test/syscalls/linux/rseq.cc
index 4bfb1ff569..94f9154a01 100644
--- a/test/syscalls/linux/rseq.cc
+++ b/test/syscalls/linux/rseq.cc
@@ -24,6 +24,7 @@
 #include "test/syscalls/linux/rseq/uapi.h"
 #include "test/util/logging.h"
 #include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
 #include "test/util/test_util.h"
 
 namespace gvisor {
@@ -31,6 +32,9 @@ namespace testing {
 
 namespace {
 
+using ::testing::AnyOf;
+using ::testing::Eq;
+
 // Syscall test for rseq (restartable sequences).
 //
 // We must be very careful about how these tests are written. Each thread may
@@ -98,7 +102,7 @@ void RunChildTest(std::string test_case, int want_status) {
 
   int status = 0;
   ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
-  ASSERT_EQ(status, want_status);
+  ASSERT_THAT(status, AnyOf(Eq(want_status), Eq(128 + want_status)));
 }
 
 // Test that rseq must be aligned.
diff --git a/test/syscalls/linux/rseq/rseq.cc b/test/syscalls/linux/rseq/rseq.cc
index f036db26d3..6f5d38bba2 100644
--- a/test/syscalls/linux/rseq/rseq.cc
+++ b/test/syscalls/linux/rseq/rseq.cc
@@ -74,84 +74,95 @@ int TestUnaligned() {
 // Sanity test that registration works.
 int TestRegister() {
   struct rseq r = {};
-  if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) {
+  int ret = sys_rseq(&r, sizeof(r), 0, 0);
+  if (sys_errno(ret) != 0) {
     return 1;
   }
   return 0;
-};
+}
 
 // Registration can't be done twice.
 int TestDoubleRegister() {
   struct rseq r = {};
-  if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) {
+  int ret = sys_rseq(&r, sizeof(r), 0, 0);
+  if (sys_errno(ret) != 0) {
     return 1;
   }
 
-  if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != EBUSY) {
+  ret = sys_rseq(&r, sizeof(r), 0, 0);
+  if (sys_errno(ret) != EBUSY) {
     return 1;
   }
 
   return 0;
-};
+}
 
 // Registration can be done again after unregister.
 int TestRegisterUnregister() {
   struct rseq r = {};
-  if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) {
+
+  int ret = sys_rseq(&r, sizeof(r), 0, 0);
+  if (sys_errno(ret) != 0) {
     return 1;
   }
 
-  if (int ret = sys_rseq(&r, sizeof(r), kRseqFlagUnregister, 0);
-      sys_errno(ret) != 0) {
+  ret = sys_rseq(&r, sizeof(r), kRseqFlagUnregister, 0);
+  if (sys_errno(ret) != 0) {
     return 1;
   }
 
-  if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) {
+  ret = sys_rseq(&r, sizeof(r), 0, 0);
+  if (sys_errno(ret) != 0) {
     return 1;
   }
 
   return 0;
-};
+}
 
 // The pointer to rseq must match on register/unregister.
 int TestUnregisterDifferentPtr() {
   struct rseq r = {};
-  if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) {
+
+  int ret = sys_rseq(&r, sizeof(r), 0, 0);
+  if (sys_errno(ret) != 0) {
     return 1;
   }
 
   struct rseq r2 = {};
-  if (int ret = sys_rseq(&r2, sizeof(r2), kRseqFlagUnregister, 0);
-      sys_errno(ret) != EINVAL) {
+
+  ret = sys_rseq(&r2, sizeof(r2), kRseqFlagUnregister, 0);
+  if (sys_errno(ret) != EINVAL) {
     return 1;
   }
 
   return 0;
-};
+}
 
 // The signature must match on register/unregister.
 int TestUnregisterDifferentSignature() {
   constexpr int kSignature = 0;
 
   struct rseq r = {};
-  if (int ret = sys_rseq(&r, sizeof(r), 0, kSignature); sys_errno(ret) != 0) {
+  int ret = sys_rseq(&r, sizeof(r), 0, kSignature);
+  if (sys_errno(ret) != 0) {
     return 1;
   }
 
-  if (int ret = sys_rseq(&r, sizeof(r), kRseqFlagUnregister, kSignature + 1);
-      sys_errno(ret) != EPERM) {
+  ret = sys_rseq(&r, sizeof(r), kRseqFlagUnregister, kSignature + 1);
+  if (sys_errno(ret) != EPERM) {
     return 1;
   }
 
   return 0;
-};
+}
 
 // The CPU ID is initialized.
 int TestCPU() {
   struct rseq r = {};
   r.cpu_id = kRseqCPUIDUninitialized;
 
-  if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) {
+  int ret = sys_rseq(&r, sizeof(r), 0, 0);
+  if (sys_errno(ret) != 0) {
     return 1;
   }
 
@@ -163,13 +174,13 @@ int TestCPU() {
   }
 
   return 0;
-};
+}
 
 // Critical section is eventually aborted.
 int TestAbort() {
   struct rseq r = {};
-  if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature);
-      sys_errno(ret) != 0) {
+  int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature);
+  if (sys_errno(ret) != 0) {
     return 1;
   }
 
@@ -185,13 +196,13 @@ int TestAbort() {
   rseq_loop(&r, &cs);
 
   return 0;
-};
+}
 
 // Abort may be before the critical section.
 int TestAbortBefore() {
   struct rseq r = {};
-  if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature);
-      sys_errno(ret) != 0) {
+  int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature);
+  if (sys_errno(ret) != 0) {
     return 1;
   }
 
@@ -207,13 +218,13 @@ int TestAbortBefore() {
   rseq_loop(&r, &cs);
 
   return 0;
-};
+}
 
 // Signature must match.
 int TestAbortSignature() {
   struct rseq r = {};
-  if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature + 1);
-      sys_errno(ret) != 0) {
+  int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature + 1);
+  if (sys_errno(ret) != 0) {
     return 1;
   }
 
@@ -229,13 +240,13 @@ int TestAbortSignature() {
   rseq_loop(&r, &cs);
 
   return 1;
-};
+}
 
 // Abort must not be in the critical section.
 int TestAbortPreCommit() {
   struct rseq r = {};
-  if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature + 1);
-      sys_errno(ret) != 0) {
+  int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature + 1);
+  if (sys_errno(ret) != 0) {
     return 1;
   }
 
@@ -251,13 +262,13 @@ int TestAbortPreCommit() {
   rseq_loop(&r, &cs);
 
   return 1;
-};
+}
 
 // rseq.rseq_cs is cleared on abort.
 int TestAbortClearsCS() {
   struct rseq r = {};
-  if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature);
-      sys_errno(ret) != 0) {
+  int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature);
+  if (sys_errno(ret) != 0) {
     return 1;
   }
 
@@ -277,13 +288,13 @@ int TestAbortClearsCS() {
   }
 
   return 0;
-};
+}
 
 // rseq.rseq_cs is cleared on abort outside of critical section.
 int TestInvalidAbortClearsCS() {
   struct rseq r = {};
-  if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature);
-      sys_errno(ret) != 0) {
+  int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature);
+  if (sys_errno(ret) != 0) {
     return 1;
   }
 
@@ -306,7 +317,7 @@ int TestInvalidAbortClearsCS() {
   }
 
   return 0;
-};
+}
 
 // Exit codes:
 //  0 - Pass
diff --git a/test/syscalls/linux/rseq/test.h b/test/syscalls/linux/rseq/test.h
index 3b7bb74b1a..ff0dd6e482 100644
--- a/test/syscalls/linux/rseq/test.h
+++ b/test/syscalls/linux/rseq/test.h
@@ -20,22 +20,20 @@ namespace testing {
 
 // Test cases supported by rseq binary.
 
-inline constexpr char kRseqTestUnaligned[] = "unaligned";
-inline constexpr char kRseqTestRegister[] = "register";
-inline constexpr char kRseqTestDoubleRegister[] = "double-register";
-inline constexpr char kRseqTestRegisterUnregister[] = "register-unregister";
-inline constexpr char kRseqTestUnregisterDifferentPtr[] =
-    "unregister-different-ptr";
-inline constexpr char kRseqTestUnregisterDifferentSignature[] =
+constexpr char kRseqTestUnaligned[] = "unaligned";
+constexpr char kRseqTestRegister[] = "register";
+constexpr char kRseqTestDoubleRegister[] = "double-register";
+constexpr char kRseqTestRegisterUnregister[] = "register-unregister";
+constexpr char kRseqTestUnregisterDifferentPtr[] = "unregister-different-ptr";
+constexpr char kRseqTestUnregisterDifferentSignature[] =
     "unregister-different-signature";
-inline constexpr char kRseqTestCPU[] = "cpu";
-inline constexpr char kRseqTestAbort[] = "abort";
-inline constexpr char kRseqTestAbortBefore[] = "abort-before";
-inline constexpr char kRseqTestAbortSignature[] = "abort-signature";
-inline constexpr char kRseqTestAbortPreCommit[] = "abort-precommit";
-inline constexpr char kRseqTestAbortClearsCS[] = "abort-clears-cs";
-inline constexpr char kRseqTestInvalidAbortClearsCS[] =
-    "invalid-abort-clears-cs";
+constexpr char kRseqTestCPU[] = "cpu";
+constexpr char kRseqTestAbort[] = "abort";
+constexpr char kRseqTestAbortBefore[] = "abort-before";
+constexpr char kRseqTestAbortSignature[] = "abort-signature";
+constexpr char kRseqTestAbortPreCommit[] = "abort-precommit";
+constexpr char kRseqTestAbortClearsCS[] = "abort-clears-cs";
+constexpr char kRseqTestInvalidAbortClearsCS[] = "invalid-abort-clears-cs";
 
 }  // namespace testing
 }  // namespace gvisor

From 59a394f856108dcb33f242a61ff6e1700161b4f3 Mon Sep 17 00:00:00 2001
From: Fabricio Voznika <fvoznika@google.com>
Date: Wed, 19 Aug 2020 17:03:21 -0700
Subject: [PATCH 034/211] Run bazel build before deleting cached gopath files

bazel creates symlinks to the cache on the first invocation.
On a new clone, there are no symlinks, thus `rm -rf bazel-bin/gopath`
has no effect. Call `bazel build something` first, then delete the
cached gopath.

PiperOrigin-RevId: 327536044
---
 .github/workflows/go.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 4da3853b22..b51c221581 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -49,7 +49,12 @@ jobs:
         key: ${{ runner.os }}-bazel-${{ hashFiles('WORKSPACE') }}
         restore-keys: |
           ${{ runner.os }}-bazel-
+      # Create gopath to merge the changes. The first execution will create
+      # symlinks to the cache, e.g. bazel-bin. Once the cache is setup, delete
+      # old gopath files that may exist from previous runs (and could contain
+      # files that are now deleted). Then run gopath again for good.
     - run: |
+        make build TARGETS="//:gopath"
         rm -rf bazel-bin/gopath
         make build TARGETS="//:gopath"
     - run: tools/go_branch.sh

From 00ee4cb1a26d8f3cabbbb7fc05d719d8aabbee60 Mon Sep 17 00:00:00 2001
From: Fabricio Voznika <fvoznika@google.com>
Date: Wed, 19 Aug 2020 18:03:15 -0700
Subject: [PATCH 035/211] Remove path walk from localFile.Mknod

Replace mknod call with mknodat equivalent to protect
against symlink attacks. Also added Mknod tests.

Remove goferfs reliance on gofer to check for file
existence before creating a synthetic entry.

Updates #2923

PiperOrigin-RevId: 327544516
---
 pkg/sentry/fsimpl/gofer/directory.go  |  12 +-
 pkg/sentry/fsimpl/gofer/filesystem.go |  77 ++++----
 runsc/fsgofer/BUILD                   |   1 +
 runsc/fsgofer/fsgofer.go              | 258 ++++++++++++++------------
 runsc/fsgofer/fsgofer_amd64_unsafe.go |  16 +-
 runsc/fsgofer/fsgofer_arm64_unsafe.go |  16 +-
 runsc/fsgofer/fsgofer_test.go         | 147 ++++++++++-----
 runsc/fsgofer/fsgofer_unsafe.go       |  18 +-
 8 files changed, 312 insertions(+), 233 deletions(-)

diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go
index 2a8011eb49..40dce553eb 100644
--- a/pkg/sentry/fsimpl/gofer/directory.go
+++ b/pkg/sentry/fsimpl/gofer/directory.go
@@ -82,7 +82,7 @@ type createSyntheticOpts struct {
 // Preconditions: d.dirMu must be locked. d.isDir(). d does not already contain
 // a child with the given name.
 func (d *dentry) createSyntheticChildLocked(opts *createSyntheticOpts) {
-	d2 := &dentry{
+	child := &dentry{
 		refs:      1, // held by d
 		fs:        d.fs,
 		ino:       d.fs.nextSyntheticIno(),
@@ -97,16 +97,16 @@ func (d *dentry) createSyntheticChildLocked(opts *createSyntheticOpts) {
 	case linux.S_IFDIR:
 		// Nothing else needs to be done.
 	case linux.S_IFSOCK:
-		d2.endpoint = opts.endpoint
+		child.endpoint = opts.endpoint
 	case linux.S_IFIFO:
-		d2.pipe = opts.pipe
+		child.pipe = opts.pipe
 	default:
 		panic(fmt.Sprintf("failed to create synthetic file of unrecognized type: %v", opts.mode.FileType()))
 	}
-	d2.pf.dentry = d2
-	d2.vfsd.Init(d2)
+	child.pf.dentry = child
+	child.vfsd.Init(child)
 
-	d.cacheNewChildLocked(d2, opts.name)
+	d.cacheNewChildLocked(child, opts.name)
 	d.syntheticChildren++
 }
 
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index 9a90351e5b..1b6fa4e148 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -330,7 +330,7 @@ func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath,
 //
 // Preconditions: !rp.Done(). For the final path component in rp,
 // !rp.ShouldFollowSymlink().
-func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool, createInRemoteDir func(parent *dentry, name string) error, createInSyntheticDir func(parent *dentry, name string) error) error {
+func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool, createInRemoteDir func(parent *dentry, name string, ds **[]*dentry) error, createInSyntheticDir func(parent *dentry, name string) error) error {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
 	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
@@ -399,7 +399,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
 		// RPC will fail with EEXIST like we would have. If the RPC succeeds, and a
 		// stale dentry exists, the dentry will fail revalidation next time it's
 		// used.
-		if err := createInRemoteDir(parent, name); err != nil {
+		if err := createInRemoteDir(parent, name, &ds); err != nil {
 			return err
 		}
 		ev := linux.IN_CREATE
@@ -414,7 +414,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
 	}
 	// No cached dentry exists; however, there might still be an existing file
 	// at name. As above, we attempt the file creation RPC anyway.
-	if err := createInRemoteDir(parent, name); err != nil {
+	if err := createInRemoteDir(parent, name, &ds); err != nil {
 		return err
 	}
 	if child, ok := parent.children[name]; ok && child == nil {
@@ -721,7 +721,7 @@ func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPa
 
 // LinkAt implements vfs.FilesystemImpl.LinkAt.
 func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
-	return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, childName string) error {
+	return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, childName string, _ **[]*dentry) error {
 		if rp.Mount() != vd.Mount() {
 			return syserror.EXDEV
 		}
@@ -754,7 +754,7 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
 // MkdirAt implements vfs.FilesystemImpl.MkdirAt.
 func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
 	creds := rp.Credentials()
-	return fs.doCreateAt(ctx, rp, true /* dir */, func(parent *dentry, name string) error {
+	return fs.doCreateAt(ctx, rp, true /* dir */, func(parent *dentry, name string, _ **[]*dentry) error {
 		if _, err := parent.file.mkdir(ctx, name, (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID)); err != nil {
 			if !opts.ForSyntheticMountpoint || err == syserror.EEXIST {
 				return err
@@ -789,34 +789,49 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
 
 // MknodAt implements vfs.FilesystemImpl.MknodAt.
 func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
-	return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string) error {
+	return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string, ds **[]*dentry) error {
 		creds := rp.Credentials()
 		_, err := parent.file.mknod(ctx, name, (p9.FileMode)(opts.Mode), opts.DevMajor, opts.DevMinor, (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID))
-		// If the gofer does not allow creating a socket or pipe, create a
-		// synthetic one, i.e. one that is kept entirely in memory.
-		if err == syserror.EPERM {
-			switch opts.Mode.FileType() {
-			case linux.S_IFSOCK:
-				parent.createSyntheticChildLocked(&createSyntheticOpts{
-					name:     name,
-					mode:     opts.Mode,
-					kuid:     creds.EffectiveKUID,
-					kgid:     creds.EffectiveKGID,
-					endpoint: opts.Endpoint,
-				})
-				return nil
-			case linux.S_IFIFO:
-				parent.createSyntheticChildLocked(&createSyntheticOpts{
-					name: name,
-					mode: opts.Mode,
-					kuid: creds.EffectiveKUID,
-					kgid: creds.EffectiveKGID,
-					pipe: pipe.NewVFSPipe(true /* isNamed */, pipe.DefaultPipeSize, usermem.PageSize),
-				})
-				return nil
-			}
+		if err != syserror.EPERM {
+			return err
 		}
-		return err
+
+		// EPERM means that gofer does not allow creating a socket or pipe. Fallback
+		// to creating a synthetic one, i.e. one that is kept entirely in memory.
+
+		// Check that we're not overriding an existing file with a synthetic one.
+		_, err = fs.stepLocked(ctx, rp, parent, true, ds)
+		switch {
+		case err == nil:
+			// Step succeeded, another file exists.
+			return syserror.EEXIST
+		case err != syserror.ENOENT:
+			// Unexpected error.
+			return err
+		}
+
+		switch opts.Mode.FileType() {
+		case linux.S_IFSOCK:
+			parent.createSyntheticChildLocked(&createSyntheticOpts{
+				name:     name,
+				mode:     opts.Mode,
+				kuid:     creds.EffectiveKUID,
+				kgid:     creds.EffectiveKGID,
+				endpoint: opts.Endpoint,
+			})
+			return nil
+		case linux.S_IFIFO:
+			parent.createSyntheticChildLocked(&createSyntheticOpts{
+				name: name,
+				mode: opts.Mode,
+				kuid: creds.EffectiveKUID,
+				kgid: creds.EffectiveKGID,
+				pipe: pipe.NewVFSPipe(true /* isNamed */, pipe.DefaultPipeSize, usermem.PageSize),
+			})
+			return nil
+		}
+		// Retain error from gofer if synthetic file cannot be created internally.
+		return syserror.EPERM
 	}, nil)
 }
 
@@ -1452,7 +1467,7 @@ func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu
 
 // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
 func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error {
-	return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string) error {
+	return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string, _ **[]*dentry) error {
 		creds := rp.Credentials()
 		_, err := parent.file.symlink(ctx, target, name, (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID))
 		return err
diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD
index 05e3637f7e..96c57a4262 100644
--- a/runsc/fsgofer/BUILD
+++ b/runsc/fsgofer/BUILD
@@ -32,5 +32,6 @@ go_test(
         "//pkg/log",
         "//pkg/p9",
         "//pkg/test/testutil",
+        "@org_golang_x_sys//unix:go_default_library",
     ],
 )
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index 639de9ca15..b0788bd231 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -29,7 +29,6 @@ import (
 	"path/filepath"
 	"runtime"
 	"strconv"
-	"syscall"
 
 	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/abi/linux"
@@ -45,7 +44,7 @@ const (
 	// modes to ensure an unopened/closed file fails all mode checks.
 	invalidMode = p9.OpenFlags(math.MaxUint32)
 
-	openFlags = syscall.O_NOFOLLOW | syscall.O_CLOEXEC
+	openFlags = unix.O_NOFOLLOW | unix.O_CLOEXEC
 
 	allowedOpenFlags = unix.O_TRUNC
 )
@@ -125,7 +124,7 @@ func (a *attachPoint) Attach() (p9.File, error) {
 }
 
 // makeQID returns a unique QID for the given stat buffer.
-func (a *attachPoint) makeQID(stat syscall.Stat_t) p9.QID {
+func (a *attachPoint) makeQID(stat unix.Stat_t) p9.QID {
 	a.deviceMu.Lock()
 	defer a.deviceMu.Unlock()
 
@@ -156,9 +155,7 @@ func (a *attachPoint) makeQID(stat syscall.Stat_t) p9.QID {
 // localFile implements p9.File wrapping a local file. The underlying file
 // is opened during Walk() and stored in 'file' to be used with other
 // operations. The file is opened as readonly, unless it's a symlink or there is
-// no read access, which requires O_PATH. 'file' is dup'ed when Walk(nil) is
-// called to clone the file. This reduces the number of walks that need to be
-// done by the host file system when files are reused.
+// no read access, which requires O_PATH.
 //
 // The file may be reopened if the requested mode in Open() is not a subset of
 // current mode. Consequently, 'file' could have a mode wider than requested and
@@ -170,11 +167,28 @@ func (a *attachPoint) makeQID(stat syscall.Stat_t) p9.QID {
 // performance with 'overlay2' storage driver. overlay2 eagerly copies the
 // entire file up when it's opened in write mode, and would perform badly when
 // multiple files are only being opened for read (esp. startup).
+//
+// File operations must use "at" functions whenever possible:
+//   * Local operations must use AT_EMPTY_PATH:
+//       fchownat(fd, "", AT_EMPTY_PATH, ...), instead of chown(fullpath, ...)
+//   * Creation operations must use (fd + name):
+//       mkdirat(fd, name, ...), instead of mkdir(fullpath, ...)
+//
+// Apart from being faster, it also adds another layer of defense against
+// symlink attacks (note that O_NOFOLLOW applies only to the last element in
+// the path).
+//
+// The few exceptions where it cannot be done are: utimensat on symlinks, and
+// Connect() for the socket address.
 type localFile struct {
 	// attachPoint is the attachPoint that serves this localFile.
 	attachPoint *attachPoint
 
-	// hostPath will be safely updated by the Renamed hook.
+	// hostPath is the full path to the host file. It can be used for logging and
+	// the few cases where full path is required to operate on the host file. In
+	// all other cases, use "file" directly.
+	//
+	// Note: it's safely updated by the Renamed hook.
 	hostPath string
 
 	// file is opened when localFile is created and it's never nil. It may be
@@ -191,7 +205,7 @@ type localFile struct {
 	mode p9.OpenFlags
 
 	// fileType for this file. It is equivalent to:
-	// syscall.Stat_t.Mode & syscall.S_IFMT
+	// unix.Stat_t.Mode & unix.S_IFMT
 	fileType uint32
 
 	qid p9.QID
@@ -211,7 +225,7 @@ var procSelfFD *fd.FD
 // OpenProcSelfFD opens the /proc/self/fd directory, which will be used to
 // reopen file descriptors.
 func OpenProcSelfFD() error {
-	d, err := syscall.Open("/proc/self/fd", syscall.O_RDONLY|syscall.O_DIRECTORY, 0)
+	d, err := unix.Open("/proc/self/fd", unix.O_RDONLY|unix.O_DIRECTORY, 0)
 	if err != nil {
 		return fmt.Errorf("error opening /proc/self/fd: %v", err)
 	}
@@ -220,7 +234,7 @@ func OpenProcSelfFD() error {
 }
 
 func reopenProcFd(f *fd.FD, mode int) (*fd.FD, error) {
-	d, err := syscall.Openat(int(procSelfFD.FD()), strconv.Itoa(f.FD()), mode&^syscall.O_NOFOLLOW, 0)
+	d, err := unix.Openat(int(procSelfFD.FD()), strconv.Itoa(f.FD()), mode&^unix.O_NOFOLLOW, 0)
 	if err != nil {
 		return nil, err
 	}
@@ -229,17 +243,17 @@ func reopenProcFd(f *fd.FD, mode int) (*fd.FD, error) {
 }
 
 func openAnyFileFromParent(parent *localFile, name string) (*fd.FD, string, bool, error) {
-	path := path.Join(parent.hostPath, name)
-	f, readable, err := openAnyFile(path, func(mode int) (*fd.FD, error) {
+	pathDebug := path.Join(parent.hostPath, name)
+	f, readable, err := openAnyFile(pathDebug, func(mode int) (*fd.FD, error) {
 		return fd.OpenAt(parent.file, name, openFlags|mode, 0)
 	})
-	return f, path, readable, err
+	return f, pathDebug, readable, err
 }
 
-// openAnyFile attempts to open the file in O_RDONLY and if it fails fallsback
+// openAnyFile attempts to open the file in O_RDONLY. If it fails, falls back
 // to O_PATH. 'path' is used for logging messages only. 'fn' is what does the
 // actual file open and is customizable by the caller.
-func openAnyFile(path string, fn func(mode int) (*fd.FD, error)) (*fd.FD, bool, error) {
+func openAnyFile(pathDebug string, fn func(mode int) (*fd.FD, error)) (*fd.FD, bool, error) {
 	// Attempt to open file in the following mode in order:
 	//   1. RDONLY | NONBLOCK: for all files, directories, ro mounts, FIFOs.
 	//      Use non-blocking to prevent getting stuck inside open(2) for
@@ -250,7 +264,7 @@ func openAnyFile(path string, fn func(mode int) (*fd.FD, error)) (*fd.FD, bool,
 		readable bool
 	}{
 		{
-			mode:     syscall.O_RDONLY | syscall.O_NONBLOCK,
+			mode:     unix.O_RDONLY | unix.O_NONBLOCK,
 			readable: true,
 		},
 		{
@@ -268,36 +282,36 @@ func openAnyFile(path string, fn func(mode int) (*fd.FD, error)) (*fd.FD, bool,
 			return file, option.readable, nil
 		}
 		switch e := extractErrno(err); e {
-		case syscall.ENOENT:
+		case unix.ENOENT:
 			// File doesn't exist, no point in retrying.
 			return nil, false, e
 		}
 		// File failed to open. Try again with next mode, preserving 'err' in case
 		// this was the last attempt.
-		log.Debugf("Attempt %d to open file failed, mode: %#x, path: %q, err: %v", i, openFlags|option.mode, path, err)
+		log.Debugf("Attempt %d to open file failed, mode: %#x, path: %q, err: %v", i, openFlags|option.mode, pathDebug, err)
 	}
 	// All attempts to open file have failed, return the last error.
-	log.Debugf("Failed to open file, path: %q, err: %v", path, err)
+	log.Debugf("Failed to open file, path: %q, err: %v", pathDebug, err)
 	return nil, false, extractErrno(err)
 }
 
-func checkSupportedFileType(stat syscall.Stat_t, permitSocket bool) error {
-	switch stat.Mode & syscall.S_IFMT {
-	case syscall.S_IFREG, syscall.S_IFDIR, syscall.S_IFLNK:
+func checkSupportedFileType(stat unix.Stat_t, permitSocket bool) error {
+	switch stat.Mode & unix.S_IFMT {
+	case unix.S_IFREG, unix.S_IFDIR, unix.S_IFLNK:
 		return nil
 
-	case syscall.S_IFSOCK:
+	case unix.S_IFSOCK:
 		if !permitSocket {
-			return syscall.EPERM
+			return unix.EPERM
 		}
 		return nil
 
 	default:
-		return syscall.EPERM
+		return unix.EPERM
 	}
 }
 
-func newLocalFile(a *attachPoint, file *fd.FD, path string, readable bool, stat syscall.Stat_t) (*localFile, error) {
+func newLocalFile(a *attachPoint, file *fd.FD, path string, readable bool, stat unix.Stat_t) (*localFile, error) {
 	if err := checkSupportedFileType(stat, a.conf.HostUDS); err != nil {
 		return nil, err
 	}
@@ -307,7 +321,7 @@ func newLocalFile(a *attachPoint, file *fd.FD, path string, readable bool, stat
 		hostPath:        path,
 		file:            file,
 		mode:            invalidMode,
-		fileType:        stat.Mode & syscall.S_IFMT,
+		fileType:        stat.Mode & unix.S_IFMT,
 		qid:             a.makeQID(stat),
 		controlReadable: readable,
 	}, nil
@@ -317,7 +331,7 @@ func newLocalFile(a *attachPoint, file *fd.FD, path string, readable bool, stat
 // non-blocking. If anything fails, returns nil. It's better to have a file
 // without host FD, than to fail the operation.
 func newFDMaybe(file *fd.FD) *fd.FD {
-	dupFD, err := syscall.Dup(file.FD())
+	dupFD, err := unix.Dup(file.FD())
 	// Technically, the runtime may call the finalizer on file as soon as
 	// FD() returns.
 	runtime.KeepAlive(file)
@@ -327,31 +341,23 @@ func newFDMaybe(file *fd.FD) *fd.FD {
 	dup := fd.New(dupFD)
 
 	// fd is blocking; non-blocking is required.
-	if err := syscall.SetNonblock(dup.FD(), true); err != nil {
+	if err := unix.SetNonblock(dup.FD(), true); err != nil {
 		_ = dup.Close()
 		return nil
 	}
 	return dup
 }
 
-func fstat(fd int) (syscall.Stat_t, error) {
-	var stat syscall.Stat_t
-	if err := syscall.Fstat(fd, &stat); err != nil {
-		return syscall.Stat_t{}, err
-	}
-	return stat, nil
-}
-
-func stat(path string) (syscall.Stat_t, error) {
-	var stat syscall.Stat_t
-	if err := syscall.Stat(path, &stat); err != nil {
-		return syscall.Stat_t{}, err
+func fstat(fd int) (unix.Stat_t, error) {
+	var stat unix.Stat_t
+	if err := unix.Fstat(fd, &stat); err != nil {
+		return unix.Stat_t{}, err
 	}
 	return stat, nil
 }
 
 func fchown(fd int, uid p9.UID, gid p9.GID) error {
-	return syscall.Fchownat(fd, "", int(uid), int(gid), linux.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW)
+	return unix.Fchownat(fd, "", int(uid), int(gid), linux.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW)
 }
 
 // Open implements p9.File.
@@ -377,7 +383,7 @@ func (l *localFile) Open(flags p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) {
 		// name_to_handle_at and open_by_handle_at aren't supported by overlay2.
 		log.Debugf("Open reopening file, flags: %v, %q", flags, l.hostPath)
 		var err error
-		osFlags := flags.OSFlags() & (syscall.O_ACCMODE | allowedOpenFlags)
+		osFlags := flags.OSFlags() & (unix.O_ACCMODE | allowedOpenFlags)
 		newFile, err = reopenProcFd(l.file, openFlags|osFlags)
 		if err != nil {
 			return nil, p9.QID{}, 0, extractErrno(err)
@@ -385,7 +391,7 @@ func (l *localFile) Open(flags p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) {
 	}
 
 	var fd *fd.FD
-	if l.fileType == syscall.S_IFREG {
+	if l.fileType == unix.S_IFREG {
 		// Donate FD for regular files only.
 		fd = newFDMaybe(newFile)
 	}
@@ -408,7 +414,7 @@ func (l *localFile) Create(name string, p9Flags p9.OpenFlags, perm p9.FileMode,
 	}
 
 	// Set file creation flags, plus allowed open flags from caller.
-	osFlags := openFlags | syscall.O_CREAT | syscall.O_EXCL
+	osFlags := openFlags | unix.O_CREAT | unix.O_EXCL
 	osFlags |= p9Flags.OSFlags() & allowedOpenFlags
 
 	// 'file' may be used for other operations (e.g. Walk), so read access is
@@ -416,9 +422,9 @@ func (l *localFile) Create(name string, p9Flags p9.OpenFlags, perm p9.FileMode,
 	// than needed for each particular case.
 	mode := p9Flags & p9.OpenFlagsModeMask
 	if mode == p9.WriteOnly {
-		osFlags |= syscall.O_RDWR
+		osFlags |= unix.O_RDWR
 	} else {
-		osFlags |= mode.OSFlags() & unix.O_ACCMODE
+		osFlags |= mode.OSFlags()
 	}
 
 	child, err := fd.OpenAt(l.file, name, osFlags, uint32(perm.Permissions()))
@@ -428,7 +434,7 @@ func (l *localFile) Create(name string, p9Flags p9.OpenFlags, perm p9.FileMode,
 	cu := cleanup.Make(func() {
 		_ = child.Close()
 		// Best effort attempt to remove the file in case of failure.
-		if err := syscall.Unlinkat(l.file.FD(), name); err != nil {
+		if err := unix.Unlinkat(l.file.FD(), name, 0); err != nil {
 			log.Warningf("error unlinking file %q after failure: %v", path.Join(l.hostPath, name), err)
 		}
 	})
@@ -447,7 +453,7 @@ func (l *localFile) Create(name string, p9Flags p9.OpenFlags, perm p9.FileMode,
 		hostPath:    path.Join(l.hostPath, name),
 		file:        child,
 		mode:        mode,
-		fileType:    syscall.S_IFREG,
+		fileType:    unix.S_IFREG,
 		qid:         l.attachPoint.makeQID(stat),
 	}
 
@@ -461,7 +467,7 @@ func (l *localFile) Mkdir(name string, perm p9.FileMode, uid p9.UID, gid p9.GID)
 		return p9.QID{}, err
 	}
 
-	if err := syscall.Mkdirat(l.file.FD(), name, uint32(perm.Permissions())); err != nil {
+	if err := unix.Mkdirat(l.file.FD(), name, uint32(perm.Permissions())); err != nil {
 		return p9.QID{}, extractErrno(err)
 	}
 	cu := cleanup.Make(func() {
@@ -473,7 +479,7 @@ func (l *localFile) Mkdir(name string, perm p9.FileMode, uid p9.UID, gid p9.GID)
 	defer cu.Clean()
 
 	// Open directory to change ownership and stat it.
-	flags := syscall.O_DIRECTORY | syscall.O_RDONLY | openFlags
+	flags := unix.O_DIRECTORY | unix.O_RDONLY | openFlags
 	f, err := fd.OpenAt(l.file, name, flags, 0)
 	if err != nil {
 		return p9.QID{}, extractErrno(err)
@@ -508,20 +514,20 @@ func (l *localFile) WalkGetAttr(names []string) ([]p9.QID, p9.File, p9.AttrMask,
 	return qids, file, mask, attr, nil
 }
 
-func (l *localFile) walk(names []string) ([]p9.QID, p9.File, syscall.Stat_t, error) {
+func (l *localFile) walk(names []string) ([]p9.QID, p9.File, unix.Stat_t, error) {
 	// Duplicate current file if 'names' is empty.
 	if len(names) == 0 {
 		newFile, readable, err := openAnyFile(l.hostPath, func(mode int) (*fd.FD, error) {
 			return reopenProcFd(l.file, openFlags|mode)
 		})
 		if err != nil {
-			return nil, nil, syscall.Stat_t{}, extractErrno(err)
+			return nil, nil, unix.Stat_t{}, extractErrno(err)
 		}
 
 		stat, err := fstat(newFile.FD())
 		if err != nil {
 			_ = newFile.Close()
-			return nil, nil, syscall.Stat_t{}, extractErrno(err)
+			return nil, nil, unix.Stat_t{}, extractErrno(err)
 		}
 
 		c := &localFile{
@@ -537,7 +543,7 @@ func (l *localFile) walk(names []string) ([]p9.QID, p9.File, syscall.Stat_t, err
 	}
 
 	var qids []p9.QID
-	var lastStat syscall.Stat_t
+	var lastStat unix.Stat_t
 	last := l
 	for _, name := range names {
 		f, path, readable, err := openAnyFileFromParent(last, name)
@@ -545,17 +551,17 @@ func (l *localFile) walk(names []string) ([]p9.QID, p9.File, syscall.Stat_t, err
 			_ = last.Close()
 		}
 		if err != nil {
-			return nil, nil, syscall.Stat_t{}, extractErrno(err)
+			return nil, nil, unix.Stat_t{}, extractErrno(err)
 		}
 		lastStat, err = fstat(f.FD())
 		if err != nil {
 			_ = f.Close()
-			return nil, nil, syscall.Stat_t{}, extractErrno(err)
+			return nil, nil, unix.Stat_t{}, extractErrno(err)
 		}
 		c, err := newLocalFile(last.attachPoint, f, path, readable, lastStat)
 		if err != nil {
 			_ = f.Close()
-			return nil, nil, syscall.Stat_t{}, extractErrno(err)
+			return nil, nil, unix.Stat_t{}, extractErrno(err)
 		}
 
 		qids = append(qids, c.qid)
@@ -566,8 +572,8 @@ func (l *localFile) walk(names []string) ([]p9.QID, p9.File, syscall.Stat_t, err
 
 // StatFS implements p9.File.
 func (l *localFile) StatFS() (p9.FSStat, error) {
-	var s syscall.Statfs_t
-	if err := syscall.Fstatfs(l.file.FD(), &s); err != nil {
+	var s unix.Statfs_t
+	if err := unix.Fstatfs(l.file.FD(), &s); err != nil {
 		return p9.FSStat{}, extractErrno(err)
 	}
 
@@ -587,9 +593,9 @@ func (l *localFile) StatFS() (p9.FSStat, error) {
 // FSync implements p9.File.
 func (l *localFile) FSync() error {
 	if !l.isOpen() {
-		return syscall.EBADF
+		return unix.EBADF
 	}
-	if err := syscall.Fsync(l.file.FD()); err != nil {
+	if err := unix.Fsync(l.file.FD()); err != nil {
 		return extractErrno(err)
 	}
 	return nil
@@ -605,7 +611,7 @@ func (l *localFile) GetAttr(_ p9.AttrMask) (p9.QID, p9.AttrMask, p9.Attr, error)
 	return l.qid, mask, attr, nil
 }
 
-func (l *localFile) fillAttr(stat syscall.Stat_t) (p9.AttrMask, p9.Attr) {
+func (l *localFile) fillAttr(stat unix.Stat_t) (p9.AttrMask, p9.Attr) {
 	attr := p9.Attr{
 		Mode:             p9.FileMode(stat.Mode),
 		UID:              p9.UID(stat.Uid),
@@ -665,13 +671,13 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
 	// consistent result that is not attribute dependent.
 	if !valid.IsSubsetOf(allowed) {
 		log.Warningf("SetAttr() failed for %q, mask: %v", l.hostPath, valid)
-		return syscall.EPERM
+		return unix.EPERM
 	}
 
 	// Check if it's possible to use cached file, or if another one needs to be
 	// opened for write.
 	f := l.file
-	if l.fileType == syscall.S_IFREG && l.mode != p9.WriteOnly && l.mode != p9.ReadWrite {
+	if l.fileType == unix.S_IFREG && l.mode != p9.WriteOnly && l.mode != p9.ReadWrite {
 		var err error
 		f, err = reopenProcFd(l.file, openFlags|os.O_WRONLY)
 		if err != nil {
@@ -692,21 +698,21 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
 	// over another.
 	var err error
 	if valid.Permissions {
-		if cerr := syscall.Fchmod(f.FD(), uint32(attr.Permissions)); cerr != nil {
+		if cerr := unix.Fchmod(f.FD(), uint32(attr.Permissions)); cerr != nil {
 			log.Debugf("SetAttr fchmod failed %q, err: %v", l.hostPath, cerr)
 			err = extractErrno(cerr)
 		}
 	}
 
 	if valid.Size {
-		if terr := syscall.Ftruncate(f.FD(), int64(attr.Size)); terr != nil {
+		if terr := unix.Ftruncate(f.FD(), int64(attr.Size)); terr != nil {
 			log.Debugf("SetAttr ftruncate failed %q, err: %v", l.hostPath, terr)
 			err = extractErrno(terr)
 		}
 	}
 
 	if valid.ATime || valid.MTime {
-		utimes := [2]syscall.Timespec{
+		utimes := [2]unix.Timespec{
 			{Sec: 0, Nsec: linux.UTIME_OMIT},
 			{Sec: 0, Nsec: linux.UTIME_OMIT},
 		}
@@ -727,15 +733,15 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
 			}
 		}
 
-		if l.fileType == syscall.S_IFLNK {
+		if l.fileType == unix.S_IFLNK {
 			// utimensat operates different that other syscalls. To operate on a
 			// symlink it *requires* AT_SYMLINK_NOFOLLOW with dirFD and a non-empty
 			// name.
-			parent, err := syscall.Open(path.Dir(l.hostPath), openFlags|unix.O_PATH, 0)
+			parent, err := unix.Open(path.Dir(l.hostPath), openFlags|unix.O_PATH, 0)
 			if err != nil {
 				return extractErrno(err)
 			}
-			defer syscall.Close(parent)
+			defer unix.Close(parent)
 
 			if terr := utimensat(parent, path.Base(l.hostPath), utimes, linux.AT_SYMLINK_NOFOLLOW); terr != nil {
 				log.Debugf("SetAttr utimens failed %q, err: %v", l.hostPath, terr)
@@ -760,7 +766,7 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
 		if valid.GID {
 			gid = int(attr.GID)
 		}
-		if oerr := syscall.Fchownat(f.FD(), "", uid, gid, linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW); oerr != nil {
+		if oerr := unix.Fchownat(f.FD(), "", uid, gid, linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW); oerr != nil {
 			log.Debugf("SetAttr fchownat failed %q, err: %v", l.hostPath, oerr)
 			err = extractErrno(oerr)
 		}
@@ -770,28 +776,28 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
 }
 
 func (*localFile) GetXattr(string, uint64) (string, error) {
-	return "", syscall.EOPNOTSUPP
+	return "", unix.EOPNOTSUPP
 }
 
 func (*localFile) SetXattr(string, string, uint32) error {
-	return syscall.EOPNOTSUPP
+	return unix.EOPNOTSUPP
 }
 
 func (*localFile) ListXattr(uint64) (map[string]struct{}, error) {
-	return nil, syscall.EOPNOTSUPP
+	return nil, unix.EOPNOTSUPP
 }
 
 func (*localFile) RemoveXattr(string) error {
-	return syscall.EOPNOTSUPP
+	return unix.EOPNOTSUPP
 }
 
 // Allocate implements p9.File.
 func (l *localFile) Allocate(mode p9.AllocateMode, offset, length uint64) error {
 	if !l.isOpen() {
-		return syscall.EBADF
+		return unix.EBADF
 	}
 
-	if err := syscall.Fallocate(l.file.FD(), mode.ToLinux(), int64(offset), int64(length)); err != nil {
+	if err := unix.Fallocate(l.file.FD(), mode.ToLinux(), int64(offset), int64(length)); err != nil {
 		return extractErrno(err)
 	}
 	return nil
@@ -818,10 +824,10 @@ func (l *localFile) RenameAt(oldName string, directory p9.File, newName string)
 // ReadAt implements p9.File.
 func (l *localFile) ReadAt(p []byte, offset uint64) (int, error) {
 	if l.mode != p9.ReadOnly && l.mode != p9.ReadWrite {
-		return 0, syscall.EBADF
+		return 0, unix.EBADF
 	}
 	if !l.isOpen() {
-		return 0, syscall.EBADF
+		return 0, unix.EBADF
 	}
 
 	r, err := l.file.ReadAt(p, int64(offset))
@@ -836,10 +842,10 @@ func (l *localFile) ReadAt(p []byte, offset uint64) (int, error) {
 // WriteAt implements p9.File.
 func (l *localFile) WriteAt(p []byte, offset uint64) (int, error) {
 	if l.mode != p9.WriteOnly && l.mode != p9.ReadWrite {
-		return 0, syscall.EBADF
+		return 0, unix.EBADF
 	}
 	if !l.isOpen() {
-		return 0, syscall.EBADF
+		return 0, unix.EBADF
 	}
 
 	w, err := l.file.WriteAt(p, int64(offset))
@@ -860,7 +866,7 @@ func (l *localFile) Symlink(target, newName string, uid p9.UID, gid p9.GID) (p9.
 	}
 	cu := cleanup.Make(func() {
 		// Best effort attempt to remove the symlink in case of failure.
-		if err := syscall.Unlinkat(l.file.FD(), newName); err != nil {
+		if err := unix.Unlinkat(l.file.FD(), newName, 0); err != nil {
 			log.Warningf("error unlinking file %q after failure: %v", path.Join(l.hostPath, newName), err)
 		}
 	})
@@ -899,34 +905,46 @@ func (l *localFile) Link(target p9.File, newName string) error {
 }
 
 // Mknod implements p9.File.
-func (l *localFile) Mknod(name string, mode p9.FileMode, _ uint32, _ uint32, _ p9.UID, _ p9.GID) (p9.QID, error) {
+func (l *localFile) Mknod(name string, mode p9.FileMode, _ uint32, _ uint32, uid p9.UID, gid p9.GID) (p9.QID, error) {
 	if err := l.checkROMount(); err != nil {
 		return p9.QID{}, err
 	}
 
-	hostPath := path.Join(l.hostPath, name)
-
-	// Return EEXIST if the file already exists.
-	if _, err := stat(hostPath); err == nil {
-		return p9.QID{}, syscall.EEXIST
-	}
-
 	// From mknod(2) man page:
 	// "EPERM: [...] if the filesystem containing pathname does not support
 	// the type of node requested."
 	if mode.FileType() != p9.ModeRegular {
-		return p9.QID{}, syscall.EPERM
+		return p9.QID{}, unix.EPERM
 	}
 
 	// Allow Mknod to create regular files.
-	if err := syscall.Mknod(hostPath, uint32(mode), 0); err != nil {
+	if err := unix.Mknodat(l.file.FD(), name, uint32(mode), 0); err != nil {
 		return p9.QID{}, err
 	}
+	cu := cleanup.Make(func() {
+		// Best effort attempt to remove the file in case of failure.
+		if err := unix.Unlinkat(l.file.FD(), name, 0); err != nil {
+			log.Warningf("error unlinking file %q after failure: %v", path.Join(l.hostPath, name), err)
+		}
+	})
+	defer cu.Clean()
 
-	stat, err := stat(hostPath)
+	// Open file to change ownership and stat it.
+	child, err := fd.OpenAt(l.file, name, unix.O_PATH|openFlags, 0)
 	if err != nil {
 		return p9.QID{}, extractErrno(err)
 	}
+	defer child.Close()
+
+	if err := fchown(child.FD(), uid, gid); err != nil {
+		return p9.QID{}, extractErrno(err)
+	}
+	stat, err := fstat(child.FD())
+	if err != nil {
+		return p9.QID{}, extractErrno(err)
+	}
+
+	cu.Release()
 	return l.attachPoint.makeQID(stat), nil
 }
 
@@ -945,10 +963,10 @@ func (l *localFile) UnlinkAt(name string, flags uint32) error {
 // Readdir implements p9.File.
 func (l *localFile) Readdir(offset uint64, count uint32) ([]p9.Dirent, error) {
 	if l.mode != p9.ReadOnly && l.mode != p9.ReadWrite {
-		return nil, syscall.EBADF
+		return nil, unix.EBADF
 	}
 	if !l.isOpen() {
-		return nil, syscall.EBADF
+		return nil, unix.EBADF
 	}
 
 	// Readdirnames is a cursor over directories, so seek back to 0 to ensure it's
@@ -965,7 +983,7 @@ func (l *localFile) Readdir(offset uint64, count uint32) ([]p9.Dirent, error) {
 	// which causes the directory stream to resynchronize with the directory's
 	// current contents).
 	if l.lastDirentOffset != offset || offset == 0 {
-		if _, err := syscall.Seek(l.file.FD(), 0, 0); err != nil {
+		if _, err := unix.Seek(l.file.FD(), 0, 0); err != nil {
 			return nil, extractErrno(err)
 		}
 		skip = offset
@@ -998,7 +1016,7 @@ func (l *localFile) readDirent(f int, offset uint64, count uint32, skip uint64)
 
 	end := offset + uint64(count)
 	for offset < end {
-		dirSize, err := syscall.ReadDirent(f, direntsBuf)
+		dirSize, err := unix.ReadDirent(f, direntsBuf)
 		if err != nil {
 			return dirents, err
 		}
@@ -1007,7 +1025,7 @@ func (l *localFile) readDirent(f int, offset uint64, count uint32, skip uint64)
 		}
 
 		names := names[:0]
-		_, _, names = syscall.ParseDirent(direntsBuf[:dirSize], -1, names)
+		_, _, names = unix.ParseDirent(direntsBuf[:dirSize], -1, names)
 
 		// Skip over entries that the caller is not interested in.
 		if skip > 0 {
@@ -1052,7 +1070,7 @@ func (l *localFile) Readlink() (string, error) {
 			return string(b[:n]), nil
 		}
 	}
-	return "", syscall.ENOMEM
+	return "", unix.ENOMEM
 }
 
 // Flush implements p9.File.
@@ -1063,7 +1081,7 @@ func (l *localFile) Flush() error {
 // Connect implements p9.File.
 func (l *localFile) Connect(flags p9.ConnectFlags) (*fd.FD, error) {
 	if !l.attachPoint.conf.HostUDS {
-		return nil, syscall.ECONNREFUSED
+		return nil, unix.ECONNREFUSED
 	}
 
 	// TODO(gvisor.dev/issue/1003): Due to different app vs replacement
@@ -1071,34 +1089,34 @@ func (l *localFile) Connect(flags p9.ConnectFlags) (*fd.FD, error) {
 	// fit f.path in our sockaddr. We'd need to redirect through a shorter
 	// path in order to actually connect to this socket.
 	if len(l.hostPath) > linux.UnixPathMax {
-		return nil, syscall.ECONNREFUSED
+		return nil, unix.ECONNREFUSED
 	}
 
 	var stype int
 	switch flags {
 	case p9.StreamSocket:
-		stype = syscall.SOCK_STREAM
+		stype = unix.SOCK_STREAM
 	case p9.DgramSocket:
-		stype = syscall.SOCK_DGRAM
+		stype = unix.SOCK_DGRAM
 	case p9.SeqpacketSocket:
-		stype = syscall.SOCK_SEQPACKET
+		stype = unix.SOCK_SEQPACKET
 	default:
-		return nil, syscall.ENXIO
+		return nil, unix.ENXIO
 	}
 
-	f, err := syscall.Socket(syscall.AF_UNIX, stype, 0)
+	f, err := unix.Socket(unix.AF_UNIX, stype, 0)
 	if err != nil {
 		return nil, err
 	}
 
-	if err := syscall.SetNonblock(f, true); err != nil {
-		_ = syscall.Close(f)
+	if err := unix.SetNonblock(f, true); err != nil {
+		_ = unix.Close(f)
 		return nil, err
 	}
 
-	sa := syscall.SockaddrUnix{Name: l.hostPath}
-	if err := syscall.Connect(f, &sa); err != nil {
-		_ = syscall.Close(f)
+	sa := unix.SockaddrUnix{Name: l.hostPath}
+	if err := unix.Connect(f, &sa); err != nil {
+		_ = unix.Close(f)
 		return nil, err
 	}
 
@@ -1123,7 +1141,7 @@ func (l *localFile) Renamed(newDir p9.File, newName string) {
 }
 
 // extractErrno tries to determine the errno.
-func extractErrno(err error) syscall.Errno {
+func extractErrno(err error) unix.Errno {
 	if err == nil {
 		// This should never happen. The likely result will be that
 		// some user gets the frustrating "error: SUCCESS" message.
@@ -1133,18 +1151,18 @@ func extractErrno(err error) syscall.Errno {
 
 	switch err {
 	case os.ErrNotExist:
-		return syscall.ENOENT
+		return unix.ENOENT
 	case os.ErrExist:
-		return syscall.EEXIST
+		return unix.EEXIST
 	case os.ErrPermission:
-		return syscall.EACCES
+		return unix.EACCES
 	case os.ErrInvalid:
-		return syscall.EINVAL
+		return unix.EINVAL
 	}
 
 	// See if it's an errno or a common wrapped error.
 	switch e := err.(type) {
-	case syscall.Errno:
+	case unix.Errno:
 		return e
 	case *os.PathError:
 		return extractErrno(e.Err)
@@ -1156,7 +1174,7 @@ func extractErrno(err error) syscall.Errno {
 
 	// Fall back to EIO.
 	log.Debugf("Unknown error: %v, defaulting to EIO", err)
-	return syscall.EIO
+	return unix.EIO
 }
 
 func (l *localFile) checkROMount() error {
@@ -1164,7 +1182,7 @@ func (l *localFile) checkROMount() error {
 		if conf.PanicOnWrite {
 			panic("attempt to write to RO mount")
 		}
-		return syscall.EROFS
+		return unix.EROFS
 	}
 	return nil
 }
diff --git a/runsc/fsgofer/fsgofer_amd64_unsafe.go b/runsc/fsgofer/fsgofer_amd64_unsafe.go
index 5d4aab5977..c469581859 100644
--- a/runsc/fsgofer/fsgofer_amd64_unsafe.go
+++ b/runsc/fsgofer/fsgofer_amd64_unsafe.go
@@ -17,25 +17,25 @@
 package fsgofer
 
 import (
-	"syscall"
 	"unsafe"
 
+	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/syserr"
 )
 
-func statAt(dirFd int, name string) (syscall.Stat_t, error) {
-	nameBytes, err := syscall.BytePtrFromString(name)
+func statAt(dirFd int, name string) (unix.Stat_t, error) {
+	nameBytes, err := unix.BytePtrFromString(name)
 	if err != nil {
-		return syscall.Stat_t{}, err
+		return unix.Stat_t{}, err
 	}
 	namePtr := unsafe.Pointer(nameBytes)
 
-	var stat syscall.Stat_t
+	var stat unix.Stat_t
 	statPtr := unsafe.Pointer(&stat)
 
-	if _, _, errno := syscall.Syscall6(
-		syscall.SYS_NEWFSTATAT,
+	if _, _, errno := unix.Syscall6(
+		unix.SYS_NEWFSTATAT,
 		uintptr(dirFd),
 		uintptr(namePtr),
 		uintptr(statPtr),
@@ -43,7 +43,7 @@ func statAt(dirFd int, name string) (syscall.Stat_t, error) {
 		0,
 		0); errno != 0 {
 
-		return syscall.Stat_t{}, syserr.FromHost(errno).ToError()
+		return unix.Stat_t{}, syserr.FromHost(errno).ToError()
 	}
 	return stat, nil
 }
diff --git a/runsc/fsgofer/fsgofer_arm64_unsafe.go b/runsc/fsgofer/fsgofer_arm64_unsafe.go
index 8041fd3522..4914607187 100644
--- a/runsc/fsgofer/fsgofer_arm64_unsafe.go
+++ b/runsc/fsgofer/fsgofer_arm64_unsafe.go
@@ -17,25 +17,25 @@
 package fsgofer
 
 import (
-	"syscall"
 	"unsafe"
 
+	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/syserr"
 )
 
-func statAt(dirFd int, name string) (syscall.Stat_t, error) {
-	nameBytes, err := syscall.BytePtrFromString(name)
+func statAt(dirFd int, name string) (unix.Stat_t, error) {
+	nameBytes, err := unix.BytePtrFromString(name)
 	if err != nil {
-		return syscall.Stat_t{}, err
+		return unix.Stat_t{}, err
 	}
 	namePtr := unsafe.Pointer(nameBytes)
 
-	var stat syscall.Stat_t
+	var stat unix.Stat_t
 	statPtr := unsafe.Pointer(&stat)
 
-	if _, _, errno := syscall.Syscall6(
-		syscall.SYS_FSTATAT,
+	if _, _, errno := unix.Syscall6(
+		unix.SYS_FSTATAT,
 		uintptr(dirFd),
 		uintptr(namePtr),
 		uintptr(statPtr),
@@ -43,7 +43,7 @@ func statAt(dirFd int, name string) (syscall.Stat_t, error) {
 		0,
 		0); errno != 0 {
 
-		return syscall.Stat_t{}, syserr.FromHost(errno).ToError()
+		return unix.Stat_t{}, syserr.FromHost(errno).ToError()
 	}
 	return stat, nil
 }
diff --git a/runsc/fsgofer/fsgofer_test.go b/runsc/fsgofer/fsgofer_test.go
index 8ed703584a..c91cfd094e 100644
--- a/runsc/fsgofer/fsgofer_test.go
+++ b/runsc/fsgofer/fsgofer_test.go
@@ -21,9 +21,9 @@ import (
 	"os"
 	"path"
 	"path/filepath"
-	"syscall"
 	"testing"
 
+	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/p9"
 	"gvisor.dev/gvisor/pkg/test/testutil"
@@ -32,7 +32,7 @@ import (
 var allOpenFlags = []p9.OpenFlags{p9.ReadOnly, p9.WriteOnly, p9.ReadWrite}
 
 var (
-	allTypes = []uint32{syscall.S_IFREG, syscall.S_IFDIR, syscall.S_IFLNK}
+	allTypes = []uint32{unix.S_IFREG, unix.S_IFDIR, unix.S_IFLNK}
 
 	// allConfs is set in init().
 	allConfs []Config
@@ -83,7 +83,7 @@ func testReadWrite(f p9.File, flags p9.OpenFlags, content []byte) error {
 		}
 		want = append(want, b...)
 	} else {
-		if e, ok := err.(syscall.Errno); !ok || e != syscall.EBADF {
+		if e, ok := err.(unix.Errno); !ok || e != unix.EBADF {
 			return fmt.Errorf("WriteAt() should have failed, got: %d, want: EBADFD", err)
 		}
 	}
@@ -101,7 +101,7 @@ func testReadWrite(f p9.File, flags p9.OpenFlags, content []byte) error {
 			return fmt.Errorf("ReadAt() wrong data, got: %s, want: %s", string(rBuf), want)
 		}
 	} else {
-		if e, ok := err.(syscall.Errno); !ok || e != syscall.EBADF {
+		if e, ok := err.(unix.Errno); !ok || e != unix.EBADF {
 			return fmt.Errorf("ReadAt() should have failed, got: %d, want: EBADFD", err)
 		}
 	}
@@ -121,11 +121,11 @@ func (s state) String() string {
 
 func typeName(fileType uint32) string {
 	switch fileType {
-	case syscall.S_IFREG:
+	case unix.S_IFREG:
 		return "file"
-	case syscall.S_IFDIR:
+	case unix.S_IFDIR:
 		return "directory"
-	case syscall.S_IFLNK:
+	case unix.S_IFLNK:
 		return "symlink"
 	default:
 		panic(fmt.Sprintf("invalid file type for test: %d", fileType))
@@ -195,19 +195,19 @@ func setup(fileType uint32) (string, string, error) {
 
 	var name string
 	switch fileType {
-	case syscall.S_IFREG:
+	case unix.S_IFREG:
 		name = "file"
 		_, f, _, _, err := root.Create(name, p9.ReadWrite, 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid()))
 		if err != nil {
 			return "", "", fmt.Errorf("createFile(root, %q) failed, err: %v", "test", err)
 		}
 		defer f.Close()
-	case syscall.S_IFDIR:
+	case unix.S_IFDIR:
 		name = "dir"
 		if _, err := root.Mkdir(name, 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid())); err != nil {
 			return "", "", fmt.Errorf("root.MkDir(%q) failed, err: %v", name, err)
 		}
-	case syscall.S_IFLNK:
+	case unix.S_IFLNK:
 		name = "symlink"
 		if _, err := root.Symlink("/some/target", name, p9.UID(os.Getuid()), p9.GID(os.Getgid())); err != nil {
 			return "", "", fmt.Errorf("root.Symlink(%q) failed, err: %v", name, err)
@@ -227,7 +227,7 @@ func createFile(dir *localFile, name string) (*localFile, error) {
 }
 
 func TestReadWrite(t *testing.T) {
-	runCustom(t, []uint32{syscall.S_IFDIR}, rwConfs, func(t *testing.T, s state) {
+	runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s state) {
 		child, err := createFile(s.file, "test")
 		if err != nil {
 			t.Fatalf("%v: createFile() failed, err: %v", s, err)
@@ -246,9 +246,13 @@ func TestReadWrite(t *testing.T) {
 			if err != nil {
 				t.Fatalf("%v: Walk(%s) failed, err: %v", s, "test", err)
 			}
-			if _, _, _, err := l.Open(flags); err != nil {
+			fd, _, _, err := l.Open(flags)
+			if err != nil {
 				t.Fatalf("%v: Open(%v) failed, err: %v", s, flags, err)
 			}
+			if fd != nil {
+				defer fd.Close()
+			}
 			if err := testReadWrite(l, flags, want); err != nil {
 				t.Fatalf("%v: testReadWrite(%v) failed: %v", s, flags, err)
 			}
@@ -257,14 +261,14 @@ func TestReadWrite(t *testing.T) {
 }
 
 func TestCreate(t *testing.T) {
-	runCustom(t, []uint32{syscall.S_IFDIR}, rwConfs, func(t *testing.T, s state) {
+	runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s state) {
 		for i, flags := range allOpenFlags {
 			_, l, _, _, err := s.file.Create(fmt.Sprintf("test-%d", i), flags, 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid()))
 			if err != nil {
 				t.Fatalf("%v, %v: WriteAt() failed, err: %v", s, flags, err)
 			}
 
-			if err := testReadWrite(l, flags, []byte{}); err != nil {
+			if err := testReadWrite(l, flags, nil); err != nil {
 				t.Fatalf("%v: testReadWrite(%v) failed: %v", s, flags, err)
 			}
 		}
@@ -274,7 +278,7 @@ func TestCreate(t *testing.T) {
 // TestReadWriteDup tests that a file opened in any mode can be dup'ed and
 // reopened in any other mode.
 func TestReadWriteDup(t *testing.T) {
-	runCustom(t, []uint32{syscall.S_IFDIR}, rwConfs, func(t *testing.T, s state) {
+	runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s state) {
 		child, err := createFile(s.file, "test")
 		if err != nil {
 			t.Fatalf("%v: createFile() failed, err: %v", s, err)
@@ -304,9 +308,13 @@ func TestReadWriteDup(t *testing.T) {
 					t.Fatalf("%v: Walk(<empty>) failed: %v", s, err)
 				}
 				defer dup.Close()
-				if _, _, _, err := dup.Open(dupFlags); err != nil {
+				fd, _, _, err := dup.Open(dupFlags)
+				if err != nil {
 					t.Fatalf("%v: Open(%v) failed: %v", s, flags, err)
 				}
+				if fd != nil {
+					defer fd.Close()
+				}
 				if err := testReadWrite(dup, dupFlags, want); err != nil {
 					t.Fatalf("%v: testReadWrite(%v) failed: %v", s, dupFlags, err)
 				}
@@ -316,19 +324,19 @@ func TestReadWriteDup(t *testing.T) {
 }
 
 func TestUnopened(t *testing.T) {
-	runCustom(t, []uint32{syscall.S_IFREG}, allConfs, func(t *testing.T, s state) {
+	runCustom(t, []uint32{unix.S_IFREG}, allConfs, func(t *testing.T, s state) {
 		b := []byte("foobar")
-		if _, err := s.file.WriteAt(b, 0); err != syscall.EBADF {
-			t.Errorf("%v: WriteAt() should have failed, got: %v, expected: syscall.EBADF", s, err)
+		if _, err := s.file.WriteAt(b, 0); err != unix.EBADF {
+			t.Errorf("%v: WriteAt() should have failed, got: %v, expected: unix.EBADF", s, err)
 		}
-		if _, err := s.file.ReadAt(b, 0); err != syscall.EBADF {
-			t.Errorf("%v: ReadAt() should have failed, got: %v, expected: syscall.EBADF", s, err)
+		if _, err := s.file.ReadAt(b, 0); err != unix.EBADF {
+			t.Errorf("%v: ReadAt() should have failed, got: %v, expected: unix.EBADF", s, err)
 		}
-		if _, err := s.file.Readdir(0, 100); err != syscall.EBADF {
-			t.Errorf("%v: Readdir() should have failed, got: %v, expected: syscall.EBADF", s, err)
+		if _, err := s.file.Readdir(0, 100); err != unix.EBADF {
+			t.Errorf("%v: Readdir() should have failed, got: %v, expected: unix.EBADF", s, err)
 		}
-		if err := s.file.FSync(); err != syscall.EBADF {
-			t.Errorf("%v: FSync() should have failed, got: %v, expected: syscall.EBADF", s, err)
+		if err := s.file.FSync(); err != unix.EBADF {
+			t.Errorf("%v: FSync() should have failed, got: %v, expected: unix.EBADF", s, err)
 		}
 	})
 }
@@ -338,7 +346,7 @@ func TestUnopened(t *testing.T) {
 // was open with O_PATH, but Open() was not checking for it and allowing the
 // control file to be reused.
 func TestOpenOPath(t *testing.T) {
-	runCustom(t, []uint32{syscall.S_IFREG}, rwConfs, func(t *testing.T, s state) {
+	runCustom(t, []uint32{unix.S_IFREG}, rwConfs, func(t *testing.T, s state) {
 		// Fist remove all permissions on the file.
 		if err := s.file.SetAttr(p9.SetAttrMask{Permissions: true}, p9.SetAttr{Permissions: p9.FileMode(0)}); err != nil {
 			t.Fatalf("SetAttr(): %v", err)
@@ -353,7 +361,7 @@ func TestOpenOPath(t *testing.T) {
 		if newFile.(*localFile).controlReadable {
 			t.Fatalf("control file didn't open with O_PATH: %+v", newFile)
 		}
-		if _, _, _, err := newFile.Open(p9.ReadOnly); err != syscall.EACCES {
+		if _, _, _, err := newFile.Open(p9.ReadOnly); err != unix.EACCES {
 			t.Fatalf("Open() should have failed, got: %v, wanted: EACCES", err)
 		}
 	})
@@ -375,7 +383,7 @@ func TestSetAttrPerm(t *testing.T) {
 		valid := p9.SetAttrMask{Permissions: true}
 		attr := p9.SetAttr{Permissions: 0777}
 		got, err := SetGetAttr(s.file, valid, attr)
-		if s.fileType == syscall.S_IFLNK {
+		if s.fileType == unix.S_IFLNK {
 			if err == nil {
 				t.Fatalf("%v: SetGetAttr(valid, %v) should have failed", s, attr.Permissions)
 			}
@@ -396,7 +404,7 @@ func TestSetAttrSize(t *testing.T) {
 			valid := p9.SetAttrMask{Size: true}
 			attr := p9.SetAttr{Size: size}
 			got, err := SetGetAttr(s.file, valid, attr)
-			if s.fileType == syscall.S_IFLNK || s.fileType == syscall.S_IFDIR {
+			if s.fileType == unix.S_IFLNK || s.fileType == unix.S_IFDIR {
 				if err == nil {
 					t.Fatalf("%v: SetGetAttr(valid, %v) should have failed", s, attr.Permissions)
 				}
@@ -478,9 +486,9 @@ func TestLink(t *testing.T) {
 		}
 
 		err = dir.Link(s.file, linkFile)
-		if s.fileType == syscall.S_IFDIR {
-			if err != syscall.EPERM {
-				t.Errorf("%v: Link(target, %s) should have failed, got: %v, expected: syscall.EPERM", s, linkFile, err)
+		if s.fileType == unix.S_IFDIR {
+			if err != unix.EPERM {
+				t.Errorf("%v: Link(target, %s) should have failed, got: %v, expected: unix.EPERM", s, linkFile, err)
 			}
 			return
 		}
@@ -491,9 +499,12 @@ func TestLink(t *testing.T) {
 }
 
 func TestROMountChecks(t *testing.T) {
-	const want = syscall.EROFS
+	const want = unix.EROFS
+	uid := p9.UID(os.Getuid())
+	gid := p9.GID(os.Getgid())
+
 	runCustom(t, allTypes, roConfs, func(t *testing.T, s state) {
-		if s.fileType != syscall.S_IFLNK {
+		if s.fileType != unix.S_IFLNK {
 			if _, _, _, err := s.file.Open(p9.WriteOnly); err != want {
 				t.Errorf("Open() should have failed, got: %v, expected: %v", err, want)
 			}
@@ -512,16 +523,16 @@ func TestROMountChecks(t *testing.T) {
 			}
 		}
 
-		if _, _, _, _, err := s.file.Create("some_file", p9.ReadWrite, 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid())); err != want {
+		if _, _, _, _, err := s.file.Create("some_file", p9.ReadWrite, 0777, uid, gid); err != want {
 			t.Errorf("Create() should have failed, got: %v, expected: %v", err, want)
 		}
-		if _, err := s.file.Mkdir("some_dir", 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid())); err != want {
+		if _, err := s.file.Mkdir("some_dir", 0777, uid, gid); err != want {
 			t.Errorf("MkDir() should have failed, got: %v, expected: %v", err, want)
 		}
 		if err := s.file.RenameAt("some_file", s.file, "other_file"); err != want {
 			t.Errorf("Rename() should have failed, got: %v, expected: %v", err, want)
 		}
-		if _, err := s.file.Symlink("some_place", "some_symlink", p9.UID(os.Getuid()), p9.GID(os.Getgid())); err != want {
+		if _, err := s.file.Symlink("some_place", "some_symlink", uid, gid); err != want {
 			t.Errorf("Symlink() should have failed, got: %v, expected: %v", err, want)
 		}
 		if err := s.file.UnlinkAt("some_file", 0); err != want {
@@ -530,6 +541,9 @@ func TestROMountChecks(t *testing.T) {
 		if err := s.file.Link(s.file, "some_link"); err != want {
 			t.Errorf("Link() should have failed, got: %v, expected: %v", err, want)
 		}
+		if _, err := s.file.Mknod("some-nod", 0777, 1, 2, uid, gid); err != want {
+			t.Errorf("Mknod() should have failed, got: %v, expected: %v", err, want)
+		}
 
 		valid := p9.SetAttrMask{Size: true}
 		attr := p9.SetAttr{Size: 0}
@@ -541,16 +555,20 @@ func TestROMountChecks(t *testing.T) {
 
 func TestROMountPanics(t *testing.T) {
 	conf := Config{ROMount: true, PanicOnWrite: true}
+	uid := p9.UID(os.Getuid())
+	gid := p9.GID(os.Getgid())
+
 	runCustom(t, allTypes, []Config{conf}, func(t *testing.T, s state) {
-		if s.fileType != syscall.S_IFLNK {
+		if s.fileType != unix.S_IFLNK {
 			assertPanic(t, func() { s.file.Open(p9.WriteOnly) })
 		}
-		assertPanic(t, func() { s.file.Create("some_file", p9.ReadWrite, 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid())) })
-		assertPanic(t, func() { s.file.Mkdir("some_dir", 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid())) })
+		assertPanic(t, func() { s.file.Create("some_file", p9.ReadWrite, 0777, uid, gid) })
+		assertPanic(t, func() { s.file.Mkdir("some_dir", 0777, uid, gid) })
 		assertPanic(t, func() { s.file.RenameAt("some_file", s.file, "other_file") })
-		assertPanic(t, func() { s.file.Symlink("some_place", "some_symlink", p9.UID(os.Getuid()), p9.GID(os.Getgid())) })
+		assertPanic(t, func() { s.file.Symlink("some_place", "some_symlink", uid, gid) })
 		assertPanic(t, func() { s.file.UnlinkAt("some_file", 0) })
 		assertPanic(t, func() { s.file.Link(s.file, "some_link") })
+		assertPanic(t, func() { s.file.Mknod("some-nod", 0777, 1, 2, uid, gid) })
 
 		valid := p9.SetAttrMask{Size: true}
 		attr := p9.SetAttr{Size: 0}
@@ -559,9 +577,9 @@ func TestROMountPanics(t *testing.T) {
 }
 
 func TestWalkNotFound(t *testing.T) {
-	runCustom(t, []uint32{syscall.S_IFDIR}, allConfs, func(t *testing.T, s state) {
-		if _, _, err := s.file.Walk([]string{"nobody-here"}); err != syscall.ENOENT {
-			t.Errorf("%v: Walk(%q) should have failed, got: %v, expected: syscall.ENOENT", s, "nobody-here", err)
+	runCustom(t, []uint32{unix.S_IFDIR}, allConfs, func(t *testing.T, s state) {
+		if _, _, err := s.file.Walk([]string{"nobody-here"}); err != unix.ENOENT {
+			t.Errorf("%v: Walk(%q) should have failed, got: %v, expected: unix.ENOENT", s, "nobody-here", err)
 		}
 	})
 }
@@ -580,7 +598,7 @@ func TestWalkDup(t *testing.T) {
 }
 
 func TestReaddir(t *testing.T) {
-	runCustom(t, []uint32{syscall.S_IFDIR}, rwConfs, func(t *testing.T, s state) {
+	runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s state) {
 		name := "dir"
 		if _, err := s.file.Mkdir(name, 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid())); err != nil {
 			t.Fatalf("%v: MkDir(%s) failed, err: %v", s, name, err)
@@ -705,7 +723,7 @@ func TestAttachInvalidType(t *testing.T) {
 	defer os.RemoveAll(dir)
 
 	fifo := filepath.Join(dir, "fifo")
-	if err := syscall.Mkfifo(fifo, 0755); err != nil {
+	if err := unix.Mkfifo(fifo, 0755); err != nil {
 		t.Fatalf("Mkfifo(%q): %v", fifo, err)
 	}
 
@@ -766,16 +784,16 @@ func TestDoubleAttachError(t *testing.T) {
 }
 
 func TestTruncate(t *testing.T) {
-	runCustom(t, []uint32{syscall.S_IFDIR}, rwConfs, func(t *testing.T, s state) {
+	runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s state) {
 		child, err := createFile(s.file, "test")
 		if err != nil {
-			t.Fatalf("createFile() failed, err: %v", err)
+			t.Fatalf("createFile() failed: %v", err)
 		}
 		defer child.Close()
 		want := []byte("foobar")
 		w, err := child.WriteAt(want, 0)
 		if err != nil {
-			t.Fatalf("Write() failed, err: %v", err)
+			t.Fatalf("Write() failed: %v", err)
 		}
 		if w != len(want) {
 			t.Fatalf("Write() was partial, got: %d, expected: %d", w, len(want))
@@ -783,12 +801,15 @@ func TestTruncate(t *testing.T) {
 
 		_, l, err := s.file.Walk([]string{"test"})
 		if err != nil {
-			t.Fatalf("Walk(%s) failed, err: %v", "test", err)
+			t.Fatalf("Walk(%s) failed: %v", "test", err)
 		}
 		if _, _, _, err := l.Open(p9.ReadOnly | p9.OpenTruncate); err != nil {
-			t.Fatalf("Open() failed, err: %v", err)
+			t.Fatalf("Open() failed: %v", err)
 		}
 		_, mask, attr, err := l.GetAttr(p9.AttrMask{Size: true})
+		if err != nil {
+			t.Fatalf("GetAttr() failed: %v", err)
+		}
 		if !mask.Size {
 			t.Fatalf("GetAttr() didn't return size: %+v", mask)
 		}
@@ -797,3 +818,27 @@ func TestTruncate(t *testing.T) {
 		}
 	})
 }
+
+func TestMknod(t *testing.T) {
+	runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s state) {
+		_, err := s.file.Mknod("test", p9.ModeRegular|0777, 1, 2, p9.UID(os.Getuid()), p9.GID(os.Getgid()))
+		if err != nil {
+			t.Fatalf("Mknod() failed: %v", err)
+		}
+
+		_, f, err := s.file.Walk([]string{"test"})
+		if err != nil {
+			t.Fatalf("Walk() failed: %v", err)
+		}
+		fd, _, _, err := f.Open(p9.ReadWrite)
+		if err != nil {
+			t.Fatalf("Open() failed: %v", err)
+		}
+		if fd != nil {
+			defer fd.Close()
+		}
+		if err := testReadWrite(f, p9.ReadWrite, nil); err != nil {
+			t.Fatalf("testReadWrite() failed: %v", err)
+		}
+	})
+}
diff --git a/runsc/fsgofer/fsgofer_unsafe.go b/runsc/fsgofer/fsgofer_unsafe.go
index 542b54365c..f11fea40d9 100644
--- a/runsc/fsgofer/fsgofer_unsafe.go
+++ b/runsc/fsgofer/fsgofer_unsafe.go
@@ -15,18 +15,18 @@
 package fsgofer
 
 import (
-	"syscall"
 	"unsafe"
 
+	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/syserr"
 )
 
-func utimensat(dirFd int, name string, times [2]syscall.Timespec, flags int) error {
+func utimensat(dirFd int, name string, times [2]unix.Timespec, flags int) error {
 	// utimensat(2) doesn't accept empty name, instead name must be nil to make it
 	// operate directly on 'dirFd' unlike other *at syscalls.
 	var namePtr unsafe.Pointer
 	if name != "" {
-		nameBytes, err := syscall.BytePtrFromString(name)
+		nameBytes, err := unix.BytePtrFromString(name)
 		if err != nil {
 			return err
 		}
@@ -35,8 +35,8 @@ func utimensat(dirFd int, name string, times [2]syscall.Timespec, flags int) err
 
 	timesPtr := unsafe.Pointer(&times[0])
 
-	if _, _, errno := syscall.Syscall6(
-		syscall.SYS_UTIMENSAT,
+	if _, _, errno := unix.Syscall6(
+		unix.SYS_UTIMENSAT,
 		uintptr(dirFd),
 		uintptr(namePtr),
 		uintptr(timesPtr),
@@ -52,7 +52,7 @@ func utimensat(dirFd int, name string, times [2]syscall.Timespec, flags int) err
 func renameat(oldDirFD int, oldName string, newDirFD int, newName string) error {
 	var oldNamePtr unsafe.Pointer
 	if oldName != "" {
-		nameBytes, err := syscall.BytePtrFromString(oldName)
+		nameBytes, err := unix.BytePtrFromString(oldName)
 		if err != nil {
 			return err
 		}
@@ -60,15 +60,15 @@ func renameat(oldDirFD int, oldName string, newDirFD int, newName string) error
 	}
 	var newNamePtr unsafe.Pointer
 	if newName != "" {
-		nameBytes, err := syscall.BytePtrFromString(newName)
+		nameBytes, err := unix.BytePtrFromString(newName)
 		if err != nil {
 			return err
 		}
 		newNamePtr = unsafe.Pointer(nameBytes)
 	}
 
-	if _, _, errno := syscall.Syscall6(
-		syscall.SYS_RENAMEAT,
+	if _, _, errno := unix.Syscall6(
+		unix.SYS_RENAMEAT,
 		uintptr(oldDirFD),
 		uintptr(oldNamePtr),
 		uintptr(newDirFD),

From 8a725d8a66ef1c38b256c52c1865e5000cc8ca36 Mon Sep 17 00:00:00 2001
From: Fabricio Voznika <fvoznika@google.com>
Date: Wed, 19 Aug 2020 18:35:35 -0700
Subject: [PATCH 036/211] Move boot.Config to its own package

Updates #3494

PiperOrigin-RevId: 327548511
---
 pkg/test/testutil/BUILD                    |  2 +-
 pkg/test/testutil/testutil.go              | 12 ++--
 runsc/BUILD                                |  4 +-
 runsc/boot/BUILD                           |  3 +-
 runsc/boot/controller.go                   |  3 +-
 runsc/boot/fs.go                           | 41 +++++------
 runsc/boot/fs_test.go                      | 11 +--
 runsc/boot/loader.go                       | 19 ++---
 runsc/boot/loader_test.go                  |  7 +-
 runsc/boot/network.go                      | 45 ++----------
 runsc/boot/strace.go                       |  3 +-
 runsc/boot/vfs.go                          | 21 +++---
 runsc/cmd/BUILD                            |  3 +-
 runsc/cmd/boot.go                          |  3 +-
 runsc/cmd/capability_test.go               |  4 +-
 runsc/cmd/checkpoint.go                    |  4 +-
 runsc/cmd/create.go                        |  4 +-
 runsc/cmd/debug.go                         |  4 +-
 runsc/cmd/delete.go                        |  6 +-
 runsc/cmd/delete_test.go                   |  4 +-
 runsc/cmd/do.go                            | 10 +--
 runsc/cmd/events.go                        |  4 +-
 runsc/cmd/exec.go                          |  4 +-
 runsc/cmd/gofer.go                         | 12 ++--
 runsc/cmd/kill.go                          |  4 +-
 runsc/cmd/list.go                          |  4 +-
 runsc/cmd/pause.go                         |  4 +-
 runsc/cmd/ps.go                            |  4 +-
 runsc/cmd/restore.go                       |  4 +-
 runsc/cmd/resume.go                        |  4 +-
 runsc/cmd/run.go                           |  4 +-
 runsc/cmd/start.go                         |  4 +-
 runsc/cmd/state.go                         |  4 +-
 runsc/cmd/wait.go                          |  4 +-
 runsc/config/BUILD                         | 15 ++++
 runsc/{boot => config}/config.go           | 84 ++++++++++++++++------
 runsc/container/BUILD                      |  2 +
 runsc/container/container.go               | 11 +--
 runsc/container/container_test.go          | 12 ++--
 runsc/container/multi_container_test.go    |  3 +-
 runsc/container/shared_volume_test.go      |  6 +-
 runsc/fsgofer/fsgofer_test.go              |  4 +-
 runsc/main.go                              | 16 ++---
 runsc/sandbox/BUILD                        |  1 +
 runsc/sandbox/network.go                   | 11 +--
 runsc/sandbox/sandbox.go                   | 19 ++---
 website/blog/2019-11-18-security-basics.md |  2 +-
 47 files changed, 247 insertions(+), 212 deletions(-)
 create mode 100644 runsc/config/BUILD
 rename runsc/{boot => config}/config.go (89%)

diff --git a/pkg/test/testutil/BUILD b/pkg/test/testutil/BUILD
index 2d8f56bc0c..c4b1318969 100644
--- a/pkg/test/testutil/BUILD
+++ b/pkg/test/testutil/BUILD
@@ -12,7 +12,7 @@ go_library(
     visibility = ["//:sandbox"],
     deps = [
         "//pkg/sync",
-        "//runsc/boot",
+        "//runsc/config",
         "//runsc/specutils",
         "@com_github_cenkalti_backoff//:go_default_library",
         "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
diff --git a/pkg/test/testutil/testutil.go b/pkg/test/testutil/testutil.go
index 1580527b59..3cb6c6814f 100644
--- a/pkg/test/testutil/testutil.go
+++ b/pkg/test/testutil/testutil.go
@@ -44,7 +44,7 @@ import (
 	"github.com/cenkalti/backoff"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"gvisor.dev/gvisor/pkg/sync"
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/specutils"
 )
 
@@ -133,21 +133,21 @@ func Command(logger Logger, args ...string) *Cmd {
 
 // TestConfig returns the default configuration to use in tests. Note that
 // 'RootDir' must be set by caller if required.
-func TestConfig(t *testing.T) *boot.Config {
+func TestConfig(t *testing.T) *config.Config {
 	logDir := os.TempDir()
 	if dir, ok := os.LookupEnv("TEST_UNDECLARED_OUTPUTS_DIR"); ok {
 		logDir = dir + "/"
 	}
-	return &boot.Config{
+	return &config.Config{
 		Debug:              true,
 		DebugLog:           path.Join(logDir, "runsc.log."+t.Name()+".%TIMESTAMP%.%COMMAND%"),
 		LogFormat:          "text",
 		DebugLogFormat:     "text",
 		LogPackets:         true,
-		Network:            boot.NetworkNone,
+		Network:            config.NetworkNone,
 		Strace:             true,
 		Platform:           "ptrace",
-		FileAccess:         boot.FileAccessExclusive,
+		FileAccess:         config.FileAccessExclusive,
 		NumNetworkChannels: 1,
 
 		TestOnlyAllowRunAsCurrentUserWithoutChroot: true,
@@ -203,7 +203,7 @@ func SetupRootDir() (string, func(), error) {
 
 // SetupContainer creates a bundle and root dir for the container, generates a
 // test config, and writes the spec to config.json in the bundle dir.
-func SetupContainer(spec *specs.Spec, conf *boot.Config) (rootDir, bundleDir string, cleanup func(), err error) {
+func SetupContainer(spec *specs.Spec, conf *config.Config) (rootDir, bundleDir string, cleanup func(), err error) {
 	rootDir, rootCleanup, err := SetupRootDir()
 	if err != nil {
 		return "", "", nil, err
diff --git a/runsc/BUILD b/runsc/BUILD
index 96f697a5fe..267fb2af86 100644
--- a/runsc/BUILD
+++ b/runsc/BUILD
@@ -17,8 +17,8 @@ go_binary(
         "//pkg/log",
         "//pkg/refs",
         "//pkg/sentry/platform",
-        "//runsc/boot",
         "//runsc/cmd",
+        "//runsc/config",
         "//runsc/flag",
         "//runsc/specutils",
         "@com_github_google_subcommands//:go_default_library",
@@ -53,8 +53,8 @@ go_binary(
         "//pkg/log",
         "//pkg/refs",
         "//pkg/sentry/platform",
-        "//runsc/boot",
         "//runsc/cmd",
+        "//runsc/config",
         "//runsc/flag",
         "//runsc/specutils",
         "@com_github_google_subcommands//:go_default_library",
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index 9f52438c25..040f6a72d3 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -8,7 +8,6 @@ go_library(
         "compat.go",
         "compat_amd64.go",
         "compat_arm64.go",
-        "config.go",
         "controller.go",
         "debug.go",
         "events.go",
@@ -105,6 +104,7 @@ go_library(
         "//runsc/boot/filter",
         "//runsc/boot/platforms",
         "//runsc/boot/pprof",
+        "//runsc/config",
         "//runsc/specutils",
         "@com_github_golang_protobuf//proto:go_default_library",
         "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
@@ -131,6 +131,7 @@ go_test(
         "//pkg/sentry/vfs",
         "//pkg/sync",
         "//pkg/unet",
+        "//runsc/config",
         "//runsc/fsgofer",
         "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
         "@org_golang_x_sys//unix:go_default_library",
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 626a3816e1..68a2b45cfb 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -33,6 +33,7 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 	"gvisor.dev/gvisor/pkg/urpc"
 	"gvisor.dev/gvisor/runsc/boot/pprof"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/specutils"
 )
 
@@ -220,7 +221,7 @@ type StartArgs struct {
 	Spec *specs.Spec
 
 	// Config is the runsc-specific configuration for the sandbox.
-	Conf *Config
+	Conf *config.Config
 
 	// CID is the ID of the container to start.
 	CID string
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 9dd5b0184a..163265afe1 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -48,6 +48,7 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/syserror"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/specutils"
 )
 
@@ -66,7 +67,7 @@ const (
 // tmpfs has some extra supported options that we must pass through.
 var tmpfsAllowedData = []string{"mode", "uid", "gid"}
 
-func addOverlay(ctx context.Context, conf *Config, lower *fs.Inode, name string, lowerFlags fs.MountSourceFlags) (*fs.Inode, error) {
+func addOverlay(ctx context.Context, conf *config.Config, lower *fs.Inode, name string, lowerFlags fs.MountSourceFlags) (*fs.Inode, error) {
 	// Upper layer uses the same flags as lower, but it must be read-write.
 	upperFlags := lowerFlags
 	upperFlags.ReadOnly = false
@@ -156,7 +157,7 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
 }
 
 // p9MountData creates a slice of p9 mount data.
-func p9MountData(fd int, fa FileAccessType, vfs2 bool) []string {
+func p9MountData(fd int, fa config.FileAccessType, vfs2 bool) []string {
 	opts := []string{
 		"trans=fd",
 		"rfdno=" + strconv.Itoa(fd),
@@ -167,7 +168,7 @@ func p9MountData(fd int, fa FileAccessType, vfs2 bool) []string {
 		// enablement.
 		opts = append(opts, "privateunixsocket=true")
 	}
-	if fa == FileAccessShared {
+	if fa == config.FileAccessShared {
 		opts = append(opts, "cache=remote_revalidating")
 	}
 	return opts
@@ -281,7 +282,7 @@ func subtargets(root string, mnts []specs.Mount) []string {
 	return targets
 }
 
-func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error {
+func setupContainerFS(ctx context.Context, conf *config.Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error {
 	if conf.VFS2 {
 		return setupContainerVFS2(ctx, conf, mntr, procArgs)
 	}
@@ -468,11 +469,11 @@ func (m *mountHint) checkCompatible(mount specs.Mount) error {
 	return nil
 }
 
-func (m *mountHint) fileAccessType() FileAccessType {
+func (m *mountHint) fileAccessType() config.FileAccessType {
 	if m.share == container {
-		return FileAccessExclusive
+		return config.FileAccessExclusive
 	}
-	return FileAccessShared
+	return config.FileAccessShared
 }
 
 func filterUnsupportedOptions(mount specs.Mount) []string {
@@ -576,7 +577,7 @@ func newContainerMounter(spec *specs.Spec, goferFDs []int, k *kernel.Kernel, hin
 // processHints processes annotations that container hints about how volumes
 // should be mounted (e.g. a volume shared between containers). It must be
 // called for the root container only.
-func (c *containerMounter) processHints(conf *Config, creds *auth.Credentials) error {
+func (c *containerMounter) processHints(conf *config.Config, creds *auth.Credentials) error {
 	if conf.VFS2 {
 		return c.processHintsVFS2(conf, creds)
 	}
@@ -600,7 +601,7 @@ func (c *containerMounter) processHints(conf *Config, creds *auth.Credentials) e
 // setupFS is used to set up the file system for all containers. This is the
 // main entry point method, with most of the other being internal only. It
 // returns the mount namespace that is created for the container.
-func (c *containerMounter) setupFS(conf *Config, procArgs *kernel.CreateProcessArgs) (*fs.MountNamespace, error) {
+func (c *containerMounter) setupFS(conf *config.Config, procArgs *kernel.CreateProcessArgs) (*fs.MountNamespace, error) {
 	log.Infof("Configuring container's file system")
 
 	// Create context with root credentials to mount the filesystem (the current
@@ -626,7 +627,7 @@ func (c *containerMounter) setupFS(conf *Config, procArgs *kernel.CreateProcessA
 	return mns, nil
 }
 
-func (c *containerMounter) createMountNamespace(ctx context.Context, conf *Config) (*fs.MountNamespace, error) {
+func (c *containerMounter) createMountNamespace(ctx context.Context, conf *config.Config) (*fs.MountNamespace, error) {
 	rootInode, err := c.createRootMount(ctx, conf)
 	if err != nil {
 		return nil, fmt.Errorf("creating filesystem for container: %v", err)
@@ -638,7 +639,7 @@ func (c *containerMounter) createMountNamespace(ctx context.Context, conf *Confi
 	return mns, nil
 }
 
-func (c *containerMounter) mountSubmounts(ctx context.Context, conf *Config, mns *fs.MountNamespace) error {
+func (c *containerMounter) mountSubmounts(ctx context.Context, conf *config.Config, mns *fs.MountNamespace) error {
 	root := mns.Root()
 	defer root.DecRef(ctx)
 
@@ -674,7 +675,7 @@ func (c *containerMounter) checkDispenser() error {
 
 // mountSharedMaster mounts the master of a volume that is shared among
 // containers in a pod. It returns the root mount's inode.
-func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *Config, hint *mountHint) (*fs.Inode, error) {
+func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *config.Config, hint *mountHint) (*fs.Inode, error) {
 	// Map mount type to filesystem name, and parse out the options that we are
 	// capable of dealing with.
 	fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, hint.mount)
@@ -714,7 +715,7 @@ func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *Config,
 }
 
 // createRootMount creates the root filesystem.
-func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (*fs.Inode, error) {
+func (c *containerMounter) createRootMount(ctx context.Context, conf *config.Config) (*fs.Inode, error) {
 	// First construct the filesystem from the spec.Root.
 	mf := fs.MountSourceFlags{ReadOnly: c.root.Readonly || conf.Overlay}
 
@@ -759,7 +760,7 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (*
 
 // getMountNameAndOptions retrieves the fsName, opts, and useOverlay values
 // used for mounts.
-func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (string, []string, bool, error) {
+func (c *containerMounter) getMountNameAndOptions(conf *config.Config, m specs.Mount) (string, []string, bool, error) {
 	var (
 		fsName     string
 		opts       []string
@@ -793,19 +794,19 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
 	return fsName, opts, useOverlay, nil
 }
 
-func (c *containerMounter) getMountAccessType(mount specs.Mount) FileAccessType {
+func (c *containerMounter) getMountAccessType(mount specs.Mount) config.FileAccessType {
 	if hint := c.hints.findMount(mount); hint != nil {
 		return hint.fileAccessType()
 	}
 	// Non-root bind mounts are always shared if no hints were provided.
-	return FileAccessShared
+	return config.FileAccessShared
 }
 
 // mountSubmount mounts volumes inside the container's root. Because mounts may
 // be readonly, a lower ramfs overlay is added to create the mount point dir.
 // Another overlay is added with tmpfs on top if Config.Overlay is true.
 // 'm.Destination' must be an absolute path with '..' and symlinks resolved.
-func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent, m specs.Mount) error {
+func (c *containerMounter) mountSubmount(ctx context.Context, conf *config.Config, mns *fs.MountNamespace, root *fs.Dirent, m specs.Mount) error {
 	// Map mount type to filesystem name, and parse out the options that we are
 	// capable of dealing with.
 	fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, m)
@@ -904,7 +905,7 @@ func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.Moun
 
 // addRestoreMount adds a mount to the MountSources map used for restoring a
 // checkpointed container.
-func (c *containerMounter) addRestoreMount(conf *Config, renv *fs.RestoreEnvironment, m specs.Mount) error {
+func (c *containerMounter) addRestoreMount(conf *config.Config, renv *fs.RestoreEnvironment, m specs.Mount) error {
 	fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, m)
 	if err != nil {
 		return err
@@ -929,7 +930,7 @@ func (c *containerMounter) addRestoreMount(conf *Config, renv *fs.RestoreEnviron
 
 // createRestoreEnvironment builds a fs.RestoreEnvironment called renv by adding
 // the mounts to the environment.
-func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEnvironment, error) {
+func (c *containerMounter) createRestoreEnvironment(conf *config.Config) (*fs.RestoreEnvironment, error) {
 	renv := &fs.RestoreEnvironment{
 		MountSources: make(map[string][]fs.MountArgs),
 	}
@@ -984,7 +985,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn
 //
 // Note that when there are submounts inside of '/tmp', directories for the
 // mount points must be present, making '/tmp' not empty anymore.
-func (c *containerMounter) mountTmp(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent) error {
+func (c *containerMounter) mountTmp(ctx context.Context, conf *config.Config, mns *fs.MountNamespace, root *fs.Dirent) error {
 	for _, m := range c.mounts {
 		if filepath.Clean(m.Destination) == "/tmp" {
 			log.Debugf("Explict %q mount found, skipping internal tmpfs, mount: %+v", "/tmp", m)
diff --git a/runsc/boot/fs_test.go b/runsc/boot/fs_test.go
index 912037075b..e986231e57 100644
--- a/runsc/boot/fs_test.go
+++ b/runsc/boot/fs_test.go
@@ -20,6 +20,7 @@ import (
 	"testing"
 
 	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"gvisor.dev/gvisor/runsc/config"
 )
 
 func TestPodMountHintsHappy(t *testing.T) {
@@ -196,7 +197,7 @@ func TestGetMountAccessType(t *testing.T) {
 	for _, tst := range []struct {
 		name        string
 		annotations map[string]string
-		want        FileAccessType
+		want        config.FileAccessType
 	}{
 		{
 			name: "container=exclusive",
@@ -205,7 +206,7 @@ func TestGetMountAccessType(t *testing.T) {
 				MountPrefix + "mount1.type":   "bind",
 				MountPrefix + "mount1.share":  "container",
 			},
-			want: FileAccessExclusive,
+			want: config.FileAccessExclusive,
 		},
 		{
 			name: "pod=shared",
@@ -214,7 +215,7 @@ func TestGetMountAccessType(t *testing.T) {
 				MountPrefix + "mount1.type":   "bind",
 				MountPrefix + "mount1.share":  "pod",
 			},
-			want: FileAccessShared,
+			want: config.FileAccessShared,
 		},
 		{
 			name: "shared=shared",
@@ -223,7 +224,7 @@ func TestGetMountAccessType(t *testing.T) {
 				MountPrefix + "mount1.type":   "bind",
 				MountPrefix + "mount1.share":  "shared",
 			},
-			want: FileAccessShared,
+			want: config.FileAccessShared,
 		},
 		{
 			name: "default=shared",
@@ -232,7 +233,7 @@ func TestGetMountAccessType(t *testing.T) {
 				MountPrefix + "mount1.type":   "bind",
 				MountPrefix + "mount1.share":  "container",
 			},
-			want: FileAccessShared,
+			want: config.FileAccessShared,
 		},
 	} {
 		t.Run(tst.name, func(t *testing.T) {
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 40c6f99fd5..e8ea5093b6 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -67,6 +67,7 @@ import (
 	"gvisor.dev/gvisor/runsc/boot/filter"
 	_ "gvisor.dev/gvisor/runsc/boot/platforms" // register all platforms.
 	"gvisor.dev/gvisor/runsc/boot/pprof"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/specutils"
 
 	// Include supported socket providers.
@@ -79,7 +80,7 @@ import (
 )
 
 type containerInfo struct {
-	conf *Config
+	conf *config.Config
 
 	// spec is the base configuration for the root container.
 	spec *specs.Spec
@@ -165,7 +166,7 @@ type Args struct {
 	// Spec is the sandbox specification.
 	Spec *specs.Spec
 	// Conf is the system configuration.
-	Conf *Config
+	Conf *config.Config
 	// ControllerFD is the FD to the URPC controller. The Loader takes ownership
 	// of this FD and may close it at any time.
 	ControllerFD int
@@ -471,7 +472,7 @@ func (l *Loader) Destroy() {
 	}
 }
 
-func createPlatform(conf *Config, deviceFile *os.File) (platform.Platform, error) {
+func createPlatform(conf *config.Config, deviceFile *os.File) (platform.Platform, error) {
 	p, err := platform.Lookup(conf.Platform)
 	if err != nil {
 		panic(fmt.Sprintf("invalid platform %v: %v", conf.Platform, err))
@@ -504,7 +505,7 @@ func (l *Loader) installSeccompFilters() error {
 	} else {
 		opts := filter.Options{
 			Platform:      l.k.Platform,
-			HostNetwork:   l.root.conf.Network == NetworkHost,
+			HostNetwork:   l.root.conf.Network == config.NetworkHost,
 			ProfileEnable: l.root.conf.ProfileEnable,
 			ControllerFD:  l.ctrl.srv.FD(),
 		}
@@ -531,7 +532,7 @@ func (l *Loader) Run() error {
 }
 
 func (l *Loader) run() error {
-	if l.root.conf.Network == NetworkHost {
+	if l.root.conf.Network == config.NetworkHost {
 		// Delay host network configuration to this point because network namespace
 		// is configured after the loader is created and before Run() is called.
 		log.Debugf("Configuring host network")
@@ -629,7 +630,7 @@ func (l *Loader) createContainer(cid string) error {
 // startContainer starts a child container. It returns the thread group ID of
 // the newly created process. Caller owns 'files' and may close them after
 // this method returns.
-func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, files []*os.File) error {
+func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid string, files []*os.File) error {
 	// Create capabilities.
 	caps, err := specutils.Capabilities(conf.EnableRaw, spec.Process.Capabilities)
 	if err != nil {
@@ -1017,17 +1018,17 @@ func (l *Loader) WaitExit() kernel.ExitStatus {
 	return l.k.GlobalInit().ExitStatus()
 }
 
-func newRootNetworkNamespace(conf *Config, clock tcpip.Clock, uniqueID stack.UniqueID) (*inet.Namespace, error) {
+func newRootNetworkNamespace(conf *config.Config, clock tcpip.Clock, uniqueID stack.UniqueID) (*inet.Namespace, error) {
 	// Create an empty network stack because the network namespace may be empty at
 	// this point. Netns is configured before Run() is called. Netstack is
 	// configured using a control uRPC message. Host network is configured inside
 	// Run().
 	switch conf.Network {
-	case NetworkHost:
+	case config.NetworkHost:
 		// No network namespacing support for hostinet yet, hence creator is nil.
 		return inet.NewRootNamespace(hostinet.NewStack(), nil), nil
 
-	case NetworkNone, NetworkSandbox:
+	case config.NetworkNone, config.NetworkSandbox:
 		s, err := newEmptySandboxNetworkStack(clock, uniqueID)
 		if err != nil {
 			return nil, err
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index aa3fdf96cd..03cbaec333 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -34,6 +34,7 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/unet"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/fsgofer"
 )
 
@@ -45,10 +46,10 @@ func init() {
 	}
 }
 
-func testConfig() *Config {
-	return &Config{
+func testConfig() *config.Config {
+	return &config.Config{
 		RootDir:        "unused_root_dir",
-		Network:        NetworkNone,
+		Network:        config.NetworkNone,
 		DisableSeccomp: true,
 		Platform:       "ptrace",
 	}
diff --git a/runsc/boot/network.go b/runsc/boot/network.go
index 4e1fa76652..988573640c 100644
--- a/runsc/boot/network.go
+++ b/runsc/boot/network.go
@@ -33,6 +33,7 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 	"gvisor.dev/gvisor/pkg/urpc"
+	"gvisor.dev/gvisor/runsc/config"
 )
 
 var (
@@ -78,44 +79,6 @@ type DefaultRoute struct {
 	Name  string
 }
 
-// QueueingDiscipline is used to specify the kind of Queueing Discipline to
-// apply for a give FDBasedLink.
-type QueueingDiscipline int
-
-const (
-	// QDiscNone disables any queueing for the underlying FD.
-	QDiscNone QueueingDiscipline = iota
-
-	// QDiscFIFO applies a simple fifo based queue to the underlying
-	// FD.
-	QDiscFIFO
-)
-
-// MakeQueueingDiscipline if possible the equivalent QueuingDiscipline for s
-// else returns an error.
-func MakeQueueingDiscipline(s string) (QueueingDiscipline, error) {
-	switch s {
-	case "none":
-		return QDiscNone, nil
-	case "fifo":
-		return QDiscFIFO, nil
-	default:
-		return 0, fmt.Errorf("unsupported qdisc specified: %q", s)
-	}
-}
-
-// String implements fmt.Stringer.
-func (q QueueingDiscipline) String() string {
-	switch q {
-	case QDiscNone:
-		return "none"
-	case QDiscFIFO:
-		return "fifo"
-	default:
-		panic(fmt.Sprintf("Invalid queueing discipline: %d", q))
-	}
-}
-
 // FDBasedLink configures an fd-based link.
 type FDBasedLink struct {
 	Name               string
@@ -127,7 +90,7 @@ type FDBasedLink struct {
 	TXChecksumOffload  bool
 	RXChecksumOffload  bool
 	LinkAddress        net.HardwareAddr
-	QDisc              QueueingDiscipline
+	QDisc              config.QueueingDiscipline
 
 	// NumChannels controls how many underlying FD's are to be used to
 	// create this endpoint.
@@ -247,8 +210,8 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
 		}
 
 		switch link.QDisc {
-		case QDiscNone:
-		case QDiscFIFO:
+		case config.QDiscNone:
+		case config.QDiscFIFO:
 			log.Infof("Enabling FIFO QDisc on %q", link.Name)
 			linkEP = fifo.New(linkEP, runtime.GOMAXPROCS(0), 1000)
 		}
diff --git a/runsc/boot/strace.go b/runsc/boot/strace.go
index fbfd3b07c7..176981f741 100644
--- a/runsc/boot/strace.go
+++ b/runsc/boot/strace.go
@@ -16,9 +16,10 @@ package boot
 
 import (
 	"gvisor.dev/gvisor/pkg/sentry/strace"
+	"gvisor.dev/gvisor/runsc/config"
 )
 
-func enableStrace(conf *Config) error {
+func enableStrace(conf *config.Config) error {
 	// We must initialize even if strace is not enabled.
 	strace.Initialize()
 
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
index 08dce8b6c5..3da7a64f04 100644
--- a/runsc/boot/vfs.go
+++ b/runsc/boot/vfs.go
@@ -42,6 +42,7 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/syserror"
+	"gvisor.dev/gvisor/runsc/config"
 )
 
 func registerFilesystems(k *kernel.Kernel) error {
@@ -133,7 +134,7 @@ func registerFilesystems(k *kernel.Kernel) error {
 	return nil
 }
 
-func setupContainerVFS2(ctx context.Context, conf *Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error {
+func setupContainerVFS2(ctx context.Context, conf *config.Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error {
 	mns, err := mntr.setupVFS2(ctx, conf, procArgs)
 	if err != nil {
 		return fmt.Errorf("failed to setupFS: %w", err)
@@ -149,7 +150,7 @@ func setupContainerVFS2(ctx context.Context, conf *Config, mntr *containerMounte
 	return nil
 }
 
-func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs *kernel.CreateProcessArgs) (*vfs.MountNamespace, error) {
+func (c *containerMounter) setupVFS2(ctx context.Context, conf *config.Config, procArgs *kernel.CreateProcessArgs) (*vfs.MountNamespace, error) {
 	log.Infof("Configuring container's file system with VFS2")
 
 	// Create context with root credentials to mount the filesystem (the current
@@ -175,7 +176,7 @@ func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs
 	return mns, nil
 }
 
-func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *Config, creds *auth.Credentials) (*vfs.MountNamespace, error) {
+func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *config.Config, creds *auth.Credentials) (*vfs.MountNamespace, error) {
 	fd := c.fds.remove()
 	opts := p9MountData(fd, conf.FileAccess, true /* vfs2 */)
 
@@ -196,7 +197,7 @@ func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *C
 	return mns, nil
 }
 
-func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials) error {
+func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials) error {
 	mounts, err := c.prepareMountsVFS2()
 	if err != nil {
 		return err
@@ -256,7 +257,7 @@ func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) {
 	return mounts, nil
 }
 
-func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountAndFD) error {
+func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountAndFD) error {
 	root := mns.Root()
 	defer root.DecRef(ctx)
 	target := &vfs.PathOperation{
@@ -285,7 +286,7 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config,
 
 // getMountNameAndOptionsVFS2 retrieves the fsName, opts, and useOverlay values
 // used for mounts.
-func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m *mountAndFD) (string, *vfs.MountOptions, error) {
+func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mountAndFD) (string, *vfs.MountOptions, error) {
 	fsName := m.Type
 	var data []string
 
@@ -383,7 +384,7 @@ func (c *containerMounter) makeSyntheticMount(ctx context.Context, currentPath s
 //
 // Note that when there are submounts inside of '/tmp', directories for the
 // mount points must be present, making '/tmp' not empty anymore.
-func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *Config, creds *auth.Credentials, mns *vfs.MountNamespace) error {
+func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *config.Config, creds *auth.Credentials, mns *vfs.MountNamespace) error {
 	for _, m := range c.mounts {
 		// m.Destination has been cleaned, so it's to use equality here.
 		if m.Destination == "/tmp" {
@@ -448,7 +449,7 @@ func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *Config, creds
 // processHintsVFS2 processes annotations that container hints about how volumes
 // should be mounted (e.g. a volume shared between containers). It must be
 // called for the root container only.
-func (c *containerMounter) processHintsVFS2(conf *Config, creds *auth.Credentials) error {
+func (c *containerMounter) processHintsVFS2(conf *config.Config, creds *auth.Credentials) error {
 	ctx := c.k.SupervisorContext()
 	for _, hint := range c.hints.mounts {
 		// TODO(b/142076984): Only support tmpfs for now. Bind mounts require a
@@ -469,7 +470,7 @@ func (c *containerMounter) processHintsVFS2(conf *Config, creds *auth.Credential
 
 // mountSharedMasterVFS2 mounts the master of a volume that is shared among
 // containers in a pod.
-func (c *containerMounter) mountSharedMasterVFS2(ctx context.Context, conf *Config, hint *mountHint, creds *auth.Credentials) (*vfs.Mount, error) {
+func (c *containerMounter) mountSharedMasterVFS2(ctx context.Context, conf *config.Config, hint *mountHint, creds *auth.Credentials) (*vfs.Mount, error) {
 	// Map mount type to filesystem name, and parse out the options that we are
 	// capable of dealing with.
 	mntFD := &mountAndFD{Mount: hint.mount}
@@ -485,7 +486,7 @@ func (c *containerMounter) mountSharedMasterVFS2(ctx context.Context, conf *Conf
 
 // mountSharedSubmount binds mount to a previously mounted volume that is shared
 // among containers in the same pod.
-func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials, mount specs.Mount, source *mountHint) error {
+func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, mount specs.Mount, source *mountHint) error {
 	if err := source.checkCompatible(mount); err != nil {
 		return err
 	}
diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD
index 1b5178dd57..2556f6d9e8 100644
--- a/runsc/cmd/BUILD
+++ b/runsc/cmd/BUILD
@@ -51,6 +51,7 @@ go_library(
         "//pkg/unet",
         "//pkg/urpc",
         "//runsc/boot",
+        "//runsc/config",
         "//runsc/console",
         "//runsc/container",
         "//runsc/flag",
@@ -84,7 +85,7 @@ go_test(
         "//pkg/sentry/kernel/auth",
         "//pkg/test/testutil",
         "//pkg/urpc",
-        "//runsc/boot",
+        "//runsc/config",
         "//runsc/container",
         "//runsc/specutils",
         "@com_github_google_go_cmp//cmp:go_default_library",
diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go
index f4f2477215..357f46517b 100644
--- a/runsc/cmd/boot.go
+++ b/runsc/cmd/boot.go
@@ -27,6 +27,7 @@ import (
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/platform"
 	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/flag"
 	"gvisor.dev/gvisor/runsc/specutils"
 )
@@ -133,7 +134,7 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 	// Ensure that if there is a panic, all goroutine stacks are printed.
 	debug.SetTraceback("system")
 
-	conf := args[0].(*boot.Config)
+	conf := args[0].(*config.Config)
 
 	if b.attached {
 		// Ensure this process is killed after parent process terminates when
diff --git a/runsc/cmd/capability_test.go b/runsc/cmd/capability_test.go
index a84067112b..e13a944868 100644
--- a/runsc/cmd/capability_test.go
+++ b/runsc/cmd/capability_test.go
@@ -24,7 +24,7 @@ import (
 	"github.com/syndtr/gocapability/capability"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/test/testutil"
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/container"
 	"gvisor.dev/gvisor/runsc/specutils"
 )
@@ -88,7 +88,7 @@ func TestCapabilities(t *testing.T) {
 	conf := testutil.TestConfig(t)
 
 	// Use --network=host to make sandbox use spec's capabilities.
-	conf.Network = boot.NetworkHost
+	conf.Network = config.NetworkHost
 
 	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
 	if err != nil {
diff --git a/runsc/cmd/checkpoint.go b/runsc/cmd/checkpoint.go
index 8a29e521ec..db46d509f7 100644
--- a/runsc/cmd/checkpoint.go
+++ b/runsc/cmd/checkpoint.go
@@ -22,7 +22,7 @@ import (
 
 	"github.com/google/subcommands"
 	"gvisor.dev/gvisor/pkg/log"
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/container"
 	"gvisor.dev/gvisor/runsc/flag"
 	"gvisor.dev/gvisor/runsc/specutils"
@@ -72,7 +72,7 @@ func (c *Checkpoint) Execute(_ context.Context, f *flag.FlagSet, args ...interfa
 	}
 
 	id := f.Arg(0)
-	conf := args[0].(*boot.Config)
+	conf := args[0].(*config.Config)
 	waitStatus := args[1].(*syscall.WaitStatus)
 
 	cont, err := container.Load(conf.RootDir, id)
diff --git a/runsc/cmd/create.go b/runsc/cmd/create.go
index 910e97577f..4d9085244b 100644
--- a/runsc/cmd/create.go
+++ b/runsc/cmd/create.go
@@ -18,7 +18,7 @@ import (
 	"context"
 
 	"github.com/google/subcommands"
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/container"
 	"gvisor.dev/gvisor/runsc/flag"
 	"gvisor.dev/gvisor/runsc/specutils"
@@ -81,7 +81,7 @@ func (c *Create) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}
 	}
 
 	id := f.Arg(0)
-	conf := args[0].(*boot.Config)
+	conf := args[0].(*config.Config)
 
 	if conf.Rootless {
 		return Errorf("Rootless mode not supported with %q", c.Name())
diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go
index 742f8c3445..1321982221 100644
--- a/runsc/cmd/debug.go
+++ b/runsc/cmd/debug.go
@@ -25,7 +25,7 @@ import (
 	"github.com/google/subcommands"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/control"
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/container"
 	"gvisor.dev/gvisor/runsc/flag"
 )
@@ -82,7 +82,7 @@ func (d *Debug) SetFlags(f *flag.FlagSet) {
 // Execute implements subcommands.Command.Execute.
 func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
 	var c *container.Container
-	conf := args[0].(*boot.Config)
+	conf := args[0].(*config.Config)
 
 	if d.pid == 0 {
 		// No pid, container ID must have been provided.
diff --git a/runsc/cmd/delete.go b/runsc/cmd/delete.go
index 0e4863f50b..4e49deff8f 100644
--- a/runsc/cmd/delete.go
+++ b/runsc/cmd/delete.go
@@ -21,7 +21,7 @@ import (
 
 	"github.com/google/subcommands"
 	"gvisor.dev/gvisor/pkg/log"
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/container"
 	"gvisor.dev/gvisor/runsc/flag"
 )
@@ -59,14 +59,14 @@ func (d *Delete) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}
 		return subcommands.ExitUsageError
 	}
 
-	conf := args[0].(*boot.Config)
+	conf := args[0].(*config.Config)
 	if err := d.execute(f.Args(), conf); err != nil {
 		Fatalf("%v", err)
 	}
 	return subcommands.ExitSuccess
 }
 
-func (d *Delete) execute(ids []string, conf *boot.Config) error {
+func (d *Delete) execute(ids []string, conf *config.Config) error {
 	for _, id := range ids {
 		c, err := container.Load(conf.RootDir, id)
 		if err != nil {
diff --git a/runsc/cmd/delete_test.go b/runsc/cmd/delete_test.go
index cb59516a30..e2d994a052 100644
--- a/runsc/cmd/delete_test.go
+++ b/runsc/cmd/delete_test.go
@@ -18,7 +18,7 @@ import (
 	"io/ioutil"
 	"testing"
 
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 )
 
 func TestNotFound(t *testing.T) {
@@ -27,7 +27,7 @@ func TestNotFound(t *testing.T) {
 	if err != nil {
 		t.Fatalf("error creating dir: %v", err)
 	}
-	conf := &boot.Config{RootDir: dir}
+	conf := &config.Config{RootDir: dir}
 
 	d := Delete{}
 	if err := d.execute(ids, conf); err == nil {
diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go
index 7d1310c96a..d1f2e9e6d0 100644
--- a/runsc/cmd/do.go
+++ b/runsc/cmd/do.go
@@ -30,7 +30,7 @@ import (
 	"github.com/google/subcommands"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"gvisor.dev/gvisor/pkg/log"
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/container"
 	"gvisor.dev/gvisor/runsc/flag"
 	"gvisor.dev/gvisor/runsc/specutils"
@@ -82,7 +82,7 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
 		return subcommands.ExitUsageError
 	}
 
-	conf := args[0].(*boot.Config)
+	conf := args[0].(*config.Config)
 	waitStatus := args[1].(*syscall.WaitStatus)
 
 	if conf.Rootless {
@@ -125,7 +125,7 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
 	specutils.LogSpec(spec)
 
 	cid := fmt.Sprintf("runsc-%06d", rand.Int31n(1000000))
-	if conf.Network == boot.NetworkNone {
+	if conf.Network == config.NetworkNone {
 		netns := specs.LinuxNamespace{
 			Type: specs.NetworkNamespace,
 		}
@@ -135,9 +135,9 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
 		spec.Linux = &specs.Linux{Namespaces: []specs.LinuxNamespace{netns}}
 
 	} else if conf.Rootless {
-		if conf.Network == boot.NetworkSandbox {
+		if conf.Network == config.NetworkSandbox {
 			c.notifyUser("*** Warning: using host network due to --rootless ***")
-			conf.Network = boot.NetworkHost
+			conf.Network = config.NetworkHost
 		}
 
 	} else {
diff --git a/runsc/cmd/events.go b/runsc/cmd/events.go
index 51f6a98edb..25fe2cf1cf 100644
--- a/runsc/cmd/events.go
+++ b/runsc/cmd/events.go
@@ -22,7 +22,7 @@ import (
 
 	"github.com/google/subcommands"
 	"gvisor.dev/gvisor/pkg/log"
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/container"
 	"gvisor.dev/gvisor/runsc/flag"
 )
@@ -72,7 +72,7 @@ func (evs *Events) Execute(ctx context.Context, f *flag.FlagSet, args ...interfa
 	}
 
 	id := f.Arg(0)
-	conf := args[0].(*boot.Config)
+	conf := args[0].(*config.Config)
 
 	c, err := container.Load(conf.RootDir, id)
 	if err != nil {
diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go
index d9a94903eb..600876a274 100644
--- a/runsc/cmd/exec.go
+++ b/runsc/cmd/exec.go
@@ -33,7 +33,7 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/control"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/urpc"
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/console"
 	"gvisor.dev/gvisor/runsc/container"
 	"gvisor.dev/gvisor/runsc/flag"
@@ -105,7 +105,7 @@ func (ex *Exec) SetFlags(f *flag.FlagSet) {
 // Execute implements subcommands.Command.Execute. It starts a process in an
 // already created container.
 func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
-	conf := args[0].(*boot.Config)
+	conf := args[0].(*config.Config)
 	e, id, err := ex.parseArgs(f, conf.EnableRaw)
 	if err != nil {
 		Fatalf("parsing process spec: %v", err)
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index 3966e2d212..7da02c3af5 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -30,7 +30,7 @@ import (
 	"gvisor.dev/gvisor/pkg/p9"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/unet"
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/flag"
 	"gvisor.dev/gvisor/runsc/fsgofer"
 	"gvisor.dev/gvisor/runsc/fsgofer/filter"
@@ -107,7 +107,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 		Fatalf("reading spec: %v", err)
 	}
 
-	conf := args[0].(*boot.Config)
+	conf := args[0].(*config.Config)
 
 	if g.setUpRoot {
 		if err := setupRootFS(spec, conf); err != nil {
@@ -263,7 +263,7 @@ func isReadonlyMount(opts []string) bool {
 	return false
 }
 
-func setupRootFS(spec *specs.Spec, conf *boot.Config) error {
+func setupRootFS(spec *specs.Spec, conf *config.Config) error {
 	// Convert all shared mounts into slaves to be sure that nothing will be
 	// propagated outside of our namespace.
 	if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil {
@@ -346,7 +346,7 @@ func setupRootFS(spec *specs.Spec, conf *boot.Config) error {
 // setupMounts binds mount all mounts specified in the spec in their correct
 // location inside root. It will resolve relative paths and symlinks. It also
 // creates directories as needed.
-func setupMounts(conf *boot.Config, mounts []specs.Mount, root string) error {
+func setupMounts(conf *config.Config, mounts []specs.Mount, root string) error {
 	for _, m := range mounts {
 		if m.Type != "bind" || !specutils.IsSupportedDevMount(m) {
 			continue
@@ -385,7 +385,7 @@ func setupMounts(conf *boot.Config, mounts []specs.Mount, root string) error {
 // Otherwise, it may follow symlinks to locations that would be overwritten
 // with another mount point and return the wrong location. In short, make sure
 // setupMounts() has been called before.
-func resolveMounts(conf *boot.Config, mounts []specs.Mount, root string) ([]specs.Mount, error) {
+func resolveMounts(conf *config.Config, mounts []specs.Mount, root string) ([]specs.Mount, error) {
 	cleanMounts := make([]specs.Mount, 0, len(mounts))
 	for _, m := range mounts {
 		if m.Type != "bind" || !specutils.IsSupportedDevMount(m) {
@@ -467,7 +467,7 @@ func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, erro
 }
 
 // adjustMountOptions adds 'overlayfs_stale_read' if mounting over overlayfs.
-func adjustMountOptions(conf *boot.Config, path string, opts []string) ([]string, error) {
+func adjustMountOptions(conf *config.Config, path string, opts []string) ([]string, error) {
 	rv := make([]string, len(opts))
 	copy(rv, opts)
 
diff --git a/runsc/cmd/kill.go b/runsc/cmd/kill.go
index 8282ea0e03..04eee99b2e 100644
--- a/runsc/cmd/kill.go
+++ b/runsc/cmd/kill.go
@@ -23,7 +23,7 @@ import (
 
 	"github.com/google/subcommands"
 	"golang.org/x/sys/unix"
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/container"
 	"gvisor.dev/gvisor/runsc/flag"
 )
@@ -63,7 +63,7 @@ func (k *Kill) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 	}
 
 	id := f.Arg(0)
-	conf := args[0].(*boot.Config)
+	conf := args[0].(*config.Config)
 
 	if k.pid != 0 && k.all {
 		Fatalf("it is invalid to specify both --all and --pid")
diff --git a/runsc/cmd/list.go b/runsc/cmd/list.go
index d8d906fe32..f92d6fef9d 100644
--- a/runsc/cmd/list.go
+++ b/runsc/cmd/list.go
@@ -24,7 +24,7 @@ import (
 
 	"github.com/google/subcommands"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/container"
 	"gvisor.dev/gvisor/runsc/flag"
 )
@@ -63,7 +63,7 @@ func (l *List) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 		return subcommands.ExitUsageError
 	}
 
-	conf := args[0].(*boot.Config)
+	conf := args[0].(*config.Config)
 	ids, err := container.List(conf.RootDir)
 	if err != nil {
 		Fatalf("%v", err)
diff --git a/runsc/cmd/pause.go b/runsc/cmd/pause.go
index 6f95a98375..0eb1402ed0 100644
--- a/runsc/cmd/pause.go
+++ b/runsc/cmd/pause.go
@@ -18,7 +18,7 @@ import (
 	"context"
 
 	"github.com/google/subcommands"
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/container"
 	"gvisor.dev/gvisor/runsc/flag"
 )
@@ -53,7 +53,7 @@ func (*Pause) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s
 	}
 
 	id := f.Arg(0)
-	conf := args[0].(*boot.Config)
+	conf := args[0].(*config.Config)
 
 	cont, err := container.Load(conf.RootDir, id)
 	if err != nil {
diff --git a/runsc/cmd/ps.go b/runsc/cmd/ps.go
index 7fb8041af7..bc58c928f6 100644
--- a/runsc/cmd/ps.go
+++ b/runsc/cmd/ps.go
@@ -20,7 +20,7 @@ import (
 
 	"github.com/google/subcommands"
 	"gvisor.dev/gvisor/pkg/sentry/control"
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/container"
 	"gvisor.dev/gvisor/runsc/flag"
 )
@@ -58,7 +58,7 @@ func (ps *PS) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{})
 	}
 
 	id := f.Arg(0)
-	conf := args[0].(*boot.Config)
+	conf := args[0].(*config.Config)
 
 	c, err := container.Load(conf.RootDir, id)
 	if err != nil {
diff --git a/runsc/cmd/restore.go b/runsc/cmd/restore.go
index 72584b3265..b169758042 100644
--- a/runsc/cmd/restore.go
+++ b/runsc/cmd/restore.go
@@ -20,7 +20,7 @@ import (
 	"syscall"
 
 	"github.com/google/subcommands"
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/container"
 	"gvisor.dev/gvisor/runsc/flag"
 	"gvisor.dev/gvisor/runsc/specutils"
@@ -77,7 +77,7 @@ func (r *Restore) Execute(_ context.Context, f *flag.FlagSet, args ...interface{
 	}
 
 	id := f.Arg(0)
-	conf := args[0].(*boot.Config)
+	conf := args[0].(*config.Config)
 	waitStatus := args[1].(*syscall.WaitStatus)
 
 	if conf.Rootless {
diff --git a/runsc/cmd/resume.go b/runsc/cmd/resume.go
index 61a55a5545..f24823f998 100644
--- a/runsc/cmd/resume.go
+++ b/runsc/cmd/resume.go
@@ -18,7 +18,7 @@ import (
 	"context"
 
 	"github.com/google/subcommands"
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/container"
 	"gvisor.dev/gvisor/runsc/flag"
 )
@@ -54,7 +54,7 @@ func (r *Resume) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}
 	}
 
 	id := f.Arg(0)
-	conf := args[0].(*boot.Config)
+	conf := args[0].(*config.Config)
 
 	cont, err := container.Load(conf.RootDir, id)
 	if err != nil {
diff --git a/runsc/cmd/run.go b/runsc/cmd/run.go
index cf41581ad5..1161de67a1 100644
--- a/runsc/cmd/run.go
+++ b/runsc/cmd/run.go
@@ -19,7 +19,7 @@ import (
 	"syscall"
 
 	"github.com/google/subcommands"
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/container"
 	"gvisor.dev/gvisor/runsc/flag"
 	"gvisor.dev/gvisor/runsc/specutils"
@@ -64,7 +64,7 @@ func (r *Run) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s
 	}
 
 	id := f.Arg(0)
-	conf := args[0].(*boot.Config)
+	conf := args[0].(*config.Config)
 	waitStatus := args[1].(*syscall.WaitStatus)
 
 	if conf.Rootless {
diff --git a/runsc/cmd/start.go b/runsc/cmd/start.go
index 0205fd9f77..88991b5216 100644
--- a/runsc/cmd/start.go
+++ b/runsc/cmd/start.go
@@ -18,7 +18,7 @@ import (
 	"context"
 
 	"github.com/google/subcommands"
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/container"
 	"gvisor.dev/gvisor/runsc/flag"
 )
@@ -52,7 +52,7 @@ func (*Start) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s
 	}
 
 	id := f.Arg(0)
-	conf := args[0].(*boot.Config)
+	conf := args[0].(*config.Config)
 
 	c, err := container.Load(conf.RootDir, id)
 	if err != nil {
diff --git a/runsc/cmd/state.go b/runsc/cmd/state.go
index cf2413deb2..2bd2ab9f8f 100644
--- a/runsc/cmd/state.go
+++ b/runsc/cmd/state.go
@@ -21,7 +21,7 @@ import (
 
 	"github.com/google/subcommands"
 	"gvisor.dev/gvisor/pkg/log"
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/container"
 	"gvisor.dev/gvisor/runsc/flag"
 )
@@ -55,7 +55,7 @@ func (*State) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s
 	}
 
 	id := f.Arg(0)
-	conf := args[0].(*boot.Config)
+	conf := args[0].(*config.Config)
 
 	c, err := container.Load(conf.RootDir, id)
 	if err != nil {
diff --git a/runsc/cmd/wait.go b/runsc/cmd/wait.go
index 29c0a15f09..28d0642ed2 100644
--- a/runsc/cmd/wait.go
+++ b/runsc/cmd/wait.go
@@ -21,7 +21,7 @@ import (
 	"syscall"
 
 	"github.com/google/subcommands"
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/container"
 	"gvisor.dev/gvisor/runsc/flag"
 )
@@ -70,7 +70,7 @@ func (wt *Wait) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 	}
 
 	id := f.Arg(0)
-	conf := args[0].(*boot.Config)
+	conf := args[0].(*config.Config)
 
 	c, err := container.Load(conf.RootDir, id)
 	if err != nil {
diff --git a/runsc/config/BUILD b/runsc/config/BUILD
new file mode 100644
index 0000000000..3c8713d535
--- /dev/null
+++ b/runsc/config/BUILD
@@ -0,0 +1,15 @@
+load("//tools:defs.bzl", "go_library")
+
+package(licenses = ["notice"])
+
+go_library(
+    name = "config",
+    srcs = [
+        "config.go",
+    ],
+    visibility = ["//:sandbox"],
+    deps = [
+        "//pkg/refs",
+        "//pkg/sentry/watchdog",
+    ],
+)
diff --git a/runsc/boot/config.go b/runsc/config/config.go
similarity index 89%
rename from runsc/boot/config.go
rename to runsc/config/config.go
index 80da8b3e68..ca85cef514 100644
--- a/runsc/boot/config.go
+++ b/runsc/config/config.go
@@ -1,4 +1,4 @@
-// Copyright 2018 The gVisor Authors.
+// Copyright 2020 The gVisor Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -12,7 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-package boot
+// Package config provides basic infrastructure to set configuration settings
+// for runsc. The configuration is set by flags to the command line. They can
+// also propagate to a different process using the same flags.
+package config
 
 import (
 	"fmt"
@@ -141,6 +144,44 @@ func refsLeakModeToString(mode refs.LeakMode) string {
 	}
 }
 
+// QueueingDiscipline is used to specify the kind of Queueing Discipline to
+// apply for a given FDBasedLink.
+type QueueingDiscipline int
+
+const (
+	// QDiscNone disables any queueing for the underlying FD.
+	QDiscNone QueueingDiscipline = iota
+
+	// QDiscFIFO applies a simple fifo based queue to the underlying
+	// FD.
+	QDiscFIFO
+)
+
+// MakeQueueingDiscipline returns, if possible, the equivalent
+// QueueingDiscipline for s; otherwise it returns an error.
+func MakeQueueingDiscipline(s string) (QueueingDiscipline, error) {
+	switch s {
+	case "none":
+		return QDiscNone, nil
+	case "fifo":
+		return QDiscFIFO, nil
+	default:
+		return 0, fmt.Errorf("unsupported qdisc specified: %q", s)
+	}
+}
+
+// String implements fmt.Stringer.
+func (q QueueingDiscipline) String() string {
+	switch q {
+	case QDiscNone:
+		return "none"
+	case QDiscFIFO:
+		return "fifo"
+	default:
+		panic(fmt.Sprintf("Invalid queueing discipline: %d", q))
+	}
+}
+
 // Config holds configuration that is not part of the runtime spec.
 type Config struct {
 	// RootDir is the runtime root directory.
@@ -253,6 +294,18 @@ type Config struct {
 	// representing the "same" file.
 	OverlayfsStaleRead bool
 
+	// CPUNumFromQuota sets CPU number count to available CPU quota, using
+	// least integer value greater than or equal to quota.
+	//
+	// E.g. 0.2 CPU quota will result in 1, and 1.9 in 2.
+	CPUNumFromQuota bool
+
+	// Enables VFS2 (not plumbed through yet).
+	VFS2 bool
+
+	// Enables FUSE usage (not plumbed through yet).
+	FUSE bool
+
 	// TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in
 	// tests. It allows runsc to start the sandbox process as the current
 	// user, and without chrooting the sandbox process. This can be
@@ -265,18 +318,6 @@ type Config struct {
 	// multiple tests are run in parallel, since there is no way to pass
 	// parameters to the runtime from docker.
 	TestOnlyTestNameEnv string
-
-	// CPUNumFromQuota sets CPU number count to available CPU quota, using
-	// least integer value greater than or equal to quota.
-	//
-	// E.g. 0.2 CPU quota will result in 1, and 1.9 in 2.
-	CPUNumFromQuota bool
-
-	// Enables VFS2 (not plumbled through yet).
-	VFS2 bool
-
-	// Enables FUSE usage (not plumbled through yet).
-	FUSE bool
 }
 
 // ToFlags returns a slice of flags that correspond to the given Config.
@@ -316,6 +357,13 @@ func (c *Config) ToFlags() []string {
 	if c.CPUNumFromQuota {
 		f = append(f, "--cpu-num-from-quota")
 	}
+	if c.VFS2 {
+		f = append(f, "--vfs2=true")
+	}
+	if c.FUSE {
+		f = append(f, "--fuse=true")
+	}
+
 	// Only include these if set since it is never to be used by users.
 	if c.TestOnlyAllowRunAsCurrentUserWithoutChroot {
 		f = append(f, "--TESTONLY-unsafe-nonroot=true")
@@ -324,13 +372,5 @@ func (c *Config) ToFlags() []string {
 		f = append(f, "--TESTONLY-test-name-env="+c.TestOnlyTestNameEnv)
 	}
 
-	if c.VFS2 {
-		f = append(f, "--vfs2=true")
-	}
-
-	if c.FUSE {
-		f = append(f, "--fuse=true")
-	}
-
 	return f
 }
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index 9a9ee7e2a6..c337554826 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -23,6 +23,7 @@ go_library(
         "//pkg/sync",
         "//runsc/boot",
         "//runsc/cgroup",
+        "//runsc/config",
         "//runsc/sandbox",
         "//runsc/specutils",
         "@com_github_cenkalti_backoff//:go_default_library",
@@ -65,6 +66,7 @@ go_test(
         "//pkg/urpc",
         "//runsc/boot",
         "//runsc/boot/platforms",
+        "//runsc/config",
         "//runsc/specutils",
         "@com_github_cenkalti_backoff//:go_default_library",
         "@com_github_kr_pty//:go_default_library",
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 7ad09bf231..6e1d6a5680 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -37,6 +37,7 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/sighandling"
 	"gvisor.dev/gvisor/runsc/boot"
 	"gvisor.dev/gvisor/runsc/cgroup"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/sandbox"
 	"gvisor.dev/gvisor/runsc/specutils"
 )
@@ -269,7 +270,7 @@ type Args struct {
 // New creates the container in a new Sandbox process, unless the metadata
 // indicates that an existing Sandbox should be used. The caller must call
 // Destroy() on the container.
-func New(conf *boot.Config, args Args) (*Container, error) {
+func New(conf *config.Config, args Args) (*Container, error) {
 	log.Debugf("Create container %q in root dir: %s", args.ID, conf.RootDir)
 	if err := validateID(args.ID); err != nil {
 		return nil, err
@@ -397,7 +398,7 @@ func New(conf *boot.Config, args Args) (*Container, error) {
 }
 
 // Start starts running the containerized process inside the sandbox.
-func (c *Container) Start(conf *boot.Config) error {
+func (c *Container) Start(conf *config.Config) error {
 	log.Debugf("Start container %q", c.ID)
 
 	if err := c.Saver.lock(); err != nil {
@@ -472,7 +473,7 @@ func (c *Container) Start(conf *boot.Config) error {
 
 // Restore takes a container and replaces its kernel and file system
 // to restore a container from its state file.
-func (c *Container) Restore(spec *specs.Spec, conf *boot.Config, restoreFile string) error {
+func (c *Container) Restore(spec *specs.Spec, conf *config.Config, restoreFile string) error {
 	log.Debugf("Restore container %q", c.ID)
 	if err := c.Saver.lock(); err != nil {
 		return err
@@ -499,7 +500,7 @@ func (c *Container) Restore(spec *specs.Spec, conf *boot.Config, restoreFile str
 }
 
 // Run is a helper that calls Create + Start + Wait.
-func Run(conf *boot.Config, args Args) (syscall.WaitStatus, error) {
+func Run(conf *config.Config, args Args) (syscall.WaitStatus, error) {
 	log.Debugf("Run container %q in root dir: %s", args.ID, conf.RootDir)
 	c, err := New(conf, args)
 	if err != nil {
@@ -861,7 +862,7 @@ func (c *Container) waitForStopped() error {
 	return backoff.Retry(op, b)
 }
 
-func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bundleDir string, attached bool) ([]*os.File, *os.File, error) {
+func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bundleDir string, attached bool) ([]*os.File, *os.File, error) {
 	// Start with the general config flags.
 	args := conf.ToFlags()
 
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 5e8247bc80..6082068c75 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -41,8 +41,8 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/test/testutil"
-	"gvisor.dev/gvisor/runsc/boot"
 	"gvisor.dev/gvisor/runsc/boot/platforms"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/specutils"
 )
 
@@ -250,7 +250,7 @@ func readOutputNum(file string, position int) (int, error) {
 
 // run starts the sandbox and waits for it to exit, checking that the
 // application succeeded.
-func run(spec *specs.Spec, conf *boot.Config) error {
+func run(spec *specs.Spec, conf *config.Config) error {
 	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
 	if err != nil {
 		return fmt.Errorf("error setting up container: %v", err)
@@ -289,9 +289,9 @@ var (
 )
 
 // configs generates different configurations to run tests.
-func configs(t *testing.T, opts ...configOption) map[string]*boot.Config {
+func configs(t *testing.T, opts ...configOption) map[string]*config.Config {
 	// Always load the default config.
-	cs := make(map[string]*boot.Config)
+	cs := make(map[string]*config.Config)
 	for _, o := range opts {
 		switch o {
 		case overlay:
@@ -308,7 +308,7 @@ func configs(t *testing.T, opts ...configOption) map[string]*boot.Config {
 			cs["kvm"] = c
 		case nonExclusiveFS:
 			c := testutil.TestConfig(t)
-			c.FileAccess = boot.FileAccessShared
+			c.FileAccess = config.FileAccessShared
 			cs["non-exclusive"] = c
 		default:
 			panic(fmt.Sprintf("unknown config option %v", o))
@@ -317,7 +317,7 @@ func configs(t *testing.T, opts ...configOption) map[string]*boot.Config {
 	return cs
 }
 
-func configsWithVFS2(t *testing.T, opts ...configOption) map[string]*boot.Config {
+func configsWithVFS2(t *testing.T, opts ...configOption) map[string]*config.Config {
 	vfs1 := configs(t, opts...)
 
 	var optsVFS2 []configOption
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index e189648f41..1beea123fc 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -33,6 +33,7 @@ import (
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/test/testutil"
 	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/specutils"
 )
 
@@ -60,7 +61,7 @@ func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) {
 	return specs, ids
 }
 
-func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*Container, func(), error) {
+func startContainers(conf *config.Config, specs []*specs.Spec, ids []string) ([]*Container, func(), error) {
 	if len(conf.RootDir) == 0 {
 		panic("conf.RootDir not set. Call testutil.SetupRootDir() to set.")
 	}
diff --git a/runsc/container/shared_volume_test.go b/runsc/container/shared_volume_test.go
index bac177a88d..4ea8fefeea 100644
--- a/runsc/container/shared_volume_test.go
+++ b/runsc/container/shared_volume_test.go
@@ -25,14 +25,14 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/control"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/test/testutil"
-	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 )
 
 // TestSharedVolume checks that modifications to a volume mount are propagated
 // into and out of the sandbox.
 func TestSharedVolume(t *testing.T) {
 	conf := testutil.TestConfig(t)
-	conf.FileAccess = boot.FileAccessShared
+	conf.FileAccess = config.FileAccessShared
 
 	// Main process just sleeps. We will use "exec" to probe the state of
 	// the filesystem.
@@ -189,7 +189,7 @@ func checkFile(c *Container, filename string, want []byte) error {
 // is reflected inside.
 func TestSharedVolumeFile(t *testing.T) {
 	conf := testutil.TestConfig(t)
-	conf.FileAccess = boot.FileAccessShared
+	conf.FileAccess = config.FileAccessShared
 
 	// Main process just sleeps. We will use "exec" to probe the state of
 	// the filesystem.
diff --git a/runsc/fsgofer/fsgofer_test.go b/runsc/fsgofer/fsgofer_test.go
index c91cfd094e..0e4945b3d6 100644
--- a/runsc/fsgofer/fsgofer_test.go
+++ b/runsc/fsgofer/fsgofer_test.go
@@ -52,8 +52,8 @@ func init() {
 	}
 }
 
-func configTestName(config *Config) string {
-	if config.ROMount {
+func configTestName(conf *Config) string {
+	if conf.ROMount {
 		return "ROMount"
 	}
 	return "RWMount"
diff --git a/runsc/main.go b/runsc/main.go
index 69cb505fa4..c2ffecbdc7 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -32,8 +32,8 @@ import (
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/refs"
 	"gvisor.dev/gvisor/pkg/sentry/platform"
-	"gvisor.dev/gvisor/runsc/boot"
 	"gvisor.dev/gvisor/runsc/cmd"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/flag"
 	"gvisor.dev/gvisor/runsc/specutils"
 )
@@ -174,21 +174,21 @@ func main() {
 		cmd.Fatalf("%v", err)
 	}
 
-	fsAccess, err := boot.MakeFileAccessType(*fileAccess)
+	fsAccess, err := config.MakeFileAccessType(*fileAccess)
 	if err != nil {
 		cmd.Fatalf("%v", err)
 	}
 
-	if fsAccess == boot.FileAccessShared && *overlay {
+	if fsAccess == config.FileAccessShared && *overlay {
 		cmd.Fatalf("overlay flag is incompatible with shared file access")
 	}
 
-	netType, err := boot.MakeNetworkType(*network)
+	netType, err := config.MakeNetworkType(*network)
 	if err != nil {
 		cmd.Fatalf("%v", err)
 	}
 
-	wa, err := boot.MakeWatchdogAction(*watchdogAction)
+	wa, err := config.MakeWatchdogAction(*watchdogAction)
 	if err != nil {
 		cmd.Fatalf("%v", err)
 	}
@@ -197,12 +197,12 @@ func main() {
 		cmd.Fatalf("num_network_channels must be > 0, got: %d", *numNetworkChannels)
 	}
 
-	refsLeakMode, err := boot.MakeRefsLeakMode(*referenceLeakMode)
+	refsLeakMode, err := config.MakeRefsLeakMode(*referenceLeakMode)
 	if err != nil {
 		cmd.Fatalf("%v", err)
 	}
 
-	queueingDiscipline, err := boot.MakeQueueingDiscipline(*qDisc)
+	queueingDiscipline, err := config.MakeQueueingDiscipline(*qDisc)
 	if err != nil {
 		cmd.Fatalf("%s", err)
 	}
@@ -212,7 +212,7 @@ func main() {
 	refs.SetLeakMode(refsLeakMode)
 
 	// Create a new Config from the flags.
-	conf := &boot.Config{
+	conf := &config.Config{
 		RootDir:            *rootDir,
 		Debug:              *debug,
 		LogFilename:        *logFilename,
diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD
index 2b9d4549d2..f0a551a1e1 100644
--- a/runsc/sandbox/BUILD
+++ b/runsc/sandbox/BUILD
@@ -26,6 +26,7 @@ go_library(
         "//runsc/boot",
         "//runsc/boot/platforms",
         "//runsc/cgroup",
+        "//runsc/config",
         "//runsc/console",
         "//runsc/specutils",
         "@com_github_cenkalti_backoff//:go_default_library",
diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go
index 817a923ad8..f9abb2d441 100644
--- a/runsc/sandbox/network.go
+++ b/runsc/sandbox/network.go
@@ -31,6 +31,7 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 	"gvisor.dev/gvisor/pkg/urpc"
 	"gvisor.dev/gvisor/runsc/boot"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/specutils"
 )
 
@@ -49,23 +50,23 @@ import (
 //
 // Run the following container to test it:
 //  docker run -di --runtime=runsc -p 8080:80 -v $PWD:/usr/local/apache2/htdocs/ httpd:2.4
-func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *boot.Config) error {
+func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *config.Config) error {
 	log.Infof("Setting up network")
 
 	switch conf.Network {
-	case boot.NetworkNone:
+	case config.NetworkNone:
 		log.Infof("Network is disabled, create loopback interface only")
 		if err := createDefaultLoopbackInterface(conn); err != nil {
 			return fmt.Errorf("creating default loopback interface: %v", err)
 		}
-	case boot.NetworkSandbox:
+	case config.NetworkSandbox:
 		// Build the path to the net namespace of the sandbox process.
 		// This is what we will copy.
 		nsPath := filepath.Join("/proc", strconv.Itoa(pid), "ns/net")
 		if err := createInterfacesAndRoutesFromNS(conn, nsPath, conf.HardwareGSO, conf.SoftwareGSO, conf.TXChecksumOffload, conf.RXChecksumOffload, conf.NumNetworkChannels, conf.QDisc); err != nil {
 			return fmt.Errorf("creating interfaces from net namespace %q: %v", nsPath, err)
 		}
-	case boot.NetworkHost:
+	case config.NetworkHost:
 		// Nothing to do here.
 	default:
 		return fmt.Errorf("invalid network type: %d", conf.Network)
@@ -115,7 +116,7 @@ func isRootNS() (bool, error) {
 // createInterfacesAndRoutesFromNS scrapes the interface and routes from the
 // net namespace with the given path, creates them in the sandbox, and removes
 // them from the host.
-func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareGSO bool, softwareGSO bool, txChecksumOffload bool, rxChecksumOffload bool, numNetworkChannels int, qDisc boot.QueueingDiscipline) error {
+func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareGSO bool, softwareGSO bool, txChecksumOffload bool, rxChecksumOffload bool, numNetworkChannels int, qDisc config.QueueingDiscipline) error {
 	// Join the network namespace that we will be copying.
 	restore, err := joinNetNS(nsPath)
 	if err != nil {
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 36bb0c9c98..a339937fb6 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -41,6 +41,7 @@ import (
 	"gvisor.dev/gvisor/runsc/boot"
 	"gvisor.dev/gvisor/runsc/boot/platforms"
 	"gvisor.dev/gvisor/runsc/cgroup"
+	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/console"
 	"gvisor.dev/gvisor/runsc/specutils"
 )
@@ -116,7 +117,7 @@ type Args struct {
 
 // New creates the sandbox process. The caller must call Destroy() on the
 // sandbox.
-func New(conf *boot.Config, args *Args) (*Sandbox, error) {
+func New(conf *config.Config, args *Args) (*Sandbox, error) {
 	s := &Sandbox{ID: args.ID, Cgroup: args.Cgroup}
 	// The Cleanup object cleans up partially created sandboxes when an error
 	// occurs. Any errors occurring during cleanup itself are ignored.
@@ -180,7 +181,7 @@ func (s *Sandbox) CreateContainer(cid string) error {
 }
 
 // StartRoot starts running the root container process inside the sandbox.
-func (s *Sandbox) StartRoot(spec *specs.Spec, conf *boot.Config) error {
+func (s *Sandbox) StartRoot(spec *specs.Spec, conf *config.Config) error {
 	log.Debugf("Start root sandbox %q, PID: %d", s.ID, s.Pid)
 	conn, err := s.sandboxConnect()
 	if err != nil {
@@ -203,7 +204,7 @@ func (s *Sandbox) StartRoot(spec *specs.Spec, conf *boot.Config) error {
 }
 
 // StartContainer starts running a non-root container inside the sandbox.
-func (s *Sandbox) StartContainer(spec *specs.Spec, conf *boot.Config, cid string, goferFiles []*os.File) error {
+func (s *Sandbox) StartContainer(spec *specs.Spec, conf *config.Config, cid string, goferFiles []*os.File) error {
 	for _, f := range goferFiles {
 		defer f.Close()
 	}
@@ -232,7 +233,7 @@ func (s *Sandbox) StartContainer(spec *specs.Spec, conf *boot.Config, cid string
 }
 
 // Restore sends the restore call for a container in the sandbox.
-func (s *Sandbox) Restore(cid string, spec *specs.Spec, conf *boot.Config, filename string) error {
+func (s *Sandbox) Restore(cid string, spec *specs.Spec, conf *config.Config, filename string) error {
 	log.Debugf("Restore sandbox %q", s.ID)
 
 	rf, err := os.Open(filename)
@@ -344,7 +345,7 @@ func (s *Sandbox) connError(err error) error {
 
 // createSandboxProcess starts the sandbox as a subprocess by running the "boot"
 // command, passing in the bundle dir.
-func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncFile *os.File) error {
+func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyncFile *os.File) error {
 	// nextFD is used to get unused FDs that we can pass to the sandbox.  It
 	// starts at 3 because 0, 1, and 2 are taken by stdin/out/err.
 	nextFD := 3
@@ -555,10 +556,10 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
 	// Joins the network namespace if network is enabled. the sandbox talks
 	// directly to the host network, which may have been configured in the
 	// namespace.
-	if ns, ok := specutils.GetNS(specs.NetworkNamespace, args.Spec); ok && conf.Network != boot.NetworkNone {
+	if ns, ok := specutils.GetNS(specs.NetworkNamespace, args.Spec); ok && conf.Network != config.NetworkNone {
 		log.Infof("Sandbox will be started in the container's network namespace: %+v", ns)
 		nss = append(nss, ns)
-	} else if conf.Network == boot.NetworkHost {
+	} else if conf.Network == config.NetworkHost {
 		log.Infof("Sandbox will be started in the host network namespace")
 	} else {
 		log.Infof("Sandbox will be started in new network namespace")
@@ -568,7 +569,7 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
 	// User namespace depends on the network type. Host network requires to run
 	// inside the user namespace specified in the spec or the current namespace
 	// if none is configured.
-	if conf.Network == boot.NetworkHost {
+	if conf.Network == config.NetworkHost {
 		if userns, ok := specutils.GetNS(specs.UserNamespace, args.Spec); ok {
 			log.Infof("Sandbox will be started in container's user namespace: %+v", userns)
 			nss = append(nss, userns)
@@ -1179,7 +1180,7 @@ func deviceFileForPlatform(name string) (*os.File, error) {
 
 // checkBinaryPermissions verifies that the required binary bits are set on
 // the runsc executable.
-func checkBinaryPermissions(conf *boot.Config) error {
+func checkBinaryPermissions(conf *config.Config) error {
 	// All platforms need the other exe bit
 	neededBits := os.FileMode(0001)
 	if conf.Platform == platforms.Ptrace {
diff --git a/website/blog/2019-11-18-security-basics.md b/website/blog/2019-11-18-security-basics.md
index 76bbabc13f..2256ee9d5b 100644
--- a/website/blog/2019-11-18-security-basics.md
+++ b/website/blog/2019-11-18-security-basics.md
@@ -188,7 +188,7 @@ for direct access to some files. And most files will be remotely accessed
 through the Gofers, in which case no FDs are donated to the Sentry.
 
 The Sentry itself is only allowed access to specific
-[whitelisted syscalls](https://github.com/google/gvisor/blob/master/runsc/boot/config.go).
+[whitelisted syscalls](https://github.com/google/gvisor/blob/master/runsc/config/config.go).
 Without networking, the Sentry needs 53 host syscalls in order to function, and
 with networking, it uses an additional 15[^8]. By limiting the whitelist to only
 these needed syscalls, we radically reduce the amount of host OS attack surface.

From c0ae8604b524b80d77a1596fded05ef09d1f76fd Mon Sep 17 00:00:00 2001
From: Nicolas Lacasse <nlacasse@google.com>
Date: Thu, 20 Aug 2020 10:40:26 -0700
Subject: [PATCH 037/211] Fix tabs in lock-ordering doc.

PiperOrigin-RevId: 327654207
---
 pkg/sentry/vfs/vfs.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go
index 9c2420683c..8a79e13257 100644
--- a/pkg/sentry/vfs/vfs.go
+++ b/pkg/sentry/vfs/vfs.go
@@ -24,9 +24,9 @@
 //           Locks acquired by FilesystemImpls between Prepare{Delete,Rename}Dentry and Commit{Delete,Rename*}Dentry
 //         VirtualFilesystem.filesystemsMu
 //       EpollInstance.mu
-//		   Inotify.mu
-// 		     Watches.mu
-//  		     Inotify.evMu
+//       Inotify.mu
+//         Watches.mu
+//           Inotify.evMu
 // VirtualFilesystem.fsTypesMu
 //
 // Locking Dentry.mu in multiple Dentries requires holding

From 78cc2396bb1b3d89c4606fa95a77b151bb529c96 Mon Sep 17 00:00:00 2001
From: Bhasker Hariharan <bhaskerh@google.com>
Date: Thu, 20 Aug 2020 11:05:37 -0700
Subject: [PATCH 038/211] Use an explicit random src for RandomID.

PiperOrigin-RevId: 327659759
---
 pkg/test/testutil/testutil.go | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pkg/test/testutil/testutil.go b/pkg/test/testutil/testutil.go
index 3cb6c6814f..42d79f5c2a 100644
--- a/pkg/test/testutil/testutil.go
+++ b/pkg/test/testutil/testutil.go
@@ -243,12 +243,15 @@ func writeSpec(dir string, spec *specs.Spec) error {
 	return ioutil.WriteFile(filepath.Join(dir, "config.json"), b, 0755)
 }
 
+// idRandomSrc is a pseudo-random generator used in RandomID.
+var idRandomSrc = rand.New(rand.NewSource(time.Now().UnixNano()))
+
 // RandomID returns 20 random bytes following the given prefix.
 func RandomID(prefix string) string {
 	// Read 20 random bytes.
 	b := make([]byte, 20)
 	// "[Read] always returns len(p) and a nil error." --godoc
-	if _, err := rand.Read(b); err != nil {
+	if _, err := idRandomSrc.Read(b); err != nil {
 		panic("rand.Read failed: " + err.Error())
 	}
 	if prefix != "" {

From bcd92e97513c0bfa6255f21a7330e18b5e8c7f1e Mon Sep 17 00:00:00 2001
From: Arthur Sfez <asfez@google.com>
Date: Thu, 20 Aug 2020 12:04:36 -0700
Subject: [PATCH 039/211] Only use the NextHeader value of the first IPv6
 fragment extension header.

As per RFC 8200 Section 4.5:
  The Next Header field of the last header of the Per-Fragment
  headers is obtained from the Next Header field of the first
  fragment's Fragment header.

Test:
  - pkg/tcpip/network/ipv6:ipv6_test
  - pkg/tcpip/network/ipv4:ipv4_test
  - pkg/tcpip/network/fragmentation:fragmentation_test

Updates #2197

PiperOrigin-RevId: 327671635
---
 pkg/tcpip/network/fragmentation/BUILD         |  4 +-
 .../network/fragmentation/fragmentation.go    | 25 +++++---
 .../fragmentation/fragmentation_test.go       | 57 +++++++++++++------
 .../network/fragmentation/reassembler.go      | 23 ++++++--
 pkg/tcpip/network/ipv4/ipv4.go                |  6 +-
 pkg/tcpip/network/ipv6/ipv6.go                | 10 ++--
 pkg/tcpip/network/ipv6/ipv6_test.go           | 40 +++++++++++++
 7 files changed, 125 insertions(+), 40 deletions(-)

diff --git a/pkg/tcpip/network/fragmentation/BUILD b/pkg/tcpip/network/fragmentation/BUILD
index d1c728ccf4..96c5f42f8a 100644
--- a/pkg/tcpip/network/fragmentation/BUILD
+++ b/pkg/tcpip/network/fragmentation/BUILD
@@ -41,5 +41,7 @@ go_test(
         "reassembler_test.go",
     ],
     library = ":fragmentation",
-    deps = ["//pkg/tcpip/buffer"],
+    deps = [
+        "//pkg/tcpip/buffer",
+    ],
 )
diff --git a/pkg/tcpip/network/fragmentation/fragmentation.go b/pkg/tcpip/network/fragmentation/fragmentation.go
index 1827666c59..6a4843f926 100644
--- a/pkg/tcpip/network/fragmentation/fragmentation.go
+++ b/pkg/tcpip/network/fragmentation/fragmentation.go
@@ -120,29 +120,36 @@ func NewFragmentation(blockSize uint16, highMemoryLimit, lowMemoryLimit int, rea
 }
 
 // Process processes an incoming fragment belonging to an ID and returns a
-// complete packet when all the packets belonging to that ID have been received.
+// complete packet and its protocol number when all the packets belonging to
+// that ID have been received.
 //
 // [first, last] is the range of the fragment bytes.
 //
 // first must be a multiple of the block size f is configured with. The size
 // of the fragment data must be a multiple of the block size, unless there are
 // no fragments following this fragment (more set to false).
-func (f *Fragmentation) Process(id FragmentID, first, last uint16, more bool, vv buffer.VectorisedView) (buffer.VectorisedView, bool, error) {
+//
+// proto is the protocol number marked in the fragment being processed. It has
+// to be given here outside of the FragmentID struct because IPv6 should not use
+// the protocol to identify a fragment.
+func (f *Fragmentation) Process(
+	id FragmentID, first, last uint16, more bool, proto uint8, vv buffer.VectorisedView) (
+	buffer.VectorisedView, uint8, bool, error) {
 	if first > last {
-		return buffer.VectorisedView{}, false, fmt.Errorf("first=%d is greater than last=%d: %w", first, last, ErrInvalidArgs)
+		return buffer.VectorisedView{}, 0, false, fmt.Errorf("first=%d is greater than last=%d: %w", first, last, ErrInvalidArgs)
 	}
 
 	if first%f.blockSize != 0 {
-		return buffer.VectorisedView{}, false, fmt.Errorf("first=%d is not a multiple of block size=%d: %w", first, f.blockSize, ErrInvalidArgs)
+		return buffer.VectorisedView{}, 0, false, fmt.Errorf("first=%d is not a multiple of block size=%d: %w", first, f.blockSize, ErrInvalidArgs)
 	}
 
 	fragmentSize := last - first + 1
 	if more && fragmentSize%f.blockSize != 0 {
-		return buffer.VectorisedView{}, false, fmt.Errorf("fragment size=%d bytes is not a multiple of block size=%d on non-final fragment: %w", fragmentSize, f.blockSize, ErrInvalidArgs)
+		return buffer.VectorisedView{}, 0, false, fmt.Errorf("fragment size=%d bytes is not a multiple of block size=%d on non-final fragment: %w", fragmentSize, f.blockSize, ErrInvalidArgs)
 	}
 
 	if l := vv.Size(); l < int(fragmentSize) {
-		return buffer.VectorisedView{}, false, fmt.Errorf("got fragment size=%d bytes less than the expected fragment size=%d bytes (first=%d last=%d): %w", l, fragmentSize, first, last, ErrInvalidArgs)
+		return buffer.VectorisedView{}, 0, false, fmt.Errorf("got fragment size=%d bytes less than the expected fragment size=%d bytes (first=%d last=%d): %w", l, fragmentSize, first, last, ErrInvalidArgs)
 	}
 	vv.CapLength(int(fragmentSize))
 
@@ -160,14 +167,14 @@ func (f *Fragmentation) Process(id FragmentID, first, last uint16, more bool, vv
 	}
 	f.mu.Unlock()
 
-	res, done, consumed, err := r.process(first, last, more, vv)
+	res, firstFragmentProto, done, consumed, err := r.process(first, last, more, proto, vv)
 	if err != nil {
 		// We probably got an invalid sequence of fragments. Just
 		// discard the reassembler and move on.
 		f.mu.Lock()
 		f.release(r)
 		f.mu.Unlock()
-		return buffer.VectorisedView{}, false, fmt.Errorf("fragmentation processing error: %v", err)
+		return buffer.VectorisedView{}, 0, false, fmt.Errorf("fragmentation processing error: %w", err)
 	}
 	f.mu.Lock()
 	f.size += consumed
@@ -186,7 +193,7 @@ func (f *Fragmentation) Process(id FragmentID, first, last uint16, more bool, vv
 		}
 	}
 	f.mu.Unlock()
-	return res, done, nil
+	return res, firstFragmentProto, done, nil
 }
 
 func (f *Fragmentation) release(r *reassembler) {
diff --git a/pkg/tcpip/network/fragmentation/fragmentation_test.go b/pkg/tcpip/network/fragmentation/fragmentation_test.go
index 9eedd33c4a..416604659e 100644
--- a/pkg/tcpip/network/fragmentation/fragmentation_test.go
+++ b/pkg/tcpip/network/fragmentation/fragmentation_test.go
@@ -38,12 +38,14 @@ type processInput struct {
 	first uint16
 	last  uint16
 	more  bool
+	proto uint8
 	vv    buffer.VectorisedView
 }
 
 type processOutput struct {
-	vv   buffer.VectorisedView
-	done bool
+	vv    buffer.VectorisedView
+	proto uint8
+	done  bool
 }
 
 var processTestCases = []struct {
@@ -62,6 +64,17 @@ var processTestCases = []struct {
 			{vv: vv(4, "01", "23"), done: true},
 		},
 	},
+	{
+		comment: "Next Header protocol mismatch",
+		in: []processInput{
+			{id: FragmentID{ID: 0}, first: 0, last: 1, more: true, proto: 6, vv: vv(2, "01")},
+			{id: FragmentID{ID: 0}, first: 2, last: 3, more: false, proto: 17, vv: vv(2, "23")},
+		},
+		out: []processOutput{
+			{vv: buffer.VectorisedView{}, done: false},
+			{vv: vv(4, "01", "23"), proto: 6, done: true},
+		},
+	},
 	{
 		comment: "Two IDs",
 		in: []processInput{
@@ -83,18 +96,26 @@ func TestFragmentationProcess(t *testing.T) {
 	for _, c := range processTestCases {
 		t.Run(c.comment, func(t *testing.T) {
 			f := NewFragmentation(minBlockSize, 1024, 512, DefaultReassembleTimeout)
+			firstFragmentProto := c.in[0].proto
 			for i, in := range c.in {
-				vv, done, err := f.Process(in.id, in.first, in.last, in.more, in.vv)
+				vv, proto, done, err := f.Process(in.id, in.first, in.last, in.more, in.proto, in.vv)
 				if err != nil {
-					t.Fatalf("f.Process(%+v, %+d, %+d, %t, %+v) failed: %v", in.id, in.first, in.last, in.more, in.vv, err)
+					t.Fatalf("f.Process(%+v, %d, %d, %t, %d, %X) failed: %s",
+						in.id, in.first, in.last, in.more, in.proto, in.vv.ToView(), err)
 				}
 				if !reflect.DeepEqual(vv, c.out[i].vv) {
-					t.Errorf("got Process(%d) = %+v, want = %+v", i, vv, c.out[i].vv)
+					t.Errorf("got Process(%+v, %d, %d, %t, %d, %X) = (%X, _, _, _), want = (%X, _, _, _)",
+						in.id, in.first, in.last, in.more, in.proto, in.vv.ToView(), vv.ToView(), c.out[i].vv.ToView())
 				}
 				if done != c.out[i].done {
-					t.Errorf("got Process(%d) = %+v, want = %+v", i, done, c.out[i].done)
+					t.Errorf("got Process(%+v, %d, %d, %t, %d, _) = (_, _, %t, _), want = (_, _, %t, _)",
+						in.id, in.first, in.last, in.more, in.proto, done, c.out[i].done)
 				}
 				if c.out[i].done {
+					if firstFragmentProto != proto {
+						t.Errorf("got Process(%+v, %d, %d, %t, %d, _) = (_, %d, _, _), want = (_, %d, _, _)",
+							in.id, in.first, in.last, in.more, in.proto, proto, firstFragmentProto)
+					}
 					if _, ok := f.reassemblers[in.id]; ok {
 						t.Errorf("Process(%d) did not remove buffer from reassemblers", i)
 					}
@@ -113,14 +134,14 @@ func TestReassemblingTimeout(t *testing.T) {
 	timeout := time.Millisecond
 	f := NewFragmentation(minBlockSize, 1024, 512, timeout)
 	// Send first fragment with id = 0, first = 0, last = 0, and more = true.
-	f.Process(FragmentID{}, 0, 0, true, vv(1, "0"))
+	f.Process(FragmentID{}, 0, 0, true, 0xFF, vv(1, "0"))
 	// Sleep more than the timeout.
 	time.Sleep(2 * timeout)
 	// Send another fragment that completes a packet.
 	// However, no packet should be reassembled because the fragment arrived after the timeout.
-	_, done, err := f.Process(FragmentID{}, 1, 1, false, vv(1, "1"))
+	_, _, done, err := f.Process(FragmentID{}, 1, 1, false, 0xFF, vv(1, "1"))
 	if err != nil {
-		t.Fatalf("f.Process(0, 1, 1, false, vv(1, \"1\")) failed: %v", err)
+		t.Fatalf("f.Process(0, 1, 1, false, 0xFF, vv(1, \"1\")) failed: %v", err)
 	}
 	if done {
 		t.Errorf("Fragmentation does not respect the reassembling timeout.")
@@ -130,15 +151,15 @@ func TestReassemblingTimeout(t *testing.T) {
 func TestMemoryLimits(t *testing.T) {
 	f := NewFragmentation(minBlockSize, 3, 1, DefaultReassembleTimeout)
 	// Send first fragment with id = 0.
-	f.Process(FragmentID{ID: 0}, 0, 0, true, vv(1, "0"))
+	f.Process(FragmentID{ID: 0}, 0, 0, true, 0xFF, vv(1, "0"))
 	// Send first fragment with id = 1.
-	f.Process(FragmentID{ID: 1}, 0, 0, true, vv(1, "1"))
+	f.Process(FragmentID{ID: 1}, 0, 0, true, 0xFF, vv(1, "1"))
 	// Send first fragment with id = 2.
-	f.Process(FragmentID{ID: 2}, 0, 0, true, vv(1, "2"))
+	f.Process(FragmentID{ID: 2}, 0, 0, true, 0xFF, vv(1, "2"))
 
 	// Send first fragment with id = 3. This should caused id = 0 and id = 1 to be
 	// evicted.
-	f.Process(FragmentID{ID: 3}, 0, 0, true, vv(1, "3"))
+	f.Process(FragmentID{ID: 3}, 0, 0, true, 0xFF, vv(1, "3"))
 
 	if _, ok := f.reassemblers[FragmentID{ID: 0}]; ok {
 		t.Errorf("Memory limits are not respected: id=0 has not been evicted.")
@@ -154,9 +175,9 @@ func TestMemoryLimits(t *testing.T) {
 func TestMemoryLimitsIgnoresDuplicates(t *testing.T) {
 	f := NewFragmentation(minBlockSize, 1, 0, DefaultReassembleTimeout)
 	// Send first fragment with id = 0.
-	f.Process(FragmentID{}, 0, 0, true, vv(1, "0"))
+	f.Process(FragmentID{}, 0, 0, true, 0xFF, vv(1, "0"))
 	// Send the same packet again.
-	f.Process(FragmentID{}, 0, 0, true, vv(1, "0"))
+	f.Process(FragmentID{}, 0, 0, true, 0xFF, vv(1, "0"))
 
 	got := f.size
 	want := 1
@@ -248,12 +269,12 @@ func TestErrors(t *testing.T) {
 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
 			f := NewFragmentation(test.blockSize, HighFragThreshold, LowFragThreshold, DefaultReassembleTimeout)
-			_, done, err := f.Process(FragmentID{}, test.first, test.last, test.more, vv(len(test.data), test.data))
+			_, _, done, err := f.Process(FragmentID{}, test.first, test.last, test.more, 0, vv(len(test.data), test.data))
 			if !errors.Is(err, test.err) {
-				t.Errorf("got Proceess(_, %d, %d, %t, %q) = (_, _, %v), want = (_, _, %v)", test.first, test.last, test.more, test.data, err, test.err)
+				t.Errorf("got Process(_, %d, %d, %t, _, %q) = (_, _, _, %v), want = (_, _, _, %v)", test.first, test.last, test.more, test.data, err, test.err)
 			}
 			if done {
-				t.Errorf("got Proceess(_, %d, %d, %t, %q) = (_, true, _), want = (_, false, _)", test.first, test.last, test.more, test.data)
+				t.Errorf("got Process(_, %d, %d, %t, _, %q) = (_, _, true, _), want = (_, _, false, _)", test.first, test.last, test.more, test.data)
 			}
 		})
 	}
diff --git a/pkg/tcpip/network/fragmentation/reassembler.go b/pkg/tcpip/network/fragmentation/reassembler.go
index 50d30bbf09..f044867dc4 100644
--- a/pkg/tcpip/network/fragmentation/reassembler.go
+++ b/pkg/tcpip/network/fragmentation/reassembler.go
@@ -34,6 +34,7 @@ type reassembler struct {
 	reassemblerEntry
 	id           FragmentID
 	size         int
+	proto        uint8
 	mu           sync.Mutex
 	holes        []hole
 	deleted      int
@@ -46,7 +47,6 @@ func newReassembler(id FragmentID) *reassembler {
 	r := &reassembler{
 		id:           id,
 		holes:        make([]hole, 0, 16),
-		deleted:      0,
 		heap:         make(fragHeap, 0, 8),
 		creationTime: time.Now(),
 	}
@@ -78,7 +78,7 @@ func (r *reassembler) updateHoles(first, last uint16, more bool) bool {
 	return used
 }
 
-func (r *reassembler) process(first, last uint16, more bool, vv buffer.VectorisedView) (buffer.VectorisedView, bool, int, error) {
+func (r *reassembler) process(first, last uint16, more bool, proto uint8, vv buffer.VectorisedView) (buffer.VectorisedView, uint8, bool, int, error) {
 	r.mu.Lock()
 	defer r.mu.Unlock()
 	consumed := 0
@@ -86,7 +86,18 @@ func (r *reassembler) process(first, last uint16, more bool, vv buffer.Vectorise
 		// A concurrent goroutine might have already reassembled
 		// the packet and emptied the heap while this goroutine
 		// was waiting on the mutex. We don't have to do anything in this case.
-		return buffer.VectorisedView{}, false, consumed, nil
+		return buffer.VectorisedView{}, 0, false, consumed, nil
+	}
+	// For IPv6, it is possible to have different Protocol values between
+	// fragments of a packet (because, unlike IPv4, the Protocol is not used to
+	// identify a fragment). In this case, only the Protocol of the first
+	// fragment must be used as per RFC 8200 Section 4.5.
+	//
+	// TODO(gvisor.dev/issue/3648): The entire first IP header should be recorded
+	// here (instead of just the protocol) because most IP options should be
+	// derived from the first fragment.
+	if first == 0 {
+		r.proto = proto
 	}
 	if r.updateHoles(first, last, more) {
 		// We store the incoming packet only if it filled some holes.
@@ -96,13 +107,13 @@ func (r *reassembler) process(first, last uint16, more bool, vv buffer.Vectorise
 	}
 	// Check if all the holes have been deleted and we are ready to reassamble.
 	if r.deleted < len(r.holes) {
-		return buffer.VectorisedView{}, false, consumed, nil
+		return buffer.VectorisedView{}, 0, false, consumed, nil
 	}
 	res, err := r.heap.reassemble()
 	if err != nil {
-		return buffer.VectorisedView{}, false, consumed, fmt.Errorf("fragment reassembly failed: %v", err)
+		return buffer.VectorisedView{}, 0, false, consumed, fmt.Errorf("fragment reassembly failed: %w", err)
 	}
-	return res, true, consumed, nil
+	return res, r.proto, true, consumed, nil
 }
 
 func (r *reassembler) tooOld(timeout time.Duration) bool {
diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go
index 79872ec9a0..63ffb36608 100644
--- a/pkg/tcpip/network/ipv4/ipv4.go
+++ b/pkg/tcpip/network/ipv4/ipv4.go
@@ -415,18 +415,20 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 		}
 		var ready bool
 		var err error
-		pkt.Data, ready, err = e.protocol.fragmentation.Process(
+		proto := h.Protocol()
+		pkt.Data, _, ready, err = e.protocol.fragmentation.Process(
 			// As per RFC 791 section 2.3, the identification value is unique
 			// for a source-destination pair and protocol.
 			fragmentation.FragmentID{
 				Source:      h.SourceAddress(),
 				Destination: h.DestinationAddress(),
 				ID:          uint32(h.ID()),
-				Protocol:    h.Protocol(),
+				Protocol:    proto,
 			},
 			h.FragmentOffset(),
 			last,
 			h.More(),
+			proto,
 			pkt.Data,
 		)
 		if err != nil {
diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index 0eafe97900..267d2cce8e 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -321,10 +321,9 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 				return
 			}
 
-			var ready bool
 			// Note that pkt doesn't have its transport header set after reassembly,
 			// and won't until DeliverNetworkPacket sets it.
-			pkt.Data, ready, err = e.protocol.fragmentation.Process(
+			data, proto, ready, err := e.protocol.fragmentation.Process(
 				// IPv6 ignores the Protocol field since the ID only needs to be unique
 				// across source-destination pairs, as per RFC 8200 section 4.5.
 				fragmentation.FragmentID{
@@ -335,6 +334,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 				start,
 				last,
 				extHdr.More(),
+				uint8(rawPayload.Identifier),
 				rawPayload.Buf,
 			)
 			if err != nil {
@@ -342,12 +342,14 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 				r.Stats().IP.MalformedFragmentsReceived.Increment()
 				return
 			}
+			pkt.Data = data
 
 			if ready {
 				// We create a new iterator with the reassembled packet because we could
 				// have more extension headers in the reassembled payload, as per RFC
-				// 8200 section 4.5.
-				it = header.MakeIPv6PayloadIterator(rawPayload.Identifier, pkt.Data)
+				// 8200 section 4.5. We also use the NextHeader value from the first
+				// fragment.
+				it = header.MakeIPv6PayloadIterator(header.IPv6ExtensionHeaderIdentifier(proto), pkt.Data)
 			}
 
 		case header.IPv6DestinationOptionsExtHdr:
diff --git a/pkg/tcpip/network/ipv6/ipv6_test.go b/pkg/tcpip/network/ipv6/ipv6_test.go
index 0a183bfdee..54787198f7 100644
--- a/pkg/tcpip/network/ipv6/ipv6_test.go
+++ b/pkg/tcpip/network/ipv6/ipv6_test.go
@@ -865,6 +865,46 @@ func TestReceiveIPv6Fragments(t *testing.T) {
 			},
 			expectedPayloads: [][]byte{udpPayload1Addr1ToAddr2},
 		},
+		{
+			name: "Two fragments with different Next Header values",
+			fragments: []fragmentData{
+				{
+					srcAddr: addr1,
+					dstAddr: addr2,
+					nextHdr: fragmentExtHdrID,
+					data: buffer.NewVectorisedView(
+						fragmentExtHdrLen+64,
+						[]buffer.View{
+							// Fragment extension header.
+							//
+							// Fragment offset = 0, More = true, ID = 1
+							buffer.View([]byte{uint8(header.UDPProtocolNumber), 0, 0, 1, 0, 0, 0, 1}),
+
+							ipv6Payload1Addr1ToAddr2[:64],
+						},
+					),
+				},
+				{
+					srcAddr: addr1,
+					dstAddr: addr2,
+					nextHdr: fragmentExtHdrID,
+					data: buffer.NewVectorisedView(
+						fragmentExtHdrLen+len(ipv6Payload1Addr1ToAddr2)-64,
+						[]buffer.View{
+							// Fragment extension header.
+							//
+							// Fragment offset = 8, More = false, ID = 1
+							// NextHeader value is different than the one in the first fragment, so
+							// this NextHeader should be ignored.
+							buffer.View([]byte{uint8(header.IPv6NoNextHeaderIdentifier), 0, 0, 64, 0, 0, 0, 1}),
+
+							ipv6Payload1Addr1ToAddr2[64:],
+						},
+					),
+				},
+			},
+			expectedPayloads: [][]byte{udpPayload1Addr1ToAddr2},
+		},
 		{
 			name: "Two fragments with last fragment size not a multiple of fragment block size",
 			fragments: []fragmentData{

From e2c1084cc8eb52bdfda299df2386ba974c320d54 Mon Sep 17 00:00:00 2001
From: Bhasker Hariharan <bhaskerh@google.com>
Date: Thu, 20 Aug 2020 13:23:21 -0700
Subject: [PATCH 040/211] Skip listening TCP ports when trying to bind a free
 port.

PiperOrigin-RevId: 327686558
---
 pkg/sentry/socket/netstack/netstack.go        | 15 ++++-
 pkg/tcpip/ports/ports.go                      | 19 +++++-
 pkg/tcpip/ports/ports_test.go                 |  2 +-
 pkg/tcpip/transport/tcp/endpoint.go           | 60 +++++++++--------
 pkg/tcpip/transport/udp/endpoint.go           |  2 +-
 test/syscalls/linux/socket_inet_loopback.cc   | 38 +++++++++++
 .../linux/socket_inet_loopback_nogotsan.cc    | 65 +++++++++++++++++++
 .../syscalls/linux/socket_ipv4_udp_unbound.cc | 25 ++++++-
 8 files changed, 189 insertions(+), 37 deletions(-)

diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 0e5913b60a..4d0e336961 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -803,7 +803,20 @@ func (s *socketOpsCommon) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
 	}
 
 	// Issue the bind request to the endpoint.
-	return syserr.TranslateNetstackError(s.Endpoint.Bind(addr))
+	err := s.Endpoint.Bind(addr)
+	if err == tcpip.ErrNoPortAvailable {
+		// Bind always returns EADDRINUSE irrespective of if the specified port was
+		// already bound or if an ephemeral port was requested but none were
+		// available.
+		//
+		// tcpip.ErrNoPortAvailable is mapped to EAGAIN in syserr package because
+		// UDP connect returns EAGAIN on ephemeral port exhaustion.
+		//
+		// TCP connect returns EADDRNOTAVAIL on ephemeral port exhaustion.
+		err = tcpip.ErrPortInUse
+	}
+
+	return syserr.TranslateNetstackError(err)
 }
 
 // Listen implements the linux syscall listen(2) for sockets backed by
diff --git a/pkg/tcpip/ports/ports.go b/pkg/tcpip/ports/ports.go
index f6d592eb5e..d87193650a 100644
--- a/pkg/tcpip/ports/ports.go
+++ b/pkg/tcpip/ports/ports.go
@@ -400,7 +400,11 @@ func (s *PortManager) isPortAvailableLocked(networks []tcpip.NetworkProtocolNumb
 // reserved by another endpoint. If port is zero, ReservePort will search for
 // an unreserved ephemeral port and reserve it, returning its value in the
 // "port" return value.
-func (s *PortManager) ReservePort(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID, dest tcpip.FullAddress) (reservedPort uint16, err *tcpip.Error) {
+//
+// An optional testPort closure can be passed in which if provided will be used
+// to test if the picked port can be used. The function should return true if
+// the port is safe to use, false otherwise.
+func (s *PortManager) ReservePort(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID, dest tcpip.FullAddress, testPort func(port uint16) bool) (reservedPort uint16, err *tcpip.Error) {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 
@@ -412,12 +416,23 @@ func (s *PortManager) ReservePort(networks []tcpip.NetworkProtocolNumber, transp
 		if !s.reserveSpecificPort(networks, transport, addr, port, flags, bindToDevice, dst) {
 			return 0, tcpip.ErrPortInUse
 		}
+		if testPort != nil && !testPort(port) {
+			s.releasePortLocked(networks, transport, addr, port, flags.Bits(), bindToDevice, dst)
+			return 0, tcpip.ErrPortInUse
+		}
 		return port, nil
 	}
 
 	// A port wasn't specified, so try to find one.
 	return s.PickEphemeralPort(func(p uint16) (bool, *tcpip.Error) {
-		return s.reserveSpecificPort(networks, transport, addr, p, flags, bindToDevice, dst), nil
+		if !s.reserveSpecificPort(networks, transport, addr, p, flags, bindToDevice, dst) {
+			return false, nil
+		}
+		if testPort != nil && !testPort(p) {
+			s.releasePortLocked(networks, transport, addr, p, flags.Bits(), bindToDevice, dst)
+			return false, nil
+		}
+		return true, nil
 	})
 }
 
diff --git a/pkg/tcpip/ports/ports_test.go b/pkg/tcpip/ports/ports_test.go
index 58db5868cf..4bc949fd8a 100644
--- a/pkg/tcpip/ports/ports_test.go
+++ b/pkg/tcpip/ports/ports_test.go
@@ -332,7 +332,7 @@ func TestPortReservation(t *testing.T) {
 					pm.ReleasePort(net, fakeTransNumber, test.ip, test.port, test.flags, test.device, test.dest)
 					continue
 				}
-				gotPort, err := pm.ReservePort(net, fakeTransNumber, test.ip, test.port, test.flags, test.device, test.dest)
+				gotPort, err := pm.ReservePort(net, fakeTransNumber, test.ip, test.port, test.flags, test.device, test.dest, nil /* testPort */)
 				if err != test.want {
 					t.Fatalf("ReservePort(.., .., %s, %d, %+v, %d, %v) = %v, want %v", test.ip, test.port, test.flags, test.device, test.dest, err, test.want)
 				}
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 21a4b6e2f4..9df22ac842 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -2169,7 +2169,7 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc
 			if sameAddr && p == e.ID.RemotePort {
 				return false, nil
 			}
-			if _, err := e.stack.ReservePort(netProtos, ProtocolNumber, e.ID.LocalAddress, p, e.portFlags, e.bindToDevice, addr); err != nil {
+			if _, err := e.stack.ReservePort(netProtos, ProtocolNumber, e.ID.LocalAddress, p, e.portFlags, e.bindToDevice, addr, nil /* testPort */); err != nil {
 				if err != tcpip.ErrPortInUse || !reuse {
 					return false, nil
 				}
@@ -2207,7 +2207,7 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc
 				tcpEP.notifyProtocolGoroutine(notifyAbort)
 				tcpEP.UnlockUser()
 				// Now try and Reserve again if it fails then we skip.
-				if _, err := e.stack.ReservePort(netProtos, ProtocolNumber, e.ID.LocalAddress, p, e.portFlags, e.bindToDevice, addr); err != nil {
+				if _, err := e.stack.ReservePort(netProtos, ProtocolNumber, e.ID.LocalAddress, p, e.portFlags, e.bindToDevice, addr, nil /* testPort */); err != nil {
 					return false, nil
 				}
 			}
@@ -2505,47 +2505,45 @@ func (e *endpoint) bindLocked(addr tcpip.FullAddress) (err *tcpip.Error) {
 		}
 	}
 
-	port, err := e.stack.ReservePort(netProtos, ProtocolNumber, addr.Addr, addr.Port, e.portFlags, e.bindToDevice, tcpip.FullAddress{})
-	if err != nil {
-		return err
-	}
-
-	e.boundBindToDevice = e.bindToDevice
-	e.boundPortFlags = e.portFlags
-	e.isPortReserved = true
-	e.effectiveNetProtos = netProtos
-	e.ID.LocalPort = port
-
-	// Any failures beyond this point must remove the port registration.
-	defer func(portFlags ports.Flags, bindToDevice tcpip.NICID) {
-		if err != nil {
-			e.stack.ReleasePort(netProtos, ProtocolNumber, addr.Addr, port, portFlags, bindToDevice, tcpip.FullAddress{})
-			e.isPortReserved = false
-			e.effectiveNetProtos = nil
-			e.ID.LocalPort = 0
-			e.ID.LocalAddress = ""
-			e.boundNICID = 0
-			e.boundBindToDevice = 0
-			e.boundPortFlags = ports.Flags{}
-		}
-	}(e.boundPortFlags, e.boundBindToDevice)
-
+	var nic tcpip.NICID
 	// If an address is specified, we must ensure that it's one of our
 	// local addresses.
 	if len(addr.Addr) != 0 {
-		nic := e.stack.CheckLocalAddress(addr.NIC, netProto, addr.Addr)
+		nic = e.stack.CheckLocalAddress(addr.NIC, netProto, addr.Addr)
 		if nic == 0 {
 			return tcpip.ErrBadLocalAddress
 		}
-
-		e.boundNICID = nic
 		e.ID.LocalAddress = addr.Addr
 	}
 
-	if err := e.stack.CheckRegisterTransportEndpoint(e.boundNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e.boundPortFlags, e.boundBindToDevice); err != nil {
+	port, err := e.stack.ReservePort(netProtos, ProtocolNumber, addr.Addr, addr.Port, e.portFlags, e.bindToDevice, tcpip.FullAddress{}, func(p uint16) bool {
+		id := e.ID
+		id.LocalPort = p
+		// CheckRegisterTransportEndpoint should only return an error if there is a
+		// listening endpoint bound with the same id and portFlags and bindToDevice
+		// options.
+		//
+		// NOTE: Only listening and connected endpoint register with
+		// demuxer. Further connected endpoints always have a remote
+		// address/port. Hence this will only return an error if there is a matching
+		// listening endpoint.
+		if err := e.stack.CheckRegisterTransportEndpoint(nic, netProtos, ProtocolNumber, id, e.portFlags, e.bindToDevice); err != nil {
+			return false
+		}
+		return true
+	})
+	if err != nil {
 		return err
 	}
 
+	e.boundBindToDevice = e.bindToDevice
+	e.boundPortFlags = e.portFlags
+	// TODO(gvisor.dev/issue/3691): Add test to verify boundNICID is correct.
+	e.boundNICID = nic
+	e.isPortReserved = true
+	e.effectiveNetProtos = netProtos
+	e.ID.LocalPort = port
+
 	// Mark endpoint as bound.
 	e.setEndpointState(StateBound)
 
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index 73608783cd..c33434b75c 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -1226,7 +1226,7 @@ func (*endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) {
 
 func (e *endpoint) registerWithStack(nicID tcpip.NICID, netProtos []tcpip.NetworkProtocolNumber, id stack.TransportEndpointID) (stack.TransportEndpointID, tcpip.NICID, *tcpip.Error) {
 	if e.ID.LocalPort == 0 {
-		port, err := e.stack.ReservePort(netProtos, ProtocolNumber, id.LocalAddress, id.LocalPort, e.portFlags, e.bindToDevice, tcpip.FullAddress{})
+		port, err := e.stack.ReservePort(netProtos, ProtocolNumber, id.LocalAddress, id.LocalPort, e.portFlags, e.bindToDevice, tcpip.FullAddress{}, nil /* testPort */)
 		if err != nil {
 			return id, e.bindToDevice, err
 		}
diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc
index c3b42682f4..a62a10088a 100644
--- a/test/syscalls/linux/socket_inet_loopback.cc
+++ b/test/syscalls/linux/socket_inet_loopback.cc
@@ -2573,6 +2573,44 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V4EphemeralPortReservedReuseAddr) {
       SyscallSucceeds());
 }
 
+TEST_P(SocketMultiProtocolInetLoopbackTest,
+       MultipleBindsAllowedNoListeningReuseAddr) {
+  const auto& param = GetParam();
+  // UDP sockets are allowed to bind/listen on the port w/ SO_REUSEADDR, for TCP
+  // this is only permitted if there is no other listening socket.
+  SKIP_IF(param.type != SOCK_STREAM);
+  // Bind the v4 loopback on a v4 socket.
+  const TestAddress& test_addr = V4Loopback();
+  sockaddr_storage bound_addr = test_addr.addr;
+  FileDescriptor bound_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+
+  ASSERT_THAT(setsockopt(bound_fd.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+                   test_addr.addr_len),
+              SyscallSucceeds());
+  // Get the port that we bound.
+  socklen_t bound_addr_len = test_addr.addr_len;
+  ASSERT_THAT(
+      getsockname(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+                  &bound_addr_len),
+      SyscallSucceeds());
+
+  // Now create a socket and bind it to the same port, this should
+  // succeed since there is no listening socket for the same port.
+  FileDescriptor second_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+
+  ASSERT_THAT(setsockopt(second_fd.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(second_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+                   test_addr.addr_len),
+              SyscallSucceeds());
+}
+
 TEST_P(SocketMultiProtocolInetLoopbackTest, PortReuseTwoSockets) {
   auto const& param = GetParam();
   TestAddress const& test_addr = V4Loopback();
diff --git a/test/syscalls/linux/socket_inet_loopback_nogotsan.cc b/test/syscalls/linux/socket_inet_loopback_nogotsan.cc
index 791e2bd513..1a0b533948 100644
--- a/test/syscalls/linux/socket_inet_loopback_nogotsan.cc
+++ b/test/syscalls/linux/socket_inet_loopback_nogotsan.cc
@@ -168,6 +168,71 @@ INSTANTIATE_TEST_SUITE_P(
         TestParam{V6Loopback(), V6Loopback()}),
     DescribeTestParam);
 
+struct ProtocolTestParam {
+  std::string description;
+  int type;
+};
+
+std::string DescribeProtocolTestParam(
+    ::testing::TestParamInfo<ProtocolTestParam> const& info) {
+  return info.param.description;
+}
+
+using SocketMultiProtocolInetLoopbackTest =
+    ::testing::TestWithParam<ProtocolTestParam>;
+
+TEST_P(SocketMultiProtocolInetLoopbackTest,
+       BindAvoidsListeningPortsReuseAddr_NoRandomSave) {
+  const auto& param = GetParam();
+  // UDP sockets are allowed to bind/listen on the port w/ SO_REUSEADDR, for TCP
+  // this is only permitted if there is no other listening socket.
+  SKIP_IF(param.type != SOCK_STREAM);
+
+  DisableSave ds;  // Too many syscalls.
+
+  // A map of port to file descriptor binding the port.
+  std::map<uint16_t, FileDescriptor> listen_sockets;
+
+  // Exhaust all ephemeral ports.
+  while (true) {
+    // Bind the v4 loopback on a v4 socket.
+    TestAddress const& test_addr = V4Loopback();
+    sockaddr_storage bound_addr = test_addr.addr;
+    FileDescriptor bound_fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+
+    ASSERT_THAT(setsockopt(bound_fd.get(), SOL_SOCKET, SO_REUSEADDR,
+                           &kSockOptOn, sizeof(kSockOptOn)),
+                SyscallSucceeds());
+
+    int ret = bind(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+                   test_addr.addr_len);
+    if (ret != 0) {
+      ASSERT_EQ(errno, EADDRINUSE);
+      break;
+    }
+    // Get the port that we bound.
+    socklen_t bound_addr_len = test_addr.addr_len;
+    ASSERT_THAT(
+        getsockname(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+                    &bound_addr_len),
+        SyscallSucceeds());
+    uint16_t port = reinterpret_cast<sockaddr_in*>(&bound_addr)->sin_port;
+
+    // Newly bound port should not already be in use by a listening socket.
+    ASSERT_EQ(listen_sockets.find(port), listen_sockets.end());
+    auto fd = bound_fd.get();
+    listen_sockets.insert(std::make_pair(port, std::move(bound_fd)));
+    ASSERT_THAT(listen(fd, SOMAXCONN), SyscallSucceeds());
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AllFamilies, SocketMultiProtocolInetLoopbackTest,
+    ::testing::Values(ProtocolTestParam{"TCP", SOCK_STREAM},
+                      ProtocolTestParam{"UDP", SOCK_DGRAM}),
+    DescribeProtocolTestParam);
+
 }  // namespace
 
 }  // namespace testing
diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.cc b/test/syscalls/linux/socket_ipv4_udp_unbound.cc
index bc005e2bbe..cdc9c22666 100644
--- a/test/syscalls/linux/socket_ipv4_udp_unbound.cc
+++ b/test/syscalls/linux/socket_ipv4_udp_unbound.cc
@@ -2121,7 +2121,7 @@ TEST_P(IPv4UDPUnboundSocketTest, ReuseAddrReusePortDistribution) {
               SyscallSucceedsWithValue(kMessageSize));
 }
 
-// Check that connect returns EADDRNOTAVAIL when out of local ephemeral ports.
+// Check that connect returns EAGAIN when out of local ephemeral ports.
 // We disable S/R because this test creates a large number of sockets.
 TEST_P(IPv4UDPUnboundSocketTest, UDPConnectPortExhaustion_NoRandomSave) {
   auto receiver1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
@@ -2154,6 +2154,29 @@ TEST_P(IPv4UDPUnboundSocketTest, UDPConnectPortExhaustion_NoRandomSave) {
   }
 }
 
+// Check that bind returns EADDRINUSE when out of local ephemeral ports.
+// We disable S/R because this test creates a large number of sockets.
+TEST_P(IPv4UDPUnboundSocketTest, UDPBindPortExhaustion_NoRandomSave) {
+  auto receiver1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  constexpr int kClients = 65536;
+  auto addr = V4Loopback();
+  // Disable cooperative S/R as we are making too many syscalls.
+  DisableSave ds;
+  std::vector<std::unique_ptr<FileDescriptor>> sockets;
+  for (int i = 0; i < kClients; i++) {
+    auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+    int ret =
+        bind(s->get(), reinterpret_cast<sockaddr*>(&addr.addr), addr.addr_len);
+    if (ret == 0) {
+      sockets.push_back(std::move(s));
+      continue;
+    }
+    ASSERT_THAT(ret, SyscallFailsWithErrno(EADDRINUSE));
+    break;
+  }
+}
+
 // Test that socket will receive packet info control message.
 TEST_P(IPv4UDPUnboundSocketTest, SetAndReceiveIPPKTINFO) {
   // TODO(gvisor.dev/issue/1202): ioctl() is not supported by hostinet.

From a3f446a86fed6f3f70daef91b7f7cb5db4ebd383 Mon Sep 17 00:00:00 2001
From: Michael Pratt <mpratt@google.com>
Date: Thu, 20 Aug 2020 13:28:43 -0700
Subject: [PATCH 041/211] Consistent precondition formatting

Our "Preconditions:" blocks are very useful to determine the input invariants,
but they are a bit inconsistent throughout the codebase, which makes them harder
to read (particularly cases with 5+ conditions in a single paragraph).

I've reformatted all of the cases to fit in simple rules:

1. Cases with a single condition are placed on a single line.
2. Cases with multiple conditions are placed in a bulleted list.

This format has been added to the style guide.

I've also mentioned "Postconditions:", though those are much less frequently
used, and all uses already match this style.

PiperOrigin-RevId: 327687465
---
 g3doc/style.md                             |  9 +++
 pkg/fdnotifier/poll_unsafe.go              |  3 +-
 pkg/flipcall/flipcall.go                   | 31 +++++---
 pkg/metric/metric.go                       |  6 +-
 pkg/safemem/seq_unsafe.go                  |  7 +-
 pkg/segment/set.go                         | 32 ++++----
 pkg/sentry/fs/copy_up.go                   | 13 ++--
 pkg/sentry/fs/dirent.go                    | 12 +--
 pkg/sentry/fs/file_operations.go           |  5 +-
 pkg/sentry/fs/fsutil/file_range_set.go     | 10 ++-
 pkg/sentry/fs/fsutil/host_file_mapper.go   | 12 ++-
 pkg/sentry/fs/fsutil/inode_cached.go       |  4 +-
 pkg/sentry/fs/overlay.go                   | 20 +++--
 pkg/sentry/fs/tty/queue.go                 |  9 +--
 pkg/sentry/fsimpl/devpts/queue.go          |  9 +--
 pkg/sentry/fsimpl/ext/filesystem.go        | 12 +--
 pkg/sentry/fsimpl/gofer/directory.go       | 21 ++++--
 pkg/sentry/fsimpl/gofer/filesystem.go      | 38 ++++++----
 pkg/sentry/fsimpl/gofer/gofer.go           |  4 +-
 pkg/sentry/fsimpl/gofer/time.go            | 15 ++--
 pkg/sentry/fsimpl/kernfs/filesystem.go     | 21 ++++--
 pkg/sentry/fsimpl/overlay/directory.go     |  4 +-
 pkg/sentry/fsimpl/overlay/filesystem.go    | 32 +++++---
 pkg/sentry/fsimpl/overlay/overlay.go       |  4 +-
 pkg/sentry/fsimpl/tmpfs/directory.go       |  5 +-
 pkg/sentry/fsimpl/tmpfs/filesystem.go      | 13 +++-
 pkg/sentry/fsimpl/tmpfs/named_pipe.go      |  4 +-
 pkg/sentry/fsimpl/tmpfs/tmpfs.go           | 15 ++--
 pkg/sentry/kernel/kernel.go                | 10 ++-
 pkg/sentry/kernel/ptrace.go                | 25 ++++---
 pkg/sentry/kernel/rseq.go                  | 31 +++++---
 pkg/sentry/kernel/task_exec.go             |  7 +-
 pkg/sentry/kernel/task_sched.go            | 11 +--
 pkg/sentry/kernel/task_signals.go          | 12 +--
 pkg/sentry/kernel/task_stop.go             | 14 ++--
 pkg/sentry/kernel/task_usermem.go          | 12 +--
 pkg/sentry/kernel/time/time.go             |  6 +-
 pkg/sentry/kernel/vdso.go                  |  3 -
 pkg/sentry/loader/elf.go                   | 13 ++--
 pkg/sentry/loader/loader.go                |  4 +-
 pkg/sentry/memmap/mapping_set.go           |  4 +-
 pkg/sentry/memmap/memmap.go                | 59 +++++++++------
 pkg/sentry/mm/address_space.go             |  8 +-
 pkg/sentry/mm/io.go                        |  9 ++-
 pkg/sentry/mm/pma.go                       | 85 +++++++++++++++-------
 pkg/sentry/mm/syscalls.go                  |  9 ++-
 pkg/sentry/mm/vma.go                       | 42 +++++++----
 pkg/sentry/pgalloc/pgalloc.go              | 10 ++-
 pkg/sentry/platform/interrupt/interrupt.go |  5 +-
 pkg/sentry/platform/platform.go            | 13 +++-
 pkg/sentry/vfs/dentry.go                   |  5 +-
 pkg/sentry/vfs/file_description.go         | 10 ++-
 pkg/sentry/vfs/filesystem.go               | 41 ++++++-----
 pkg/sentry/vfs/mount.go                    | 24 +++---
 pkg/sentry/vfs/mount_unsafe.go             | 18 +++--
 pkg/syncevent/broadcaster.go               |  4 +-
 pkg/syncevent/source.go                    |  8 +-
 pkg/tcpip/stack/conntrack.go               |  4 +-
 pkg/tcpip/stack/iptables.go                | 12 +--
 pkg/unet/unet.go                           |  2 +-
 pkg/usermem/addr_range_seq_unsafe.go       |  6 +-
 pkg/usermem/usermem.go                     | 77 +++++++++++---------
 62 files changed, 596 insertions(+), 377 deletions(-)

diff --git a/g3doc/style.md b/g3doc/style.md
index d10549fe97..8258b02331 100644
--- a/g3doc/style.md
+++ b/g3doc/style.md
@@ -46,6 +46,15 @@ protected.
 Each field or variable protected by a mutex should state as such in a comment on
 the field or variable declaration.
 
+### Function comments
+
+Functions with special entry conditions (e.g., a lock must be held) should state
+these conditions in a `Preconditions:` comment block. One condition per line;
+multiple conditions are specified with a bullet (`*`).
+
+Functions with notable exit conditions (e.g., a `Done` function must eventually
+be called by the caller) can similarly have a `Postconditions:` block.
+
 ### Unused returns
 
 Unused returns should be explicitly ignored with underscores. If there is a
diff --git a/pkg/fdnotifier/poll_unsafe.go b/pkg/fdnotifier/poll_unsafe.go
index 4225b04dd4..ec2f997a28 100644
--- a/pkg/fdnotifier/poll_unsafe.go
+++ b/pkg/fdnotifier/poll_unsafe.go
@@ -65,8 +65,7 @@ func NonBlockingPoll(fd int32, mask waiter.EventMask) waiter.EventMask {
 
 // epollWait performs a blocking wait on epfd.
 //
-// Preconditions:
-//  * len(events) > 0
+// Preconditions: len(events) > 0
 func epollWait(epfd int, events []syscall.EpollEvent, msec int) (int, error) {
 	if len(events) == 0 {
 		panic("Empty events passed to EpollWait")
diff --git a/pkg/flipcall/flipcall.go b/pkg/flipcall/flipcall.go
index ec742c0914..c4a3366cec 100644
--- a/pkg/flipcall/flipcall.go
+++ b/pkg/flipcall/flipcall.go
@@ -179,8 +179,10 @@ const (
 
 // Connect blocks until the peer Endpoint has called Endpoint.RecvFirst().
 //
-// Preconditions: ep is a client Endpoint. ep.Connect(), ep.RecvFirst(),
-// ep.SendRecv(), and ep.SendLast() have never been called.
+// Preconditions:
+// * ep is a client Endpoint.
+// * ep.Connect(), ep.RecvFirst(), ep.SendRecv(), and ep.SendLast() have never
+//   been called.
 func (ep *Endpoint) Connect() error {
 	err := ep.ctrlConnect()
 	if err == nil {
@@ -192,8 +194,9 @@ func (ep *Endpoint) Connect() error {
 // RecvFirst blocks until the peer Endpoint calls Endpoint.SendRecv(), then
 // returns the datagram length specified by that call.
 //
-// Preconditions: ep is a server Endpoint. ep.SendRecv(), ep.RecvFirst(), and
-// ep.SendLast() have never been called.
+// Preconditions:
+// * ep is a server Endpoint.
+// * ep.SendRecv(), ep.RecvFirst(), and ep.SendLast() have never been called.
 func (ep *Endpoint) RecvFirst() (uint32, error) {
 	if err := ep.ctrlWaitFirst(); err != nil {
 		return 0, err
@@ -211,10 +214,12 @@ func (ep *Endpoint) RecvFirst() (uint32, error) {
 // datagram length, then blocks until the peer Endpoint calls
 // Endpoint.SendRecv() or Endpoint.SendLast().
 //
-// Preconditions: dataLen <= ep.DataCap(). No previous call to ep.SendRecv() or
-// ep.RecvFirst() has returned an error. ep.SendLast() has never been called.
-// If ep is a client Endpoint, ep.Connect() has previously been called and
-// returned nil.
+// Preconditions:
+// * dataLen <= ep.DataCap().
+// * No previous call to ep.SendRecv() or ep.RecvFirst() has returned an error.
+// * ep.SendLast() has never been called.
+// * If ep is a client Endpoint, ep.Connect() has previously been called and
+//   returned nil.
 func (ep *Endpoint) SendRecv(dataLen uint32) (uint32, error) {
 	if dataLen > ep.dataCap {
 		panic(fmt.Sprintf("attempting to send packet with datagram length %d (maximum %d)", dataLen, ep.dataCap))
@@ -240,10 +245,12 @@ func (ep *Endpoint) SendRecv(dataLen uint32) (uint32, error) {
 // SendLast causes the peer Endpoint's call to Endpoint.SendRecv() or
 // Endpoint.RecvFirst() to return with the given datagram length.
 //
-// Preconditions: dataLen <= ep.DataCap(). No previous call to ep.SendRecv() or
-// ep.RecvFirst() has returned an error. ep.SendLast() has never been called.
-// If ep is a client Endpoint, ep.Connect() has previously been called and
-// returned nil.
+// Preconditions:
+// * dataLen <= ep.DataCap().
+// * No previous call to ep.SendRecv() or ep.RecvFirst() has returned an error.
+// * ep.SendLast() has never been called.
+// * If ep is a client Endpoint, ep.Connect() has previously been called and
+//   returned nil.
 func (ep *Endpoint) SendLast(dataLen uint32) error {
 	if dataLen > ep.dataCap {
 		panic(fmt.Sprintf("attempting to send packet with datagram length %d (maximum %d)", dataLen, ep.dataCap))
diff --git a/pkg/metric/metric.go b/pkg/metric/metric.go
index 64aa365ceb..d012c57343 100644
--- a/pkg/metric/metric.go
+++ b/pkg/metric/metric.go
@@ -106,8 +106,8 @@ type customUint64Metric struct {
 // after Initialized.
 //
 // Preconditions:
-//  * name must be globally unique.
-//  * Initialize/Disable have not been called.
+// * name must be globally unique.
+// * Initialize/Disable have not been called.
 func RegisterCustomUint64Metric(name string, cumulative, sync bool, units pb.MetricMetadata_Units, description string, value func() uint64) error {
 	if initialized {
 		return ErrInitializationDone
@@ -221,7 +221,7 @@ var (
 // EmitMetricUpdate is thread-safe.
 //
 // Preconditions:
-//  * Initialize has been called.
+// * Initialize has been called.
 func EmitMetricUpdate() {
 	emitMu.Lock()
 	defer emitMu.Unlock()
diff --git a/pkg/safemem/seq_unsafe.go b/pkg/safemem/seq_unsafe.go
index f5f0574f81..fc4049eeb8 100644
--- a/pkg/safemem/seq_unsafe.go
+++ b/pkg/safemem/seq_unsafe.go
@@ -91,9 +91,10 @@ func BlockSeqFromSlice(slice []Block) BlockSeq {
 	return blockSeqFromSliceLimited(slice, limit)
 }
 
-// Preconditions: The combined length of all Blocks in slice <= limit. If
-// len(slice) != 0, the first Block in slice has non-zero length, and limit >
-// 0.
+// Preconditions:
+// * The combined length of all Blocks in slice <= limit.
+// * If len(slice) != 0, the first Block in slice has non-zero length and
+//   limit > 0.
 func blockSeqFromSliceLimited(slice []Block, limit uint64) BlockSeq {
 	switch len(slice) {
 	case 0:
diff --git a/pkg/segment/set.go b/pkg/segment/set.go
index 1a17ad9cb9..fbb31dbead 100644
--- a/pkg/segment/set.go
+++ b/pkg/segment/set.go
@@ -407,7 +407,9 @@ func (s *Set) InsertWithoutMerging(gap GapIterator, r Range, val Value) Iterator
 // and returns an iterator to the inserted segment. All existing iterators
 // (including gap, but not including the returned iterator) are invalidated.
 //
-// Preconditions: r.Start >= gap.Start(); r.End <= gap.End().
+// Preconditions:
+// * r.Start >= gap.Start().
+// * r.End <= gap.End().
 func (s *Set) InsertWithoutMergingUnchecked(gap GapIterator, r Range, val Value) Iterator {
 	gap = gap.node.rebalanceBeforeInsert(gap)
 	splitMaxGap := trackGaps != 0 && (gap.node.nrSegments == 0 || gap.Range().Length() == gap.node.maxGap.Get())
@@ -1211,12 +1213,10 @@ func (seg Iterator) End() Key {
 // does not invalidate any iterators.
 //
 // Preconditions:
-//
-// - r.Length() > 0.
-//
-// - The new range must not overlap an existing one: If seg.NextSegment().Ok(),
-// then r.end <= seg.NextSegment().Start(); if seg.PrevSegment().Ok(), then
-// r.start >= seg.PrevSegment().End().
+// * r.Length() > 0.
+// * The new range must not overlap an existing one:
+//   * If seg.NextSegment().Ok(), then r.end <= seg.NextSegment().Start().
+//   * If seg.PrevSegment().Ok(), then r.start >= seg.PrevSegment().End().
 func (seg Iterator) SetRangeUnchecked(r Range) {
 	seg.node.keys[seg.index] = r
 }
@@ -1241,8 +1241,9 @@ func (seg Iterator) SetRange(r Range) {
 // SetStartUnchecked mutates the iterated segment's start. This operation does
 // not invalidate any iterators.
 //
-// Preconditions: The new start must be valid: start < seg.End(); if
-// seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End().
+// Preconditions: The new start must be valid:
+// * start < seg.End().
+// * If seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End().
 func (seg Iterator) SetStartUnchecked(start Key) {
 	seg.node.keys[seg.index].Start = start
 }
@@ -1264,8 +1265,9 @@ func (seg Iterator) SetStart(start Key) {
 // SetEndUnchecked mutates the iterated segment's end. This operation does not
 // invalidate any iterators.
 //
-// Preconditions: The new end must be valid: end > seg.Start(); if
-// seg.NextSegment().Ok(), then end <= seg.NextSegment().Start().
+// Preconditions: The new end must be valid:
+// * end > seg.Start().
+// * If seg.NextSegment().Ok(), then end <= seg.NextSegment().Start().
 func (seg Iterator) SetEndUnchecked(end Key) {
 	seg.node.keys[seg.index].End = end
 }
@@ -1695,9 +1697,11 @@ func (s *Set) ExportSortedSlices() *SegmentDataSlices {
 
 // ImportSortedSlice initializes the given set from the given slice.
 //
-// Preconditions: s must be empty. sds must represent a valid set (the segments
-// in sds must have valid lengths that do not overlap). The segments in sds
-// must be sorted in ascending key order.
+// Preconditions:
+// * s must be empty.
+// * sds must represent a valid set (the segments in sds must have valid
+//   lengths that do not overlap).
+// * The segments in sds must be sorted in ascending key order.
 func (s *Set) ImportSortedSlices(sds *SegmentDataSlices) error {
 	if !s.IsEmpty() {
 		return fmt.Errorf("cannot import into non-empty set %v", s)
diff --git a/pkg/sentry/fs/copy_up.go b/pkg/sentry/fs/copy_up.go
index 735452b07a..ff2fe67122 100644
--- a/pkg/sentry/fs/copy_up.go
+++ b/pkg/sentry/fs/copy_up.go
@@ -107,8 +107,7 @@ func copyUp(ctx context.Context, d *Dirent) error {
 // leave the upper filesystem filled with any number of parent directories
 // but the upper filesystem will never be in an inconsistent state.
 //
-// Preconditions:
-// - d.Inode.overlay is non-nil.
+// Preconditions: d.Inode.overlay is non-nil.
 func copyUpLockedForRename(ctx context.Context, d *Dirent) error {
 	for {
 		// Did we race with another copy up or does there
@@ -183,12 +182,12 @@ func doCopyUp(ctx context.Context, d *Dirent) error {
 // Returns a generic error on failure.
 //
 // Preconditions:
-// - parent.Inode.overlay.upper must be non-nil.
-// - next.Inode.overlay.copyMu must be locked writable.
-// - next.Inode.overlay.lower must be non-nil.
-// - next.Inode.overlay.lower.StableAttr.Type must be RegularFile, Directory,
+// * parent.Inode.overlay.upper must be non-nil.
+// * next.Inode.overlay.copyMu must be locked writable.
+// * next.Inode.overlay.lower must be non-nil.
+// * next.Inode.overlay.lower.StableAttr.Type must be RegularFile, Directory,
 //   or Symlink.
-// - upper filesystem must support setting file ownership and timestamps.
+// * upper filesystem must support setting file ownership and timestamps.
 func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error {
 	// Extract the attributes of the file we wish to copy.
 	attrs, err := next.Inode.overlay.lower.UnstableAttr(ctx)
diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go
index a2f751068d..00c526b036 100644
--- a/pkg/sentry/fs/dirent.go
+++ b/pkg/sentry/fs/dirent.go
@@ -413,9 +413,9 @@ func (d *Dirent) descendantOf(p *Dirent) bool {
 // Inode.Lookup, otherwise walk will keep d.mu locked.
 //
 // Preconditions:
-// - renameMu must be held for reading.
-// - d.mu must be held.
-// - name must must not contain "/"s.
+// * renameMu must be held for reading.
+// * d.mu must be held.
+// * name must not contain "/"s.
 func (d *Dirent) walk(ctx context.Context, root *Dirent, name string, walkMayUnlock bool) (*Dirent, error) {
 	if !IsDir(d.Inode.StableAttr) {
 		return nil, syscall.ENOTDIR
@@ -577,9 +577,9 @@ func (d *Dirent) Walk(ctx context.Context, root *Dirent, name string) (*Dirent,
 // exists returns true if name exists in relation to d.
 //
 // Preconditions:
-// - renameMu must be held for reading.
-// - d.mu must be held.
-// - name must must not contain "/"s.
+// * renameMu must be held for reading.
+// * d.mu must be held.
+// * name must not contain "/"s.
 func (d *Dirent) exists(ctx context.Context, root *Dirent, name string) bool {
 	child, err := d.walk(ctx, root, name, false /* may unlock */)
 	if err != nil {
diff --git a/pkg/sentry/fs/file_operations.go b/pkg/sentry/fs/file_operations.go
index 305c0f8400..6ec7210222 100644
--- a/pkg/sentry/fs/file_operations.go
+++ b/pkg/sentry/fs/file_operations.go
@@ -159,8 +159,9 @@ type FileOperations interface {
 	// io provides access to the virtual memory space to which pointers in args
 	// refer.
 	//
-	// Preconditions: The AddressSpace (if any) that io refers to is activated.
-	// Must only be called from a task goroutine.
+	// Preconditions:
+	// * The AddressSpace (if any) that io refers to is activated.
+	// * Must only be called from a task goroutine.
 	Ioctl(ctx context.Context, file *File, io usermem.IO, args arch.SyscallArguments) (uintptr, error)
 }
 
diff --git a/pkg/sentry/fs/fsutil/file_range_set.go b/pkg/sentry/fs/fsutil/file_range_set.go
index bbafebf034..9197aeb889 100644
--- a/pkg/sentry/fs/fsutil/file_range_set.go
+++ b/pkg/sentry/fs/fsutil/file_range_set.go
@@ -70,7 +70,9 @@ func (seg FileRangeIterator) FileRange() memmap.FileRange {
 
 // FileRangeOf returns the FileRange mapped by mr.
 //
-// Preconditions: seg.Range().IsSupersetOf(mr). mr.Length() != 0.
+// Preconditions:
+// * seg.Range().IsSupersetOf(mr).
+// * mr.Length() != 0.
 func (seg FileRangeIterator) FileRangeOf(mr memmap.MappableRange) memmap.FileRange {
 	frstart := seg.Value() + (mr.Start - seg.Start())
 	return memmap.FileRange{frstart, frstart + mr.Length()}
@@ -88,8 +90,10 @@ func (seg FileRangeIterator) FileRangeOf(mr memmap.MappableRange) memmap.FileRan
 // outside of optional. It returns a non-nil error if any error occurs, even
 // if the error only affects offsets in optional, but not in required.
 //
-// Preconditions: required.Length() > 0. optional.IsSupersetOf(required).
-// required and optional must be page-aligned.
+// Preconditions:
+// * required.Length() > 0.
+// * optional.IsSupersetOf(required).
+// * required and optional must be page-aligned.
 func (frs *FileRangeSet) Fill(ctx context.Context, required, optional memmap.MappableRange, mf *pgalloc.MemoryFile, kind usage.MemoryKind, readAt func(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error)) error {
 	gap := frs.LowerBoundGap(required.Start)
 	for gap.Ok() && gap.Start() < required.End {
diff --git a/pkg/sentry/fs/fsutil/host_file_mapper.go b/pkg/sentry/fs/fsutil/host_file_mapper.go
index ef0113b522..1390a9a7f5 100644
--- a/pkg/sentry/fs/fsutil/host_file_mapper.go
+++ b/pkg/sentry/fs/fsutil/host_file_mapper.go
@@ -80,7 +80,9 @@ func NewHostFileMapper() *HostFileMapper {
 
 // IncRefOn increments the reference count on all offsets in mr.
 //
-// Preconditions: mr.Length() != 0. mr.Start and mr.End must be page-aligned.
+// Preconditions:
+// * mr.Length() != 0.
+// * mr.Start and mr.End must be page-aligned.
 func (f *HostFileMapper) IncRefOn(mr memmap.MappableRange) {
 	f.refsMu.Lock()
 	defer f.refsMu.Unlock()
@@ -97,7 +99,9 @@ func (f *HostFileMapper) IncRefOn(mr memmap.MappableRange) {
 
 // DecRefOn decrements the reference count on all offsets in mr.
 //
-// Preconditions: mr.Length() != 0. mr.Start and mr.End must be page-aligned.
+// Preconditions:
+// * mr.Length() != 0.
+// * mr.Start and mr.End must be page-aligned.
 func (f *HostFileMapper) DecRefOn(mr memmap.MappableRange) {
 	f.refsMu.Lock()
 	defer f.refsMu.Unlock()
@@ -204,7 +208,9 @@ func (f *HostFileMapper) UnmapAll() {
 	}
 }
 
-// Preconditions: f.mapsMu must be locked. f.mappings[chunkStart] == m.
+// Preconditions:
+// * f.mapsMu must be locked.
+// * f.mappings[chunkStart] == m.
 func (f *HostFileMapper) unmapAndRemoveLocked(chunkStart uint64, m mapping) {
 	if _, _, errno := syscall.Syscall(syscall.SYS_MUNMAP, m.addr, chunkSize, 0); errno != 0 {
 		// This leaks address space and is unexpected, but is otherwise
diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go
index fe8b0b6acf..9eb6f522e1 100644
--- a/pkg/sentry/fs/fsutil/inode_cached.go
+++ b/pkg/sentry/fs/fsutil/inode_cached.go
@@ -684,7 +684,9 @@ func (rw *inodeReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
 // maybeGrowFile grows the file's size if data has been written past the old
 // size.
 //
-// Preconditions: rw.c.attrMu and rw.c.dataMu bust be locked.
+// Preconditions:
+// * rw.c.attrMu must be locked.
+// * rw.c.dataMu must be locked.
 func (rw *inodeReadWriter) maybeGrowFile() {
 	// If the write ends beyond the file's previous size, it causes the
 	// file to grow.
diff --git a/pkg/sentry/fs/overlay.go b/pkg/sentry/fs/overlay.go
index 35013a21b3..01a1235b80 100644
--- a/pkg/sentry/fs/overlay.go
+++ b/pkg/sentry/fs/overlay.go
@@ -86,13 +86,12 @@ func isXattrOverlay(name string) bool {
 // NewOverlayRoot produces the root of an overlay.
 //
 // Preconditions:
-//
-// - upper and lower must be non-nil.
-// - upper must not be an overlay.
-// - lower should not expose character devices, pipes, or sockets, because
+// * upper and lower must be non-nil.
+// * upper must not be an overlay.
+// * lower should not expose character devices, pipes, or sockets, because
 //   copying up these types of files is not supported.
-// - lower must not require that file objects be revalidated.
-// - lower must not have dynamic file/directory content.
+// * lower must not require that file objects be revalidated.
+// * lower must not have dynamic file/directory content.
 func NewOverlayRoot(ctx context.Context, upper *Inode, lower *Inode, flags MountSourceFlags) (*Inode, error) {
 	if !IsDir(upper.StableAttr) {
 		return nil, fmt.Errorf("upper Inode is a %v, not a directory", upper.StableAttr.Type)
@@ -117,12 +116,11 @@ func NewOverlayRoot(ctx context.Context, upper *Inode, lower *Inode, flags Mount
 // NewOverlayRootFile produces the root of an overlay that points to a file.
 //
 // Preconditions:
-//
-// - lower must be non-nil.
-// - lower should not expose character devices, pipes, or sockets, because
+// * lower must be non-nil.
+// * lower should not expose character devices, pipes, or sockets, because
 //   copying up these types of files is not supported. Neither it can be a dir.
-// - lower must not require that file objects be revalidated.
-// - lower must not have dynamic file/directory content.
+// * lower must not require that file objects be revalidated.
+// * lower must not have dynamic file/directory content.
 func NewOverlayRootFile(ctx context.Context, upperMS *MountSource, lower *Inode, flags MountSourceFlags) (*Inode, error) {
 	if !IsRegular(lower.StableAttr) {
 		return nil, fmt.Errorf("lower Inode is not a regular file")
diff --git a/pkg/sentry/fs/tty/queue.go b/pkg/sentry/fs/tty/queue.go
index ceabb9b1ef..c5d7ec717c 100644
--- a/pkg/sentry/fs/tty/queue.go
+++ b/pkg/sentry/fs/tty/queue.go
@@ -104,8 +104,7 @@ func (q *queue) readableSize(ctx context.Context, io usermem.IO, args arch.Sysca
 // as whether the read caused more readable data to become available (whether
 // data was pushed from the wait buffer to the read buffer).
 //
-// Preconditions:
-// * l.termiosMu must be held for reading.
+// Preconditions: l.termiosMu must be held for reading.
 func (q *queue) read(ctx context.Context, dst usermem.IOSequence, l *lineDiscipline) (int64, bool, error) {
 	q.mu.Lock()
 	defer q.mu.Unlock()
@@ -145,8 +144,7 @@ func (q *queue) read(ctx context.Context, dst usermem.IOSequence, l *lineDiscipl
 
 // write writes to q from userspace.
 //
-// Preconditions:
-// * l.termiosMu must be held for reading.
+// Preconditions: l.termiosMu must be held for reading.
 func (q *queue) write(ctx context.Context, src usermem.IOSequence, l *lineDiscipline) (int64, error) {
 	q.mu.Lock()
 	defer q.mu.Unlock()
@@ -188,8 +186,7 @@ func (q *queue) write(ctx context.Context, src usermem.IOSequence, l *lineDiscip
 
 // writeBytes writes to q from b.
 //
-// Preconditions:
-// * l.termiosMu must be held for reading.
+// Preconditions: l.termiosMu must be held for reading.
 func (q *queue) writeBytes(b []byte, l *lineDiscipline) {
 	q.mu.Lock()
 	defer q.mu.Unlock()
diff --git a/pkg/sentry/fsimpl/devpts/queue.go b/pkg/sentry/fsimpl/devpts/queue.go
index dffb4232c1..331c139977 100644
--- a/pkg/sentry/fsimpl/devpts/queue.go
+++ b/pkg/sentry/fsimpl/devpts/queue.go
@@ -102,8 +102,7 @@ func (q *queue) readableSize(ctx context.Context, io usermem.IO, args arch.Sysca
 // as whether the read caused more readable data to become available (whether
 // data was pushed from the wait buffer to the read buffer).
 //
-// Preconditions:
-// * l.termiosMu must be held for reading.
+// Preconditions: l.termiosMu must be held for reading.
 func (q *queue) read(ctx context.Context, dst usermem.IOSequence, l *lineDiscipline) (int64, bool, error) {
 	q.mu.Lock()
 	defer q.mu.Unlock()
@@ -143,8 +142,7 @@ func (q *queue) read(ctx context.Context, dst usermem.IOSequence, l *lineDiscipl
 
 // write writes to q from userspace.
 //
-// Preconditions:
-// * l.termiosMu must be held for reading.
+// Preconditions: l.termiosMu must be held for reading.
 func (q *queue) write(ctx context.Context, src usermem.IOSequence, l *lineDiscipline) (int64, error) {
 	q.mu.Lock()
 	defer q.mu.Unlock()
@@ -186,8 +184,7 @@ func (q *queue) write(ctx context.Context, src usermem.IOSequence, l *lineDiscip
 
 // writeBytes writes to q from b.
 //
-// Preconditions:
-// * l.termiosMu must be held for reading.
+// Preconditions: l.termiosMu must be held for reading.
 func (q *queue) writeBytes(b []byte, l *lineDiscipline) {
 	q.mu.Lock()
 	defer q.mu.Unlock()
diff --git a/pkg/sentry/fsimpl/ext/filesystem.go b/pkg/sentry/fsimpl/ext/filesystem.go
index c714ddf739..8565d1a664 100644
--- a/pkg/sentry/fsimpl/ext/filesystem.go
+++ b/pkg/sentry/fsimpl/ext/filesystem.go
@@ -81,9 +81,9 @@ var _ vfs.FilesystemImpl = (*filesystem)(nil)
 // stepLocked is loosely analogous to fs/namei.c:walk_component().
 //
 // Preconditions:
-//     - filesystem.mu must be locked (for writing if write param is true).
-//     - !rp.Done().
-//     - inode == vfsd.Impl().(*Dentry).inode.
+// * filesystem.mu must be locked (for writing if write param is true).
+// * !rp.Done().
+// * inode == vfsd.Impl().(*Dentry).inode.
 func stepLocked(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, inode *inode, write bool) (*vfs.Dentry, *inode, error) {
 	if !inode.isDir() {
 		return nil, nil, syserror.ENOTDIR
@@ -166,7 +166,7 @@ func stepLocked(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, in
 // walkLocked is loosely analogous to Linux's fs/namei.c:path_lookupat().
 //
 // Preconditions:
-//     - filesystem.mu must be locked (for writing if write param is true).
+// * filesystem.mu must be locked (for writing if write param is true).
 func walkLocked(ctx context.Context, rp *vfs.ResolvingPath, write bool) (*vfs.Dentry, *inode, error) {
 	vfsd := rp.Start()
 	inode := vfsd.Impl().(*dentry).inode
@@ -194,8 +194,8 @@ func walkLocked(ctx context.Context, rp *vfs.ResolvingPath, write bool) (*vfs.De
 // walkParentLocked is loosely analogous to Linux's fs/namei.c:path_parentat().
 //
 // Preconditions:
-//     - filesystem.mu must be locked (for writing if write param is true).
-//     - !rp.Done().
+// * filesystem.mu must be locked (for writing if write param is true).
+// * !rp.Done().
 func walkParentLocked(ctx context.Context, rp *vfs.ResolvingPath, write bool) (*vfs.Dentry, *inode, error) {
 	vfsd := rp.Start()
 	inode := vfsd.Impl().(*dentry).inode
diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go
index 40dce553eb..91d2ae1998 100644
--- a/pkg/sentry/fsimpl/gofer/directory.go
+++ b/pkg/sentry/fsimpl/gofer/directory.go
@@ -34,8 +34,11 @@ func (d *dentry) isDir() bool {
 	return d.fileType() == linux.S_IFDIR
 }
 
-// Preconditions: filesystem.renameMu must be locked. d.dirMu must be locked.
-// d.isDir(). child must be a newly-created dentry that has never had a parent.
+// Preconditions:
+// * filesystem.renameMu must be locked.
+// * d.dirMu must be locked.
+// * d.isDir().
+// * child must be a newly-created dentry that has never had a parent.
 func (d *dentry) cacheNewChildLocked(child *dentry, name string) {
 	d.IncRef() // reference held by child on its parent
 	child.parent = d
@@ -46,7 +49,9 @@ func (d *dentry) cacheNewChildLocked(child *dentry, name string) {
 	d.children[name] = child
 }
 
-// Preconditions: d.dirMu must be locked. d.isDir().
+// Preconditions:
+// * d.dirMu must be locked.
+// * d.isDir().
 func (d *dentry) cacheNegativeLookupLocked(name string) {
 	// Don't cache negative lookups if InteropModeShared is in effect (since
 	// this makes remote lookup unavoidable), or if d.isSynthetic() (in which
@@ -79,8 +84,10 @@ type createSyntheticOpts struct {
 // createSyntheticChildLocked creates a synthetic file with the given name
 // in d.
 //
-// Preconditions: d.dirMu must be locked. d.isDir(). d does not already contain
-// a child with the given name.
+// Preconditions:
+// * d.dirMu must be locked.
+// * d.isDir().
+// * d does not already contain a child with the given name.
 func (d *dentry) createSyntheticChildLocked(opts *createSyntheticOpts) {
 	child := &dentry{
 		refs:      1, // held by d
@@ -151,7 +158,9 @@ func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallba
 	return nil
 }
 
-// Preconditions: d.isDir(). There exists at least one directoryFD representing d.
+// Preconditions:
+// * d.isDir().
+// * There exists at least one directoryFD representing d.
 func (d *dentry) getDirents(ctx context.Context) ([]vfs.Dirent, error) {
 	// NOTE(b/135560623): 9P2000.L's readdir does not specify behavior in the
 	// presence of concurrent mutation of an iterated directory, so
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index 1b6fa4e148..4d581fc299 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -115,9 +115,12 @@ func putDentrySlice(ds *[]*dentry) {
 // Dentries which may become cached as a result of the traversal are appended
 // to *ds.
 //
-// Preconditions: fs.renameMu must be locked. d.dirMu must be locked.
-// !rp.Done(). If !d.cachedMetadataAuthoritative(), then d's cached metadata
-// must be up to date.
+// Preconditions:
+// * fs.renameMu must be locked.
+// * d.dirMu must be locked.
+// * !rp.Done().
+// * If !d.cachedMetadataAuthoritative(), then d's cached metadata must be up
+//   to date.
 //
 // Postconditions: The returned dentry's cached metadata is up to date.
 func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, mayFollowSymlinks bool, ds **[]*dentry) (*dentry, error) {
@@ -185,8 +188,11 @@ afterSymlink:
 // getChildLocked returns a dentry representing the child of parent with the
 // given name. If no such child exists, getChildLocked returns (nil, nil).
 //
-// Preconditions: fs.renameMu must be locked. parent.dirMu must be locked.
-// parent.isDir(). name is not "." or "..".
+// Preconditions:
+// * fs.renameMu must be locked.
+// * parent.dirMu must be locked.
+// * parent.isDir().
+// * name is not "." or "..".
 //
 // Postconditions: If getChildLocked returns a non-nil dentry, its cached
 // metadata is up to date.
@@ -206,7 +212,8 @@ func (fs *filesystem) getChildLocked(ctx context.Context, vfsObj *vfs.VirtualFil
 	return fs.revalidateChildLocked(ctx, vfsObj, parent, name, child, ds)
 }
 
-// Preconditions: As for getChildLocked. !parent.isSynthetic().
+// Preconditions: Same as getChildLocked, plus:
+// * !parent.isSynthetic().
 func (fs *filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.VirtualFilesystem, parent *dentry, name string, child *dentry, ds **[]*dentry) (*dentry, error) {
 	if child != nil {
 		// Need to lock child.metadataMu because we might be updating child
@@ -279,9 +286,11 @@ func (fs *filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.Vir
 // rp.Start().Impl().(*dentry)). It does not check that the returned directory
 // is searchable by the provider of rp.
 //
-// Preconditions: fs.renameMu must be locked. !rp.Done(). If
-// !d.cachedMetadataAuthoritative(), then d's cached metadata must be up to
-// date.
+// Preconditions:
+// * fs.renameMu must be locked.
+// * !rp.Done().
+// * If !d.cachedMetadataAuthoritative(), then d's cached metadata must be up
+//   to date.
 func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) {
 	for !rp.Final() {
 		d.dirMu.Lock()
@@ -328,8 +337,9 @@ func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath,
 // createInRemoteDir (if the parent directory is a real remote directory) or
 // createInSyntheticDir (if the parent directory is synthetic) to do so.
 //
-// Preconditions: !rp.Done(). For the final path component in rp,
-// !rp.ShouldFollowSymlink().
+// Preconditions:
+// * !rp.Done().
+// * For the final path component in rp, !rp.ShouldFollowSymlink().
 func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool, createInRemoteDir func(parent *dentry, name string, ds **[]*dentry) error, createInSyntheticDir func(parent *dentry, name string) error) error {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
@@ -1087,8 +1097,10 @@ retry:
 	return &fd.vfsfd, nil
 }
 
-// Preconditions: d.fs.renameMu must be locked. d.dirMu must be locked.
-// !d.isSynthetic().
+// Preconditions:
+// * d.fs.renameMu must be locked.
+// * d.dirMu must be locked.
+// * !d.isSynthetic().
 func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions, ds **[]*dentry) (*vfs.FileDescription, error) {
 	if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil {
 		return nil, err
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index 63e589859c..c6696b9d82 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -1418,7 +1418,9 @@ func (d *dentry) userXattrSupported() bool {
 	return filetype == linux.ModeRegular || filetype == linux.ModeDirectory
 }
 
-// Preconditions: !d.isSynthetic(). d.isRegularFile() || d.isDir().
+// Preconditions:
+// * !d.isSynthetic().
+// * d.isRegularFile() || d.isDir().
 func (d *dentry) ensureSharedHandle(ctx context.Context, read, write, trunc bool) error {
 	// O_TRUNC unconditionally requires us to obtain a new handle (opened with
 	// O_TRUNC).
diff --git a/pkg/sentry/fsimpl/gofer/time.go b/pkg/sentry/fsimpl/gofer/time.go
index e59d07e900..98733253d9 100644
--- a/pkg/sentry/fsimpl/gofer/time.go
+++ b/pkg/sentry/fsimpl/gofer/time.go
@@ -52,8 +52,9 @@ func (d *dentry) touchAtime(mnt *vfs.Mount) {
 	mnt.EndWrite()
 }
 
-// Preconditions: d.cachedMetadataAuthoritative() == true. The caller has
-// successfully called vfs.Mount.CheckBeginWrite().
+// Preconditions:
+// * d.cachedMetadataAuthoritative() == true.
+// * The caller has successfully called vfs.Mount.CheckBeginWrite().
 func (d *dentry) touchCtime() {
 	now := d.fs.clock.Now().Nanoseconds()
 	d.metadataMu.Lock()
@@ -61,8 +62,9 @@ func (d *dentry) touchCtime() {
 	d.metadataMu.Unlock()
 }
 
-// Preconditions: d.cachedMetadataAuthoritative() == true. The caller has
-// successfully called vfs.Mount.CheckBeginWrite().
+// Preconditions:
+// * d.cachedMetadataAuthoritative() == true.
+// * The caller has successfully called vfs.Mount.CheckBeginWrite().
 func (d *dentry) touchCMtime() {
 	now := d.fs.clock.Now().Nanoseconds()
 	d.metadataMu.Lock()
@@ -72,8 +74,9 @@ func (d *dentry) touchCMtime() {
 	d.metadataMu.Unlock()
 }
 
-// Preconditions: d.cachedMetadataAuthoritative() == true. The caller has
-// locked d.metadataMu.
+// Preconditions:
+// * d.cachedMetadataAuthoritative() == true.
+// * The caller has locked d.metadataMu.
 func (d *dentry) touchCMtimeLocked() {
 	now := d.fs.clock.Now().Nanoseconds()
 	atomic.StoreInt64(&d.mtime, now)
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index 3e5192edd4..e5d6b5c35c 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -32,7 +32,9 @@ import (
 //
 // stepExistingLocked is loosely analogous to fs/namei.c:walk_component().
 //
-// Preconditions: Filesystem.mu must be locked for at least reading. !rp.Done().
+// Preconditions:
+// * Filesystem.mu must be locked for at least reading.
+// * !rp.Done().
 //
 // Postcondition: Caller must call fs.processDeferredDecRefs*.
 func (fs *Filesystem) stepExistingLocked(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, mayFollowSymlinks bool) (*vfs.Dentry, error) {
@@ -107,8 +109,11 @@ afterSymlink:
 // or vfs.ResolvingPath.ResolveChild(name) returns childVFSD (which may be
 // nil) to verify that the returned child (or lack thereof) is correct.
 //
-// Preconditions: Filesystem.mu must be locked for at least reading.
-// parent.dirMu must be locked. parent.isDir(). name is not "." or "..".
+// Preconditions:
+// * Filesystem.mu must be locked for at least reading.
+// * parent.dirMu must be locked.
+// * parent.isDir().
+// * name is not "." or "..".
 //
 // Postconditions: Caller must call fs.processDeferredDecRefs*.
 func (fs *Filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.VirtualFilesystem, parent *Dentry, name string, child *Dentry) (*Dentry, error) {
@@ -171,7 +176,9 @@ func (fs *Filesystem) walkExistingLocked(ctx context.Context, rp *vfs.ResolvingP
 // walkParentDirLocked is loosely analogous to Linux's
 // fs/namei.c:path_parentat().
 //
-// Preconditions: Filesystem.mu must be locked for at least reading. !rp.Done().
+// Preconditions:
+// * Filesystem.mu must be locked for at least reading.
+// * !rp.Done().
 //
 // Postconditions: Caller must call fs.processDeferredDecRefs*.
 func (fs *Filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, Inode, error) {
@@ -193,8 +200,10 @@ func (fs *Filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.Resolving
 // checkCreateLocked checks that a file named rp.Component() may be created in
 // directory parentVFSD, then returns rp.Component().
 //
-// Preconditions: Filesystem.mu must be locked for at least reading. parentInode
-// == parentVFSD.Impl().(*Dentry).Inode. isDir(parentInode) == true.
+// Preconditions:
+// * Filesystem.mu must be locked for at least reading.
+// * parentInode == parentVFSD.Impl().(*Dentry).Inode.
+// * isDir(parentInode) == true.
 func checkCreateLocked(ctx context.Context, rp *vfs.ResolvingPath, parentVFSD *vfs.Dentry, parentInode Inode) (string, error) {
 	if err := parentInode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
 		return "", err
diff --git a/pkg/sentry/fsimpl/overlay/directory.go b/pkg/sentry/fsimpl/overlay/directory.go
index 6a79f7ffe6..b1b292e835 100644
--- a/pkg/sentry/fsimpl/overlay/directory.go
+++ b/pkg/sentry/fsimpl/overlay/directory.go
@@ -29,7 +29,9 @@ func (d *dentry) isDir() bool {
 	return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFDIR
 }
 
-// Preconditions: d.dirMu must be locked. d.isDir().
+// Preconditions:
+// * d.dirMu must be locked.
+// * d.isDir().
 func (d *dentry) collectWhiteoutsForRmdirLocked(ctx context.Context) (map[string]bool, error) {
 	vfsObj := d.fs.vfsfs.VirtualFilesystem()
 	var readdirErr error
diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go
index 86d0164b4e..a3cee4047d 100644
--- a/pkg/sentry/fsimpl/overlay/filesystem.go
+++ b/pkg/sentry/fsimpl/overlay/filesystem.go
@@ -110,8 +110,10 @@ func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*de
 // Dentries which may have a reference count of zero, and which therefore
 // should be dropped once traversal is complete, are appended to ds.
 //
-// Preconditions: fs.renameMu must be locked. d.dirMu must be locked.
-// !rp.Done().
+// Preconditions:
+// * fs.renameMu must be locked.
+// * d.dirMu must be locked.
+// * !rp.Done().
 func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, mayFollowSymlinks bool, ds **[]*dentry) (*dentry, error) {
 	if !d.isDir() {
 		return nil, syserror.ENOTDIR
@@ -159,7 +161,9 @@ afterSymlink:
 	return child, nil
 }
 
-// Preconditions: fs.renameMu must be locked. d.dirMu must be locked.
+// Preconditions:
+// * fs.renameMu must be locked.
+// * d.dirMu must be locked.
 func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
 	if child, ok := parent.children[name]; ok {
 		return child, nil
@@ -177,7 +181,9 @@ func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name s
 	return child, nil
 }
 
-// Preconditions: fs.renameMu must be locked. parent.dirMu must be locked.
+// Preconditions:
+// * fs.renameMu must be locked.
+// * parent.dirMu must be locked.
 func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name string) (*dentry, error) {
 	childPath := fspath.Parse(name)
 	child := fs.newDentry()
@@ -300,7 +306,9 @@ func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name str
 // lookupLayerLocked is similar to lookupLocked, but only returns information
 // about the file rather than a dentry.
 //
-// Preconditions: fs.renameMu must be locked. parent.dirMu must be locked.
+// Preconditions:
+// * fs.renameMu must be locked.
+// * parent.dirMu must be locked.
 func (fs *filesystem) lookupLayerLocked(ctx context.Context, parent *dentry, name string) (lookupLayer, error) {
 	childPath := fspath.Parse(name)
 	lookupLayer := lookupLayerNone
@@ -385,7 +393,9 @@ func (ll lookupLayer) existsInOverlay() bool {
 // rp.Start().Impl().(*dentry)). It does not check that the returned directory
 // is searchable by the provider of rp.
 //
-// Preconditions: fs.renameMu must be locked. !rp.Done().
+// Preconditions:
+// * fs.renameMu must be locked.
+// * !rp.Done().
 func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) {
 	for !rp.Final() {
 		d.dirMu.Lock()
@@ -425,8 +435,9 @@ func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath,
 // doCreateAt checks that creating a file at rp is permitted, then invokes
 // create to do so.
 //
-// Preconditions: !rp.Done(). For the final path component in rp,
-// !rp.ShouldFollowSymlink().
+// Preconditions:
+// * !rp.Done().
+// * For the final path component in rp, !rp.ShouldFollowSymlink().
 func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool, create func(parent *dentry, name string, haveUpperWhiteout bool) error) error {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
@@ -851,8 +862,9 @@ func (d *dentry) openCopiedUp(ctx context.Context, rp *vfs.ResolvingPath, opts *
 	return &fd.vfsfd, nil
 }
 
-// Preconditions: parent.dirMu must be locked. parent does not already contain
-// a child named rp.Component().
+// Preconditions:
+// * parent.dirMu must be locked.
+// * parent does not already contain a child named rp.Component().
 func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.ResolvingPath, parent *dentry, opts *vfs.OpenOptions, ds **[]*dentry) (*vfs.FileDescription, error) {
 	creds := rp.Credentials()
 	if err := parent.checkPermissions(creds, vfs.MayWrite); err != nil {
diff --git a/pkg/sentry/fsimpl/overlay/overlay.go b/pkg/sentry/fsimpl/overlay/overlay.go
index 75cc006bf8..4b3dfbc017 100644
--- a/pkg/sentry/fsimpl/overlay/overlay.go
+++ b/pkg/sentry/fsimpl/overlay/overlay.go
@@ -482,7 +482,9 @@ func (d *dentry) checkDropLocked(ctx context.Context) {
 
 // destroyLocked destroys the dentry.
 //
-// Preconditions: d.fs.renameMu must be locked for writing. d.refs == 0.
+// Preconditions:
+// * d.fs.renameMu must be locked for writing.
+// * d.refs == 0.
 func (d *dentry) destroyLocked(ctx context.Context) {
 	switch atomic.LoadInt64(&d.refs) {
 	case 0:
diff --git a/pkg/sentry/fsimpl/tmpfs/directory.go b/pkg/sentry/fsimpl/tmpfs/directory.go
index 78b4fc5be6..070c75e687 100644
--- a/pkg/sentry/fsimpl/tmpfs/directory.go
+++ b/pkg/sentry/fsimpl/tmpfs/directory.go
@@ -57,8 +57,9 @@ func (fs *filesystem) newDirectory(kuid auth.KUID, kgid auth.KGID, mode linux.Fi
 	return dir
 }
 
-// Preconditions: filesystem.mu must be locked for writing. dir must not
-// already contain a child with the given name.
+// Preconditions:
+// * filesystem.mu must be locked for writing.
+// * dir must not already contain a child with the given name.
 func (dir *directory) insertChildLocked(child *dentry, name string) {
 	child.parent = &dir.dentry
 	child.name = name
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index b0ec177e69..7924a09115 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -39,7 +39,9 @@ func (fs *filesystem) Sync(ctx context.Context) error {
 //
 // stepLocked is loosely analogous to fs/namei.c:walk_component().
 //
-// Preconditions: filesystem.mu must be locked. !rp.Done().
+// Preconditions:
+// * filesystem.mu must be locked.
+// * !rp.Done().
 func stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry) (*dentry, error) {
 	dir, ok := d.inode.impl.(*directory)
 	if !ok {
@@ -97,7 +99,9 @@ afterSymlink:
 // walkParentDirLocked is loosely analogous to Linux's
 // fs/namei.c:path_parentat().
 //
-// Preconditions: filesystem.mu must be locked. !rp.Done().
+// Preconditions:
+// * filesystem.mu must be locked.
+// * !rp.Done().
 func walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry) (*directory, error) {
 	for !rp.Final() {
 		next, err := stepLocked(ctx, rp, d)
@@ -139,8 +143,9 @@ func resolveLocked(ctx context.Context, rp *vfs.ResolvingPath) (*dentry, error)
 // doCreateAt is loosely analogous to a conjunction of Linux's
 // fs/namei.c:filename_create() and done_path_create().
 //
-// Preconditions: !rp.Done(). For the final path component in rp,
-// !rp.ShouldFollowSymlink().
+// Preconditions:
+// * !rp.Done().
+// * For the final path component in rp, !rp.ShouldFollowSymlink().
 func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool, create func(parentDir *directory, name string) error) error {
 	fs.mu.Lock()
 	defer fs.mu.Unlock()
diff --git a/pkg/sentry/fsimpl/tmpfs/named_pipe.go b/pkg/sentry/fsimpl/tmpfs/named_pipe.go
index 739350cf08..5b0471ff41 100644
--- a/pkg/sentry/fsimpl/tmpfs/named_pipe.go
+++ b/pkg/sentry/fsimpl/tmpfs/named_pipe.go
@@ -28,8 +28,8 @@ type namedPipe struct {
 }
 
 // Preconditions:
-//   * fs.mu must be locked.
-//   * rp.Mount().CheckBeginWrite() has been called successfully.
+// * fs.mu must be locked.
+// * rp.Mount().CheckBeginWrite() has been called successfully.
 func (fs *filesystem) newNamedPipe(kuid auth.KUID, kgid auth.KGID, mode linux.FileMode) *inode {
 	file := &namedPipe{pipe: pipe.NewVFSPipe(true /* isNamed */, pipe.DefaultPipeSize, usermem.PageSize)}
 	file.inode.init(file, fs, kuid, kgid, linux.S_IFIFO|mode)
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index de2af6d018..428f62aaad 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -340,8 +340,10 @@ func (i *inode) init(impl interface{}, fs *filesystem, kuid auth.KUID, kgid auth
 
 // incLinksLocked increments i's link count.
 //
-// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0.
-// i.nlink < maxLinks.
+// Preconditions:
+// * filesystem.mu must be locked for writing.
+// * i.nlink != 0.
+// * i.nlink < maxLinks.
 func (i *inode) incLinksLocked() {
 	if i.nlink == 0 {
 		panic("tmpfs.inode.incLinksLocked() called with no existing links")
@@ -355,7 +357,9 @@ func (i *inode) incLinksLocked() {
 // decLinksLocked decrements i's link count. If the link count reaches 0, we
 // remove a reference on i as well.
 //
-// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0.
+// Preconditions:
+// * filesystem.mu must be locked for writing.
+// * i.nlink != 0.
 func (i *inode) decLinksLocked(ctx context.Context) {
 	if i.nlink == 0 {
 		panic("tmpfs.inode.decLinksLocked() called with no existing links")
@@ -594,8 +598,9 @@ func (i *inode) touchCMtime() {
 	i.mu.Unlock()
 }
 
-// Preconditions: The caller has called vfs.Mount.CheckBeginWrite() and holds
-// inode.mu.
+// Preconditions:
+// * The caller has called vfs.Mount.CheckBeginWrite().
+// * inode.mu must be locked.
 func (i *inode) touchCMtimeLocked() {
 	now := i.fs.clock.Now().Nanoseconds()
 	atomic.StoreInt64(&i.mtime, now)
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index 1028d13c68..2e0175e36a 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -1067,8 +1067,9 @@ func (k *Kernel) Start() error {
 
 // pauseTimeLocked pauses all Timers and Timekeeper updates.
 //
-// Preconditions: Any task goroutines running in k must be stopped. k.extMu
-// must be locked.
+// Preconditions:
+// * Any task goroutines running in k must be stopped.
+// * k.extMu must be locked.
 func (k *Kernel) pauseTimeLocked(ctx context.Context) {
 	// k.cpuClockTicker may be nil since Kernel.SaveTo() may be called before
 	// Kernel.Start().
@@ -1111,8 +1112,9 @@ func (k *Kernel) pauseTimeLocked(ctx context.Context) {
 // pauseTimeLocked has not been previously called, resumeTimeLocked has no
 // effect.
 //
-// Preconditions: Any task goroutines running in k must be stopped. k.extMu
-// must be locked.
+// Preconditions:
+// * Any task goroutines running in k must be stopped.
+// * k.extMu must be locked.
 func (k *Kernel) resumeTimeLocked(ctx context.Context) {
 	if k.cpuClockTicker != nil {
 		k.cpuClockTicker.Resume()
diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go
index 619b0cb7c6..50df179c37 100644
--- a/pkg/sentry/kernel/ptrace.go
+++ b/pkg/sentry/kernel/ptrace.go
@@ -224,8 +224,9 @@ func (s *ptraceStop) Killable() bool {
 // beginPtraceStopLocked does not signal t's tracer or wake it if it is
 // waiting.
 //
-// Preconditions: The TaskSet mutex must be locked. The caller must be running
-// on the task goroutine.
+// Preconditions:
+// * The TaskSet mutex must be locked.
+// * The caller must be running on the task goroutine.
 func (t *Task) beginPtraceStopLocked() bool {
 	t.tg.signalHandlers.mu.Lock()
 	defer t.tg.signalHandlers.mu.Unlock()
@@ -270,8 +271,9 @@ func (t *Task) ptraceTrapLocked(code int32) {
 // ptraceStop, temporarily preventing it from being removed by a concurrent
 // Task.Kill, and returns true. Otherwise it returns false.
 //
-// Preconditions: The TaskSet mutex must be locked. The caller must be running
-// on the task goroutine of t's tracer.
+// Preconditions:
+// * The TaskSet mutex must be locked.
+// * The caller must be running on the task goroutine of t's tracer.
 func (t *Task) ptraceFreeze() bool {
 	t.tg.signalHandlers.mu.Lock()
 	defer t.tg.signalHandlers.mu.Unlock()
@@ -301,8 +303,9 @@ func (t *Task) ptraceUnfreeze() {
 	t.ptraceUnfreezeLocked()
 }
 
-// Preconditions: t must be in a frozen ptraceStop. t's signal mutex must be
-// locked.
+// Preconditions:
+// * t must be in a frozen ptraceStop.
+// * t's signal mutex must be locked.
 func (t *Task) ptraceUnfreezeLocked() {
 	// Do this even if the task has been killed to ensure a panic if t.stop is
 	// nil or not a ptraceStop.
@@ -497,8 +500,9 @@ func (t *Task) forgetTracerLocked() {
 // ptraceSignalLocked is called after signal dequeueing to check if t should
 // enter ptrace signal-delivery-stop.
 //
-// Preconditions: The signal mutex must be locked. The caller must be running
-// on the task goroutine.
+// Preconditions:
+// * The signal mutex must be locked.
+// * The caller must be running on the task goroutine.
 func (t *Task) ptraceSignalLocked(info *arch.SignalInfo) bool {
 	if linux.Signal(info.Signo) == linux.SIGKILL {
 		return false
@@ -828,8 +832,9 @@ func (t *Task) ptraceInterrupt(target *Task) error {
 	return nil
 }
 
-// Preconditions: The TaskSet mutex must be locked for writing. t must have a
-// tracer.
+// Preconditions:
+// * The TaskSet mutex must be locked for writing.
+// * t must have a tracer.
 func (t *Task) ptraceSetOptionsLocked(opts uintptr) error {
 	const valid = uintptr(linux.PTRACE_O_EXITKILL |
 		linux.PTRACE_O_TRACESYSGOOD |
diff --git a/pkg/sentry/kernel/rseq.go b/pkg/sentry/kernel/rseq.go
index 18416643b8..2a9023fdfd 100644
--- a/pkg/sentry/kernel/rseq.go
+++ b/pkg/sentry/kernel/rseq.go
@@ -173,8 +173,10 @@ func (t *Task) OldRSeqCPUAddr() usermem.Addr {
 // SetOldRSeqCPUAddr replaces the address that old rseq will keep updated with
 // t's CPU number.
 //
-// Preconditions: t.RSeqAvailable() == true. The caller must be running on the
-// task goroutine. t's AddressSpace must be active.
+// Preconditions:
+// * t.RSeqAvailable() == true.
+// * The caller must be running on the task goroutine.
+// * t's AddressSpace must be active.
 func (t *Task) SetOldRSeqCPUAddr(addr usermem.Addr) error {
 	t.oldRSeqCPUAddr = addr
 
@@ -189,8 +191,9 @@ func (t *Task) SetOldRSeqCPUAddr(addr usermem.Addr) error {
 	return nil
 }
 
-// Preconditions: The caller must be running on the task goroutine. t's
-// AddressSpace must be active.
+// Preconditions:
+// * The caller must be running on the task goroutine.
+// * t's AddressSpace must be active.
 func (t *Task) rseqUpdateCPU() error {
 	if t.rseqAddr == 0 && t.oldRSeqCPUAddr == 0 {
 		t.rseqCPU = -1
@@ -209,8 +212,9 @@ func (t *Task) rseqUpdateCPU() error {
 	return oerr
 }
 
-// Preconditions: The caller must be running on the task goroutine. t's
-// AddressSpace must be active.
+// Preconditions:
+// * The caller must be running on the task goroutine.
+// * t's AddressSpace must be active.
 func (t *Task) oldRSeqCopyOutCPU() error {
 	if t.oldRSeqCPUAddr == 0 {
 		return nil
@@ -222,8 +226,9 @@ func (t *Task) oldRSeqCopyOutCPU() error {
 	return err
 }
 
-// Preconditions: The caller must be running on the task goroutine. t's
-// AddressSpace must be active.
+// Preconditions:
+// * The caller must be running on the task goroutine.
+// * t's AddressSpace must be active.
 func (t *Task) rseqCopyOutCPU() error {
 	if t.rseqAddr == 0 {
 		return nil
@@ -240,8 +245,9 @@ func (t *Task) rseqCopyOutCPU() error {
 	return err
 }
 
-// Preconditions: The caller must be running on the task goroutine. t's
-// AddressSpace must be active.
+// Preconditions:
+// * The caller must be running on the task goroutine.
+// * t's AddressSpace must be active.
 func (t *Task) rseqClearCPU() error {
 	buf := t.CopyScratchBuffer(8)
 	// CPUIDStart and CPUID are the first two fields in linux.RSeq.
@@ -269,8 +275,9 @@ func (t *Task) rseqClearCPU() error {
 //
 // See kernel/rseq.c:rseq_ip_fixup for reference.
 //
-// Preconditions: The caller must be running on the task goroutine. t's
-// AddressSpace must be active.
+// Preconditions:
+// * The caller must be running on the task goroutine.
+// * t's AddressSpace must be active.
 func (t *Task) rseqAddrInterrupt() {
 	if t.rseqAddr == 0 {
 		return
diff --git a/pkg/sentry/kernel/task_exec.go b/pkg/sentry/kernel/task_exec.go
index 5e4fb3e3af..412d471d35 100644
--- a/pkg/sentry/kernel/task_exec.go
+++ b/pkg/sentry/kernel/task_exec.go
@@ -237,9 +237,10 @@ func (r *runSyscallAfterExecStop) execute(t *Task) taskRunState {
 // promoteLocked makes t the leader of its thread group. If t is already the
 // thread group leader, promoteLocked is a no-op.
 //
-// Preconditions: All other tasks in t's thread group, including the existing
-// leader (if it is not t), have reached TaskExitZombie. The TaskSet mutex must
-// be locked for writing.
+// Preconditions:
+// * All other tasks in t's thread group, including the existing leader (if it
+//   is not t), have reached TaskExitZombie.
+// * The TaskSet mutex must be locked for writing.
 func (t *Task) promoteLocked() {
 	oldLeader := t.tg.leader
 	if t == oldLeader {
diff --git a/pkg/sentry/kernel/task_sched.go b/pkg/sentry/kernel/task_sched.go
index 09366b60c6..52c55d13d1 100644
--- a/pkg/sentry/kernel/task_sched.go
+++ b/pkg/sentry/kernel/task_sched.go
@@ -133,9 +133,10 @@ func (t *Task) accountTaskGoroutineEnter(state TaskGoroutineState) {
 	}
 }
 
-// Preconditions: The caller must be running on the task goroutine, and leaving
-// a state indicated by a previous call to
-// t.accountTaskGoroutineEnter(state).
+// Preconditions:
+// * The caller must be running on the task goroutine.
+// * The caller must be leaving a state indicated by a previous call to
+//   t.accountTaskGoroutineEnter(state).
 func (t *Task) accountTaskGoroutineLeave(state TaskGoroutineState) {
 	if state != TaskGoroutineRunningApp {
 		// Task is unblocking/continuing.
@@ -191,8 +192,8 @@ func (tg *ThreadGroup) CPUStats() usage.CPUStats {
 	return tg.cpuStatsAtLocked(tg.leader.k.CPUClockNow())
 }
 
-// Preconditions: As for TaskGoroutineSchedInfo.userTicksAt. The TaskSet mutex
-// must be locked.
+// Preconditions: Same as TaskGoroutineSchedInfo.userTicksAt, plus:
+// * The TaskSet mutex must be locked.
 func (tg *ThreadGroup) cpuStatsAtLocked(now uint64) usage.CPUStats {
 	stats := tg.exitedCPUStats
 	// Account for live tasks.
diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go
index d6a2040bc9..feaa385962 100644
--- a/pkg/sentry/kernel/task_signals.go
+++ b/pkg/sentry/kernel/task_signals.go
@@ -319,8 +319,9 @@ func (t *Task) SignalReturn(rt bool) (*SyscallControl, error) {
 
 // Sigtimedwait implements the semantics of sigtimedwait(2).
 //
-// Preconditions: The caller must be running on the task goroutine. t.exitState
-// < TaskExitZombie.
+// Preconditions:
+// * The caller must be running on the task goroutine.
+// * t.exitState < TaskExitZombie.
 func (t *Task) Sigtimedwait(set linux.SignalSet, timeout time.Duration) (*arch.SignalInfo, error) {
 	// set is the set of signals we're interested in; invert it to get the set
 	// of signals to block.
@@ -584,8 +585,9 @@ func (t *Task) SignalMask() linux.SignalSet {
 
 // SetSignalMask sets t's signal mask.
 //
-// Preconditions: SetSignalMask can only be called by the task goroutine.
-// t.exitState < TaskExitZombie.
+// Preconditions:
+// * The caller must be running on the task goroutine.
+// * t.exitState < TaskExitZombie.
 func (t *Task) SetSignalMask(mask linux.SignalSet) {
 	// By precondition, t prevents t.tg from completing an execve and mutating
 	// t.tg.signalHandlers, so we can skip the TaskSet mutex.
@@ -631,7 +633,7 @@ func (t *Task) setSignalMaskLocked(mask linux.SignalSet) {
 // SetSavedSignalMask sets the saved signal mask (see Task.savedSignalMask's
 // comment).
 //
-// Preconditions: SetSavedSignalMask can only be called by the task goroutine.
+// Preconditions: The caller must be running on the task goroutine.
 func (t *Task) SetSavedSignalMask(mask linux.SignalSet) {
 	t.savedSignalMask = mask
 	t.haveSavedSignalMask = true
diff --git a/pkg/sentry/kernel/task_stop.go b/pkg/sentry/kernel/task_stop.go
index 296735d321..a35948a5f2 100644
--- a/pkg/sentry/kernel/task_stop.go
+++ b/pkg/sentry/kernel/task_stop.go
@@ -99,8 +99,9 @@ type TaskStop interface {
 
 // beginInternalStop indicates the start of an internal stop that applies to t.
 //
-// Preconditions: The task must not already be in an internal stop (i.e. t.stop
-// == nil). The caller must be running on the task goroutine.
+// Preconditions:
+// * The caller must be running on the task goroutine.
+// * The task must not already be in an internal stop (i.e. t.stop == nil).
 func (t *Task) beginInternalStop(s TaskStop) {
 	t.tg.pidns.owner.mu.RLock()
 	defer t.tg.pidns.owner.mu.RUnlock()
@@ -109,8 +110,8 @@ func (t *Task) beginInternalStop(s TaskStop) {
 	t.beginInternalStopLocked(s)
 }
 
-// Preconditions: The signal mutex must be locked. All preconditions for
-// Task.beginInternalStop also apply.
+// Preconditions: Same as beginInternalStop, plus:
+// * The signal mutex must be locked.
 func (t *Task) beginInternalStopLocked(s TaskStop) {
 	if t.stop != nil {
 		panic(fmt.Sprintf("Attempting to enter internal stop %#v when already in internal stop %#v", s, t.stop))
@@ -128,8 +129,9 @@ func (t *Task) beginInternalStopLocked(s TaskStop) {
 // t.stop, which is why there is no endInternalStop that locks the signal mutex
 // for you.
 //
-// Preconditions: The signal mutex must be locked. The task must be in an
-// internal stop (i.e. t.stop != nil).
+// Preconditions:
+// * The signal mutex must be locked.
+// * The task must be in an internal stop (i.e. t.stop != nil).
 func (t *Task) endInternalStopLocked() {
 	if t.stop == nil {
 		panic("Attempting to leave non-existent internal stop")
diff --git a/pkg/sentry/kernel/task_usermem.go b/pkg/sentry/kernel/task_usermem.go
index b02044ad24..4550b9f89a 100644
--- a/pkg/sentry/kernel/task_usermem.go
+++ b/pkg/sentry/kernel/task_usermem.go
@@ -143,8 +143,9 @@ func (t *Task) CopyInVector(addr usermem.Addr, maxElemSize, maxTotalSize int) ([
 // CopyOutIovecs converts src to an array of struct iovecs and copies it to the
 // memory mapped at addr.
 //
-// Preconditions: As for usermem.IO.CopyOut. The caller must be running on the
-// task goroutine. t's AddressSpace must be active.
+// Preconditions: Same as usermem.IO.CopyOut, plus:
+// * The caller must be running on the task goroutine.
+// * t's AddressSpace must be active.
 func (t *Task) CopyOutIovecs(addr usermem.Addr, src usermem.AddrRangeSeq) error {
 	switch t.Arch().Width() {
 	case 8:
@@ -191,8 +192,9 @@ func (t *Task) CopyOutIovecs(addr usermem.Addr, src usermem.AddrRangeSeq) error
 // combined length of all AddrRanges would otherwise exceed this amount, ranges
 // beyond MAX_RW_COUNT are silently truncated.
 //
-// Preconditions: As for usermem.IO.CopyIn. The caller must be running on the
-// task goroutine. t's AddressSpace must be active.
+// Preconditions: Same as usermem.IO.CopyIn, plus:
+// * The caller must be running on the task goroutine.
+// * t's AddressSpace must be active.
 func (t *Task) CopyInIovecs(addr usermem.Addr, numIovecs int) (usermem.AddrRangeSeq, error) {
 	if numIovecs == 0 {
 		return usermem.AddrRangeSeq{}, nil
@@ -284,7 +286,7 @@ func (t *Task) SingleIOSequence(addr usermem.Addr, length int, opts usermem.IOOp
 //
 // IovecsIOSequence is analogous to Linux's lib/iov_iter.c:import_iovec().
 //
-// Preconditions: As for Task.CopyInIovecs.
+// Preconditions: Same as Task.CopyInIovecs.
 func (t *Task) IovecsIOSequence(addr usermem.Addr, iovcnt int, opts usermem.IOOpts) (usermem.IOSequence, error) {
 	if iovcnt < 0 || iovcnt > linux.UIO_MAXIOV {
 		return usermem.IOSequence{}, syserror.EINVAL
diff --git a/pkg/sentry/kernel/time/time.go b/pkg/sentry/kernel/time/time.go
index e959700f27..f61a8e164a 100644
--- a/pkg/sentry/kernel/time/time.go
+++ b/pkg/sentry/kernel/time/time.go
@@ -616,8 +616,10 @@ func (t *Timer) Swap(s Setting) (Time, Setting) {
 // Timer's Clock) at which the Setting was changed. Setting s.Enabled to true
 // starts the timer, while setting s.Enabled to false stops it.
 //
-// Preconditions: The Timer must not be paused. f cannot call any Timer methods
-// since it is called with the Timer mutex locked.
+// Preconditions:
+// * The Timer must not be paused.
+// * f cannot call any Timer methods since it is called with the Timer mutex
+//   locked.
 func (t *Timer) SwapAnd(s Setting, f func()) (Time, Setting) {
 	now := t.clock.Now()
 	t.mu.Lock()
diff --git a/pkg/sentry/kernel/vdso.go b/pkg/sentry/kernel/vdso.go
index 290c32466c..e44a139b34 100644
--- a/pkg/sentry/kernel/vdso.go
+++ b/pkg/sentry/kernel/vdso.go
@@ -73,13 +73,10 @@ type VDSOParamPage struct {
 // NewVDSOParamPage returns a VDSOParamPage.
 //
 // Preconditions:
-//
 // * fr is a single page allocated from mfp.MemoryFile(). VDSOParamPage does
 //   not take ownership of fr; it must remain allocated for the lifetime of the
 //   VDSOParamPage.
-//
 // * VDSOParamPage must be the only writer to fr.
-//
 // * mfp.MemoryFile().MapInternal(fr) must return a single safemem.Block.
 func NewVDSOParamPage(mfp pgalloc.MemoryFileProvider, fr memmap.FileRange) *VDSOParamPage {
 	return &VDSOParamPage{mfp: mfp, fr: fr}
diff --git a/pkg/sentry/loader/elf.go b/pkg/sentry/loader/elf.go
index 20dd1cc212..d4610ec3b5 100644
--- a/pkg/sentry/loader/elf.go
+++ b/pkg/sentry/loader/elf.go
@@ -402,8 +402,7 @@ type loadedELF struct {
 //
 // It does not load the ELF interpreter, or return any auxv entries.
 //
-// Preconditions:
-//  * f is an ELF file
+// Preconditions: f is an ELF file.
 func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, info elfInfo, sharedLoadOffset usermem.Addr) (loadedELF, error) {
 	first := true
 	var start, end usermem.Addr
@@ -571,8 +570,8 @@ func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, in
 // It does not load the ELF interpreter, or return any auxv entries.
 //
 // Preconditions:
-//  * f is an ELF file
-//  * f is the first ELF loaded into m
+// * f is an ELF file.
+// * f is the first ELF loaded into m.
 func loadInitialELF(ctx context.Context, m *mm.MemoryManager, fs *cpuid.FeatureSet, f fsbridge.File) (loadedELF, arch.Context, error) {
 	info, err := parseHeader(ctx, f)
 	if err != nil {
@@ -609,8 +608,7 @@ func loadInitialELF(ctx context.Context, m *mm.MemoryManager, fs *cpuid.FeatureS
 //
 // It does not return any auxv entries.
 //
-// Preconditions:
-//  * f is an ELF file
+// Preconditions: f is an ELF file.
 func loadInterpreterELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, initial loadedELF) (loadedELF, error) {
 	info, err := parseHeader(ctx, f)
 	if err != nil {
@@ -640,8 +638,7 @@ func loadInterpreterELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.Fil
 // If loadELF returns ErrSwitchFile it should be called again with the returned
 // path and argv.
 //
-// Preconditions:
-//  * args.File is an ELF file
+// Preconditions: args.File is an ELF file.
 func loadELF(ctx context.Context, args LoadArgs) (loadedELF, arch.Context, error) {
 	bin, ac, err := loadInitialELF(ctx, args.MemoryManager, args.Features, args.File)
 	if err != nil {
diff --git a/pkg/sentry/loader/loader.go b/pkg/sentry/loader/loader.go
index 8d6802ea33..15c88aa7c1 100644
--- a/pkg/sentry/loader/loader.go
+++ b/pkg/sentry/loader/loader.go
@@ -215,8 +215,8 @@ func loadExecutable(ctx context.Context, args LoadArgs) (loadedELF, arch.Context
 // path and argv.
 //
 // Preconditions:
-//  * The Task MemoryManager is empty.
-//  * Load is called on the Task goroutine.
+// * The Task MemoryManager is empty.
+// * Load is called on the Task goroutine.
 func Load(ctx context.Context, args LoadArgs, extraAuxv []arch.AuxEntry, vdso *VDSO) (abi.OS, arch.Context, string, *syserr.Error) {
 	// Load the executable itself.
 	loaded, ac, file, newArgv, err := loadExecutable(ctx, args)
diff --git a/pkg/sentry/memmap/mapping_set.go b/pkg/sentry/memmap/mapping_set.go
index d609c1ae01..457ed87f84 100644
--- a/pkg/sentry/memmap/mapping_set.go
+++ b/pkg/sentry/memmap/mapping_set.go
@@ -177,7 +177,7 @@ func subsetMapping(wholeRange, subsetRange MappableRange, ms MappingSpace, addr
 // AddMapping adds the given mapping and returns the set of MappableRanges that
 // previously had no mappings.
 //
-// Preconditions: As for Mappable.AddMapping.
+// Preconditions: Same as Mappable.AddMapping.
 func (s *MappingSet) AddMapping(ms MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) []MappableRange {
 	mr := MappableRange{offset, offset + uint64(ar.Length())}
 	var mapped []MappableRange
@@ -204,7 +204,7 @@ func (s *MappingSet) AddMapping(ms MappingSpace, ar usermem.AddrRange, offset ui
 // RemoveMapping removes the given mapping and returns the set of
 // MappableRanges that now have no mappings.
 //
-// Preconditions: As for Mappable.RemoveMapping.
+// Preconditions: Same as Mappable.RemoveMapping.
 func (s *MappingSet) RemoveMapping(ms MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) []MappableRange {
 	mr := MappableRange{offset, offset + uint64(ar.Length())}
 	var unmapped []MappableRange
diff --git a/pkg/sentry/memmap/memmap.go b/pkg/sentry/memmap/memmap.go
index 65d83096fb..a44fa2b95d 100644
--- a/pkg/sentry/memmap/memmap.go
+++ b/pkg/sentry/memmap/memmap.go
@@ -28,9 +28,9 @@ import (
 //
 // See mm/mm.go for Mappable's place in the lock order.
 //
-// Preconditions: For all Mappable methods, usermem.AddrRanges and
-// MappableRanges must be non-empty (Length() != 0), and usermem.Addrs and
-// Mappable offsets must be page-aligned.
+// All Mappable methods have the following preconditions:
+// * usermem.AddrRanges and MappableRanges must be non-empty (Length() != 0).
+// * usermem.Addrs and Mappable offsets must be page-aligned.
 type Mappable interface {
 	// AddMapping notifies the Mappable of a mapping from addresses ar in ms to
 	// offsets [offset, offset+ar.Length()) in this Mappable.
@@ -48,8 +48,10 @@ type Mappable interface {
 	// addresses ar in ms to offsets [offset, offset+ar.Length()) in this
 	// Mappable.
 	//
-	// Preconditions: offset+ar.Length() does not overflow. The removed mapping
-	// must exist. writable must match the corresponding call to AddMapping.
+	// Preconditions:
+	// * offset+ar.Length() does not overflow.
+	// * The removed mapping must exist.
+	// * writable must match the corresponding call to AddMapping.
 	RemoveMapping(ctx context.Context, ms MappingSpace, ar usermem.AddrRange, offset uint64, writable bool)
 
 	// CopyMapping notifies the Mappable of an attempt to copy a mapping in ms
@@ -60,9 +62,10 @@ type Mappable interface {
 	// CopyMapping is only called when a mapping is copied within a given
 	// MappingSpace; it is analogous to Linux's vm_operations_struct::mremap.
 	//
-	// Preconditions: offset+srcAR.Length() and offset+dstAR.Length() do not
-	// overflow. The mapping at srcAR must exist. writable must match the
-	// corresponding call to AddMapping.
+	// Preconditions:
+	// * offset+srcAR.Length() and offset+dstAR.Length() do not overflow.
+	// * The mapping at srcAR must exist.
+	// * writable must match the corresponding call to AddMapping.
 	CopyMapping(ctx context.Context, ms MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error
 
 	// Translate returns the Mappable's current mappings for at least the range
@@ -77,11 +80,14 @@ type Mappable interface {
 	// reference is held on all pages in a File that may be the result
 	// of a valid Translation.
 	//
-	// Preconditions: required.Length() > 0. optional.IsSupersetOf(required).
-	// required and optional must be page-aligned. The caller must have
-	// established a mapping for all of the queried offsets via a previous call
-	// to AddMapping. The caller is responsible for ensuring that calls to
-	// Translate synchronize with invalidation.
+	// Preconditions:
+	// * required.Length() > 0.
+	// * optional.IsSupersetOf(required).
+	// * required and optional must be page-aligned.
+	// * The caller must have established a mapping for all of the queried
+	//   offsets via a previous call to AddMapping.
+	// * The caller is responsible for ensuring that calls to Translate
+	//   synchronize with invalidation.
 	//
 	// Postconditions: See CheckTranslateResult.
 	Translate(ctx context.Context, required, optional MappableRange, at usermem.AccessType) ([]Translation, error)
@@ -118,7 +124,7 @@ func (t Translation) FileRange() FileRange {
 // CheckTranslateResult returns an error if (ts, terr) does not satisfy all
 // postconditions for Mappable.Translate(required, optional, at).
 //
-// Preconditions: As for Mappable.Translate.
+// Preconditions: Same as Mappable.Translate.
 func CheckTranslateResult(required, optional MappableRange, at usermem.AccessType, ts []Translation, terr error) error {
 	// Verify that the inputs to Mappable.Translate were valid.
 	if !required.WellFormed() || required.Length() <= 0 {
@@ -214,7 +220,9 @@ type MappingSpace interface {
 	// Invalidate must not take any locks preceding mm.MemoryManager.activeMu
 	// in the lock order.
 	//
-	// Preconditions: ar.Length() != 0. ar must be page-aligned.
+	// Preconditions:
+	// * ar.Length() != 0.
+	// * ar must be page-aligned.
 	Invalidate(ar usermem.AddrRange, opts InvalidateOpts)
 }
 
@@ -375,16 +383,20 @@ type File interface {
 
 	// IncRef increments the reference count on all pages in fr.
 	//
-	// Preconditions: fr.Start and fr.End must be page-aligned. fr.Length() >
-	// 0. At least one reference must be held on all pages in fr. (The File
-	// interface does not provide a way to acquire an initial reference;
-	// implementors may define mechanisms for doing so.)
+	// Preconditions:
+	// * fr.Start and fr.End must be page-aligned.
+	// * fr.Length() > 0.
+	// * At least one reference must be held on all pages in fr. (The File
+	//   interface does not provide a way to acquire an initial reference;
+	//   implementors may define mechanisms for doing so.)
 	IncRef(fr FileRange)
 
 	// DecRef decrements the reference count on all pages in fr.
 	//
-	// Preconditions: fr.Start and fr.End must be page-aligned. fr.Length() >
-	// 0. At least one reference must be held on all pages in fr.
+	// Preconditions:
+	// * fr.Start and fr.End must be page-aligned.
+	// * fr.Length() > 0.
+	// * At least one reference must be held on all pages in fr.
 	DecRef(fr FileRange)
 
 	// MapInternal returns a mapping of the given file offsets in the invoking
@@ -392,8 +404,9 @@ type File interface {
 	//
 	// Note that fr.Start and fr.End need not be page-aligned.
 	//
-	// Preconditions: fr.Length() > 0. At least one reference must be held on
-	// all pages in fr.
+	// Preconditions:
+	// * fr.Length() > 0.
+	// * At least one reference must be held on all pages in fr.
 	//
 	// Postconditions: The returned mapping is valid as long as at least one
 	// reference is held on the mapped pages.
diff --git a/pkg/sentry/mm/address_space.go b/pkg/sentry/mm/address_space.go
index 5c667117cd..a93e76c753 100644
--- a/pkg/sentry/mm/address_space.go
+++ b/pkg/sentry/mm/address_space.go
@@ -166,8 +166,12 @@ func (mm *MemoryManager) Deactivate() {
 // mapASLocked maps addresses in ar into mm.as. If precommit is true, mappings
 // for all addresses in ar should be precommitted.
 //
-// Preconditions: mm.activeMu must be locked. mm.as != nil. ar.Length() != 0.
-// ar must be page-aligned. pseg == mm.pmas.LowerBoundSegment(ar.Start).
+// Preconditions:
+// * mm.activeMu must be locked.
+// * mm.as != nil.
+// * ar.Length() != 0.
+// * ar must be page-aligned.
+// * pseg == mm.pmas.LowerBoundSegment(ar.Start).
 func (mm *MemoryManager) mapASLocked(pseg pmaIterator, ar usermem.AddrRange, precommit bool) error {
 	// By default, map entire pmas at a time, under the assumption that there
 	// is no cost to mapping more of a pma than necessary.
diff --git a/pkg/sentry/mm/io.go b/pkg/sentry/mm/io.go
index fa776f9c65..a8ac480806 100644
--- a/pkg/sentry/mm/io.go
+++ b/pkg/sentry/mm/io.go
@@ -441,7 +441,10 @@ func (mm *MemoryManager) LoadUint32(ctx context.Context, addr usermem.Addr, opts
 // handleASIOFault handles a page fault at address addr for an AddressSpaceIO
 // operation spanning ioar.
 //
-// Preconditions: mm.as != nil. ioar.Length() != 0. ioar.Contains(addr).
+// Preconditions:
+// * mm.as != nil.
+// * ioar.Length() != 0.
+// * ioar.Contains(addr).
 func (mm *MemoryManager) handleASIOFault(ctx context.Context, addr usermem.Addr, ioar usermem.AddrRange, at usermem.AccessType) error {
 	// Try to map all remaining pages in the I/O operation. This RoundUp can't
 	// overflow because otherwise it would have been caught by CheckIORange.
@@ -629,7 +632,9 @@ func (mm *MemoryManager) withVecInternalMappings(ctx context.Context, ars userme
 // at most address end on AddrRange arsit.Head(). It is used in vector I/O paths to
 // truncate usermem.AddrRangeSeq when errors occur.
 //
-// Preconditions: !arsit.IsEmpty(). end <= arsit.Head().End.
+// Preconditions:
+// * !arsit.IsEmpty().
+// * end <= arsit.Head().End.
 func truncatedAddrRangeSeq(ars, arsit usermem.AddrRangeSeq, end usermem.Addr) usermem.AddrRangeSeq {
 	ar := arsit.Head()
 	if end <= ar.Start {
diff --git a/pkg/sentry/mm/pma.go b/pkg/sentry/mm/pma.go
index 930ec895fd..30facebf7f 100644
--- a/pkg/sentry/mm/pma.go
+++ b/pkg/sentry/mm/pma.go
@@ -31,7 +31,9 @@ import (
 // iterator to the pma containing ar.Start. Otherwise it returns a terminal
 // iterator.
 //
-// Preconditions: mm.activeMu must be locked. ar.Length() != 0.
+// Preconditions:
+// * mm.activeMu must be locked.
+// * ar.Length() != 0.
 func (mm *MemoryManager) existingPMAsLocked(ar usermem.AddrRange, at usermem.AccessType, ignorePermissions bool, needInternalMappings bool) pmaIterator {
 	if checkInvariants {
 		if !ar.WellFormed() || ar.Length() <= 0 {
@@ -89,10 +91,13 @@ func (mm *MemoryManager) existingVecPMAsLocked(ars usermem.AddrRangeSeq, at user
 //
 // - An error that is non-nil if pmas exist for only a subset of ar.
 //
-// Preconditions: mm.mappingMu must be locked. mm.activeMu must be locked for
-// writing. ar.Length() != 0. vseg.Range().Contains(ar.Start). vmas must exist
-// for all addresses in ar, and support accesses of type at (i.e. permission
-// checks must have been performed against vmas).
+// Preconditions:
+// * mm.mappingMu must be locked.
+// * mm.activeMu must be locked for writing.
+// * ar.Length() != 0.
+// * vseg.Range().Contains(ar.Start).
+// * vmas must exist for all addresses in ar, and support accesses of type at
+//   (i.e. permission checks must have been performed against vmas).
 func (mm *MemoryManager) getPMAsLocked(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, at usermem.AccessType) (pmaIterator, pmaGapIterator, error) {
 	if checkInvariants {
 		if !ar.WellFormed() || ar.Length() <= 0 {
@@ -135,9 +140,11 @@ func (mm *MemoryManager) getPMAsLocked(ctx context.Context, vseg vmaIterator, ar
 // exist. If this is not equal to ars, it returns a non-nil error explaining
 // why.
 //
-// Preconditions: mm.mappingMu must be locked. mm.activeMu must be locked for
-// writing. vmas must exist for all addresses in ars, and support accesses of
-// type at (i.e. permission checks must have been performed against vmas).
+// Preconditions:
+// * mm.mappingMu must be locked.
+// * mm.activeMu must be locked for writing.
+// * vmas must exist for all addresses in ars, and support accesses of type at
+//   (i.e. permission checks must have been performed against vmas).
 func (mm *MemoryManager) getVecPMAsLocked(ctx context.Context, ars usermem.AddrRangeSeq, at usermem.AccessType) (usermem.AddrRangeSeq, error) {
 	for arsit := ars; !arsit.IsEmpty(); arsit = arsit.Tail() {
 		ar := arsit.Head()
@@ -518,8 +525,10 @@ func privateAligned(ar usermem.AddrRange) usermem.AddrRange {
 // the memory it maps, isPMACopyOnWriteLocked will take ownership of the memory
 // and update the pma to indicate that it does not require copy-on-write.
 //
-// Preconditions: vseg.Range().IsSupersetOf(pseg.Range()). mm.mappingMu must be
-// locked. mm.activeMu must be locked for writing.
+// Preconditions:
+// * vseg.Range().IsSupersetOf(pseg.Range()).
+// * mm.mappingMu must be locked.
+// * mm.activeMu must be locked for writing.
 func (mm *MemoryManager) isPMACopyOnWriteLocked(vseg vmaIterator, pseg pmaIterator) bool {
 	pma := pseg.ValuePtr()
 	if !pma.needCOW {
@@ -568,8 +577,10 @@ func (mm *MemoryManager) Invalidate(ar usermem.AddrRange, opts memmap.Invalidate
 // invalidateLocked removes pmas and AddressSpace mappings of those pmas for
 // addresses in ar.
 //
-// Preconditions: mm.activeMu must be locked for writing. ar.Length() != 0. ar
-// must be page-aligned.
+// Preconditions:
+// * mm.activeMu must be locked for writing.
+// * ar.Length() != 0.
+// * ar must be page-aligned.
 func (mm *MemoryManager) invalidateLocked(ar usermem.AddrRange, invalidatePrivate, invalidateShared bool) {
 	if checkInvariants {
 		if !ar.WellFormed() || ar.Length() <= 0 || !ar.IsPageAligned() {
@@ -613,7 +624,9 @@ func (mm *MemoryManager) invalidateLocked(ar usermem.AddrRange, invalidatePrivat
 // most I/O. It should only be used in contexts that would use get_user_pages()
 // in the Linux kernel.
 //
-// Preconditions: ar.Length() != 0. ar must be page-aligned.
+// Preconditions:
+// * ar.Length() != 0.
+// * ar must be page-aligned.
 func (mm *MemoryManager) Pin(ctx context.Context, ar usermem.AddrRange, at usermem.AccessType, ignorePermissions bool) ([]PinnedRange, error) {
 	if checkInvariants {
 		if !ar.WellFormed() || ar.Length() <= 0 || !ar.IsPageAligned() {
@@ -693,9 +706,13 @@ func Unpin(prs []PinnedRange) {
 
 // movePMAsLocked moves all pmas in oldAR to newAR.
 //
-// Preconditions: mm.activeMu must be locked for writing. oldAR.Length() != 0.
-// oldAR.Length() <= newAR.Length(). !oldAR.Overlaps(newAR).
-// mm.pmas.IsEmptyRange(newAR). oldAR and newAR must be page-aligned.
+// Preconditions:
+// * mm.activeMu must be locked for writing.
+// * oldAR.Length() != 0.
+// * oldAR.Length() <= newAR.Length().
+// * !oldAR.Overlaps(newAR).
+// * mm.pmas.IsEmptyRange(newAR).
+// * oldAR and newAR must be page-aligned.
 func (mm *MemoryManager) movePMAsLocked(oldAR, newAR usermem.AddrRange) {
 	if checkInvariants {
 		if !oldAR.WellFormed() || oldAR.Length() <= 0 || !oldAR.IsPageAligned() {
@@ -751,9 +768,11 @@ func (mm *MemoryManager) movePMAsLocked(oldAR, newAR usermem.AddrRange) {
 // - An error that is non-nil if internal mappings exist for only a subset of
 // ar.
 //
-// Preconditions: mm.activeMu must be locked for writing.
-// pseg.Range().Contains(ar.Start). pmas must exist for all addresses in ar.
-// ar.Length() != 0.
+// Preconditions:
+// * mm.activeMu must be locked for writing.
+// * pseg.Range().Contains(ar.Start).
+// * pmas must exist for all addresses in ar.
+// * ar.Length() != 0.
 //
 // Postconditions: getPMAInternalMappingsLocked does not invalidate iterators
 // into mm.pmas.
@@ -783,8 +802,9 @@ func (mm *MemoryManager) getPMAInternalMappingsLocked(pseg pmaIterator, ar userm
 // internal mappings exist. If this is not equal to ars, it returns a non-nil
 // error explaining why.
 //
-// Preconditions: mm.activeMu must be locked for writing. pmas must exist for
-// all addresses in ar.
+// Preconditions:
+// * mm.activeMu must be locked for writing.
+// * pmas must exist for all addresses in ar.
 //
 // Postconditions: getVecPMAInternalMappingsLocked does not invalidate iterators
 // into mm.pmas.
@@ -803,9 +823,12 @@ func (mm *MemoryManager) getVecPMAInternalMappingsLocked(ars usermem.AddrRangeSe
 
 // internalMappingsLocked returns internal mappings for addresses in ar.
 //
-// Preconditions: mm.activeMu must be locked. Internal mappings must have been
-// previously established for all addresses in ar. ar.Length() != 0.
-// pseg.Range().Contains(ar.Start).
+// Preconditions:
+// * mm.activeMu must be locked.
+// * Internal mappings must have been previously established for all addresses
+//   in ar.
+// * ar.Length() != 0.
+// * pseg.Range().Contains(ar.Start).
 func (mm *MemoryManager) internalMappingsLocked(pseg pmaIterator, ar usermem.AddrRange) safemem.BlockSeq {
 	if checkInvariants {
 		if !ar.WellFormed() || ar.Length() <= 0 {
@@ -839,8 +862,10 @@ func (mm *MemoryManager) internalMappingsLocked(pseg pmaIterator, ar usermem.Add
 
 // vecInternalMappingsLocked returns internal mappings for addresses in ars.
 //
-// Preconditions: mm.activeMu must be locked. Internal mappings must have been
-// previously established for all addresses in ars.
+// Preconditions:
+// * mm.activeMu must be locked.
+// * Internal mappings must have been previously established for all addresses
+//   in ars.
 func (mm *MemoryManager) vecInternalMappingsLocked(ars usermem.AddrRangeSeq) safemem.BlockSeq {
 	var ims []safemem.Block
 	for ; !ars.IsEmpty(); ars = ars.Tail() {
@@ -969,7 +994,9 @@ func (pmaSetFunctions) Split(ar usermem.AddrRange, p pma, split usermem.Addr) (p
 // findOrSeekPrevUpperBoundPMA returns mm.pmas.UpperBoundSegment(addr), but may do
 // so by scanning linearly backward from pgap.
 //
-// Preconditions: mm.activeMu must be locked. addr <= pgap.Start().
+// Preconditions:
+// * mm.activeMu must be locked.
+// * addr <= pgap.Start().
 func (mm *MemoryManager) findOrSeekPrevUpperBoundPMA(addr usermem.Addr, pgap pmaGapIterator) pmaIterator {
 	if checkInvariants {
 		if !pgap.Ok() {
@@ -1015,7 +1042,9 @@ func (pseg pmaIterator) fileRange() memmap.FileRange {
 	return pseg.fileRangeOf(pseg.Range())
 }
 
-// Preconditions: pseg.Range().IsSupersetOf(ar). ar.Length != 0.
+// Preconditions:
+// * pseg.Range().IsSupersetOf(ar).
+// * ar.Length() != 0.
 func (pseg pmaIterator) fileRangeOf(ar usermem.AddrRange) memmap.FileRange {
 	if checkInvariants {
 		if !pseg.Ok() {
diff --git a/pkg/sentry/mm/syscalls.go b/pkg/sentry/mm/syscalls.go
index e74d4e1c12..4c9a575e78 100644
--- a/pkg/sentry/mm/syscalls.go
+++ b/pkg/sentry/mm/syscalls.go
@@ -166,7 +166,9 @@ func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (userme
 // populateVMA obtains pmas for addresses in ar in the given vma, and maps them
 // into mm.as if it is active.
 //
-// Preconditions: mm.mappingMu must be locked. vseg.Range().IsSupersetOf(ar).
+// Preconditions:
+// * mm.mappingMu must be locked.
+// * vseg.Range().IsSupersetOf(ar).
 func (mm *MemoryManager) populateVMA(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, precommit bool) {
 	if !vseg.ValuePtr().effectivePerms.Any() {
 		// Linux doesn't populate inaccessible pages. See
@@ -208,8 +210,9 @@ func (mm *MemoryManager) populateVMA(ctx context.Context, vseg vmaIterator, ar u
 // preferable to populateVMA since it unlocks mm.mappingMu before performing
 // expensive operations that don't require it to be locked.
 //
-// Preconditions: mm.mappingMu must be locked for writing.
-// vseg.Range().IsSupersetOf(ar).
+// Preconditions:
+// * mm.mappingMu must be locked for writing.
+// * vseg.Range().IsSupersetOf(ar).
 //
 // Postconditions: mm.mappingMu will be unlocked.
 func (mm *MemoryManager) populateVMAAndUnlock(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, precommit bool) {
diff --git a/pkg/sentry/mm/vma.go b/pkg/sentry/mm/vma.go
index c4e1989ed9..f769d8294b 100644
--- a/pkg/sentry/mm/vma.go
+++ b/pkg/sentry/mm/vma.go
@@ -27,8 +27,9 @@ import (
 	"gvisor.dev/gvisor/pkg/usermem"
 )
 
-// Preconditions: mm.mappingMu must be locked for writing. opts must be valid
-// as defined by the checks in MMap.
+// Preconditions:
+// * mm.mappingMu must be locked for writing.
+// * opts must be valid as defined by the checks in MMap.
 func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOpts) (vmaIterator, usermem.AddrRange, error) {
 	if opts.MaxPerms != opts.MaxPerms.Effective() {
 		panic(fmt.Sprintf("Non-effective MaxPerms %s cannot be enforced", opts.MaxPerms))
@@ -260,8 +261,9 @@ func (mm *MemoryManager) mlockedBytesRangeLocked(ar usermem.AddrRange) uint64 {
 //
 // - An error that is non-nil if vmas exist for only a subset of ar.
 //
-// Preconditions: mm.mappingMu must be locked for reading; it may be
-// temporarily unlocked. ar.Length() != 0.
+// Preconditions:
+// * mm.mappingMu must be locked for reading; it may be temporarily unlocked.
+// * ar.Length() != 0.
 func (mm *MemoryManager) getVMAsLocked(ctx context.Context, ar usermem.AddrRange, at usermem.AccessType, ignorePermissions bool) (vmaIterator, vmaGapIterator, error) {
 	if checkInvariants {
 		if !ar.WellFormed() || ar.Length() <= 0 {
@@ -342,8 +344,10 @@ const guardBytes = 256 * usermem.PageSize
 // unmapLocked unmaps all addresses in ar and returns the resulting gap in
 // mm.vmas.
 //
-// Preconditions: mm.mappingMu must be locked for writing. ar.Length() != 0.
-// ar must be page-aligned.
+// Preconditions:
+// * mm.mappingMu must be locked for writing.
+// * ar.Length() != 0.
+// * ar must be page-aligned.
 func (mm *MemoryManager) unmapLocked(ctx context.Context, ar usermem.AddrRange) vmaGapIterator {
 	if checkInvariants {
 		if !ar.WellFormed() || ar.Length() <= 0 || !ar.IsPageAligned() {
@@ -361,8 +365,10 @@ func (mm *MemoryManager) unmapLocked(ctx context.Context, ar usermem.AddrRange)
 // gap in mm.vmas. It does not remove pmas or AddressSpace mappings; clients
 // must do so before calling removeVMAsLocked.
 //
-// Preconditions: mm.mappingMu must be locked for writing. ar.Length() != 0. ar
-// must be page-aligned.
+// Preconditions:
+// * mm.mappingMu must be locked for writing.
+// * ar.Length() != 0.
+// * ar must be page-aligned.
 func (mm *MemoryManager) removeVMAsLocked(ctx context.Context, ar usermem.AddrRange) vmaGapIterator {
 	if checkInvariants {
 		if !ar.WellFormed() || ar.Length() <= 0 || !ar.IsPageAligned() {
@@ -467,7 +473,9 @@ func (vmaSetFunctions) Split(ar usermem.AddrRange, v vma, split usermem.Addr) (v
 	return v, v2
 }
 
-// Preconditions: vseg.ValuePtr().mappable != nil. vseg.Range().Contains(addr).
+// Preconditions:
+// * vseg.ValuePtr().mappable != nil.
+// * vseg.Range().Contains(addr).
 func (vseg vmaIterator) mappableOffsetAt(addr usermem.Addr) uint64 {
 	if checkInvariants {
 		if !vseg.Ok() {
@@ -491,8 +499,10 @@ func (vseg vmaIterator) mappableRange() memmap.MappableRange {
 	return vseg.mappableRangeOf(vseg.Range())
 }
 
-// Preconditions: vseg.ValuePtr().mappable != nil.
-// vseg.Range().IsSupersetOf(ar). ar.Length() != 0.
+// Preconditions:
+// * vseg.ValuePtr().mappable != nil.
+// * vseg.Range().IsSupersetOf(ar).
+// * ar.Length() != 0.
 func (vseg vmaIterator) mappableRangeOf(ar usermem.AddrRange) memmap.MappableRange {
 	if checkInvariants {
 		if !vseg.Ok() {
@@ -514,8 +524,10 @@ func (vseg vmaIterator) mappableRangeOf(ar usermem.AddrRange) memmap.MappableRan
 	return memmap.MappableRange{vma.off + uint64(ar.Start-vstart), vma.off + uint64(ar.End-vstart)}
 }
 
-// Preconditions: vseg.ValuePtr().mappable != nil.
-// vseg.mappableRange().IsSupersetOf(mr). mr.Length() != 0.
+// Preconditions:
+// * vseg.ValuePtr().mappable != nil.
+// * vseg.mappableRange().IsSupersetOf(mr).
+// * mr.Length() != 0.
 func (vseg vmaIterator) addrRangeOf(mr memmap.MappableRange) usermem.AddrRange {
 	if checkInvariants {
 		if !vseg.Ok() {
@@ -540,7 +552,9 @@ func (vseg vmaIterator) addrRangeOf(mr memmap.MappableRange) usermem.AddrRange {
 // seekNextLowerBound returns mm.vmas.LowerBoundSegment(addr), but does so by
 // scanning linearly forward from vseg.
 //
-// Preconditions: mm.mappingMu must be locked. addr >= vseg.Start().
+// Preconditions:
+// * mm.mappingMu must be locked.
+// * addr >= vseg.Start().
 func (vseg vmaIterator) seekNextLowerBound(addr usermem.Addr) vmaIterator {
 	if checkInvariants {
 		if !vseg.Ok() {
diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go
index 46d3be58ca..626d1eaa4d 100644
--- a/pkg/sentry/pgalloc/pgalloc.go
+++ b/pkg/sentry/pgalloc/pgalloc.go
@@ -507,7 +507,9 @@ func findAvailableRange(usage *usageSet, fileSize int64, length, alignment uint6
 // nearest page. If this is shorter than length bytes due to an error returned
 // by r.ReadToBlocks(), it returns that error.
 //
-// Preconditions: length > 0. length must be page-aligned.
+// Preconditions:
+// * length > 0.
+// * length must be page-aligned.
 func (f *MemoryFile) AllocateAndFill(length uint64, kind usage.MemoryKind, r safemem.Reader) (memmap.FileRange, error) {
 	fr, err := f.Allocate(length, kind)
 	if err != nil {
@@ -1167,8 +1169,10 @@ func (f *MemoryFile) startEvictionsLocked() bool {
 	return startedAny
 }
 
-// Preconditions: info == f.evictable[user]. !info.evicting. f.mu must be
-// locked.
+// Preconditions:
+// * info == f.evictable[user].
+// * !info.evicting.
+// * f.mu must be locked.
 func (f *MemoryFile) startEvictionGoroutineLocked(user EvictableMemoryUser, info *evictableMemoryUserInfo) {
 	info.evicting = true
 	f.evictionWG.Add(1)
diff --git a/pkg/sentry/platform/interrupt/interrupt.go b/pkg/sentry/platform/interrupt/interrupt.go
index 57be416472..9dfac3eae7 100644
--- a/pkg/sentry/platform/interrupt/interrupt.go
+++ b/pkg/sentry/platform/interrupt/interrupt.go
@@ -54,8 +54,9 @@ type Forwarder struct {
 // }
 // defer f.Disable()
 //
-// Preconditions: r must not be nil. f must not already be forwarding
-// interrupts to a Receiver.
+// Preconditions:
+// * r must not be nil.
+// * f must not already be forwarding interrupts to a Receiver.
 func (f *Forwarder) Enable(r Receiver) bool {
 	if r == nil {
 		panic("nil Receiver")
diff --git a/pkg/sentry/platform/platform.go b/pkg/sentry/platform/platform.go
index ba031516ad..530e779b0d 100644
--- a/pkg/sentry/platform/platform.go
+++ b/pkg/sentry/platform/platform.go
@@ -245,14 +245,19 @@ type AddressSpace interface {
 	// physical memory) to the mapping. The precommit flag is advisory and
 	// implementations may choose to ignore it.
 	//
-	// Preconditions: addr and fr must be page-aligned. fr.Length() > 0.
-	// at.Any() == true. At least one reference must be held on all pages in
-	// fr, and must continue to be held as long as pages are mapped.
+	// Preconditions:
+	// * addr and fr must be page-aligned.
+	// * fr.Length() > 0.
+	// * at.Any() == true.
+	// * At least one reference must be held on all pages in fr, and must
+	//   continue to be held as long as pages are mapped.
 	MapFile(addr usermem.Addr, f memmap.File, fr memmap.FileRange, at usermem.AccessType, precommit bool) error
 
 	// Unmap unmaps the given range.
 	//
-	// Preconditions: addr is page-aligned. length > 0.
+	// Preconditions:
+	// * addr is page-aligned.
+	// * length > 0.
 	Unmap(addr usermem.Addr, length uint64)
 
 	// Release releases this address space. After releasing, a new AddressSpace
diff --git a/pkg/sentry/vfs/dentry.go b/pkg/sentry/vfs/dentry.go
index bc7ea93ea8..a69a5b2f12 100644
--- a/pkg/sentry/vfs/dentry.go
+++ b/pkg/sentry/vfs/dentry.go
@@ -242,8 +242,9 @@ func (vfs *VirtualFilesystem) InvalidateDentry(ctx context.Context, d *Dentry) {
 // caller must call AbortRenameDentry, CommitRenameReplaceDentry, or
 // CommitRenameExchangeDentry depending on the rename's outcome.
 //
-// Preconditions: If to is not nil, it must be a child Dentry from the same
-// Filesystem. from != to.
+// Preconditions:
+// * If to is not nil, it must be a child Dentry from the same Filesystem.
+// * from != to.
 func (vfs *VirtualFilesystem) PrepareRenameDentry(mntns *MountNamespace, from, to *Dentry) error {
 	vfs.mountMu.Lock()
 	if mntns.mountpoints[from] != 0 {
diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go
index dcafffe57c..d3abe28eed 100644
--- a/pkg/sentry/vfs/file_description.go
+++ b/pkg/sentry/vfs/file_description.go
@@ -371,8 +371,9 @@ type FileDescriptionImpl interface {
 	//
 	// - If opts.Flags specifies unsupported options, PRead returns EOPNOTSUPP.
 	//
-	// Preconditions: The FileDescription was opened for reading.
-	// FileDescriptionOptions.DenyPRead == false.
+	// Preconditions:
+	// * The FileDescription was opened for reading.
+	// * FileDescriptionOptions.DenyPRead == false.
 	PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error)
 
 	// Read is similar to PRead, but does not specify an offset.
@@ -403,8 +404,9 @@ type FileDescriptionImpl interface {
 	// - If opts.Flags specifies unsupported options, PWrite returns
 	// EOPNOTSUPP.
 	//
-	// Preconditions: The FileDescription was opened for writing.
-	// FileDescriptionOptions.DenyPWrite == false.
+	// Preconditions:
+	// * The FileDescription was opened for writing.
+	// * FileDescriptionOptions.DenyPWrite == false.
 	PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error)
 
 	// Write is similar to PWrite, but does not specify an offset, which is
diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go
index df3758fd16..2c60cfab28 100644
--- a/pkg/sentry/vfs/filesystem.go
+++ b/pkg/sentry/vfs/filesystem.go
@@ -212,8 +212,9 @@ type FilesystemImpl interface {
 	// ENOENT. Equivalently, if vd represents a file with a link count of 0 not
 	// created by open(O_TMPFILE) without O_EXCL, LinkAt returns ENOENT.
 	//
-	// Preconditions: !rp.Done(). For the final path component in rp,
-	// !rp.ShouldFollowSymlink().
+	// Preconditions:
+	// * !rp.Done().
+	// * For the final path component in rp, !rp.ShouldFollowSymlink().
 	//
 	// Postconditions: If LinkAt returns an error returned by
 	// ResolvingPath.Resolve*(), then !rp.Done().
@@ -231,8 +232,9 @@ type FilesystemImpl interface {
 	// - If the directory in which the new directory would be created has been
 	// removed by RmdirAt or RenameAt, MkdirAt returns ENOENT.
 	//
-	// Preconditions: !rp.Done(). For the final path component in rp,
-	// !rp.ShouldFollowSymlink().
+	// Preconditions:
+	// * !rp.Done().
+	// * For the final path component in rp, !rp.ShouldFollowSymlink().
 	//
 	// Postconditions: If MkdirAt returns an error returned by
 	// ResolvingPath.Resolve*(), then !rp.Done().
@@ -253,8 +255,9 @@ type FilesystemImpl interface {
 	// - If the directory in which the file would be created has been removed
 	// by RmdirAt or RenameAt, MknodAt returns ENOENT.
 	//
-	// Preconditions: !rp.Done(). For the final path component in rp,
-	// !rp.ShouldFollowSymlink().
+	// Preconditions:
+	// * !rp.Done().
+	// * For the final path component in rp, !rp.ShouldFollowSymlink().
 	//
 	// Postconditions: If MknodAt returns an error returned by
 	// ResolvingPath.Resolve*(), then !rp.Done().
@@ -345,11 +348,12 @@ type FilesystemImpl interface {
 	// - If renaming would replace a non-empty directory, RenameAt returns
 	// ENOTEMPTY.
 	//
-	// Preconditions: !rp.Done(). For the final path component in rp,
-	// !rp.ShouldFollowSymlink(). oldParentVD.Dentry() was obtained from a
-	// previous call to
-	// oldParentVD.Mount().Filesystem().Impl().GetParentDentryAt(). oldName is
-	// not "." or "..".
+	// Preconditions:
+	// * !rp.Done().
+	// * For the final path component in rp, !rp.ShouldFollowSymlink().
+	// * oldParentVD.Dentry() was obtained from a previous call to
+	//   oldParentVD.Mount().Filesystem().Impl().GetParentDentryAt().
+	// * oldName is not "." or "..".
 	//
 	// Postconditions: If RenameAt returns an error returned by
 	// ResolvingPath.Resolve*(), then !rp.Done().
@@ -372,8 +376,9 @@ type FilesystemImpl interface {
 	// - If the file at rp exists but is not a directory, RmdirAt returns
 	// ENOTDIR.
 	//
-	// Preconditions: !rp.Done(). For the final path component in rp,
-	// !rp.ShouldFollowSymlink().
+	// Preconditions:
+	// * !rp.Done().
+	// * For the final path component in rp, !rp.ShouldFollowSymlink().
 	//
 	// Postconditions: If RmdirAt returns an error returned by
 	// ResolvingPath.Resolve*(), then !rp.Done().
@@ -410,8 +415,9 @@ type FilesystemImpl interface {
 	// - If the directory in which the symbolic link would be created has been
 	// removed by RmdirAt or RenameAt, SymlinkAt returns ENOENT.
 	//
-	// Preconditions: !rp.Done(). For the final path component in rp,
-	// !rp.ShouldFollowSymlink().
+	// Preconditions:
+	// * !rp.Done().
+	// * For the final path component in rp, !rp.ShouldFollowSymlink().
 	//
 	// Postconditions: If SymlinkAt returns an error returned by
 	// ResolvingPath.Resolve*(), then !rp.Done().
@@ -431,8 +437,9 @@ type FilesystemImpl interface {
 	//
 	// - If the file at rp exists but is a directory, UnlinkAt returns EISDIR.
 	//
-	// Preconditions: !rp.Done(). For the final path component in rp,
-	// !rp.ShouldFollowSymlink().
+	// Preconditions:
+	// * !rp.Done().
+	// * For the final path component in rp, !rp.ShouldFollowSymlink().
 	//
 	// Postconditions: If UnlinkAt returns an error returned by
 	// ResolvingPath.Resolve*(), then !rp.Done().
diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go
index 67dfba9867..714af69072 100644
--- a/pkg/sentry/vfs/mount.go
+++ b/pkg/sentry/vfs/mount.go
@@ -369,8 +369,9 @@ type umountRecursiveOptions struct {
 //
 // umountRecursiveLocked is analogous to Linux's fs/namespace.c:umount_tree().
 //
-// Preconditions: vfs.mountMu must be locked. vfs.mounts.seq must be in a
-// writer critical section.
+// Preconditions:
+// * vfs.mountMu must be locked.
+// * vfs.mounts.seq must be in a writer critical section.
 func (vfs *VirtualFilesystem) umountRecursiveLocked(mnt *Mount, opts *umountRecursiveOptions, vdsToDecRef []VirtualDentry, mountsToDecRef []*Mount) ([]VirtualDentry, []*Mount) {
 	if !mnt.umounted {
 		mnt.umounted = true
@@ -399,9 +400,11 @@ func (vfs *VirtualFilesystem) umountRecursiveLocked(mnt *Mount, opts *umountRecu
 // connectLocked makes vd the mount parent/point for mnt. It consumes
 // references held by vd.
 //
-// Preconditions: vfs.mountMu must be locked. vfs.mounts.seq must be in a
-// writer critical section. d.mu must be locked. mnt.parent() == nil, i.e. mnt
-// must not already be connected.
+// Preconditions:
+// * vfs.mountMu must be locked.
+// * vfs.mounts.seq must be in a writer critical section.
+// * d.mu must be locked.
+// * mnt.parent() == nil, i.e. mnt must not already be connected.
 func (vfs *VirtualFilesystem) connectLocked(mnt *Mount, vd VirtualDentry, mntns *MountNamespace) {
 	if checkInvariants {
 		if mnt.parent() != nil {
@@ -429,8 +432,10 @@ func (vfs *VirtualFilesystem) connectLocked(mnt *Mount, vd VirtualDentry, mntns
 // disconnectLocked makes vd have no mount parent/point and returns its old
 // mount parent/point with a reference held.
 //
-// Preconditions: vfs.mountMu must be locked. vfs.mounts.seq must be in a
-// writer critical section. mnt.parent() != nil.
+// Preconditions:
+// * vfs.mountMu must be locked.
+// * vfs.mounts.seq must be in a writer critical section.
+// * mnt.parent() != nil.
 func (vfs *VirtualFilesystem) disconnectLocked(mnt *Mount) VirtualDentry {
 	vd := mnt.loadKey()
 	if checkInvariants {
@@ -576,8 +581,9 @@ retryFirst:
 // mnt. It takes a reference on the returned VirtualDentry. If no such mount
 // point exists (i.e. mnt is a root mount), getMountpointAt returns (nil, nil).
 //
-// Preconditions: References are held on mnt and root. vfsroot is not (mnt,
-// mnt.root).
+// Preconditions:
+// * References are held on mnt and root.
+// * vfsroot is not (mnt, mnt.root).
 func (vfs *VirtualFilesystem) getMountpointAt(ctx context.Context, mnt *Mount, vfsroot VirtualDentry) VirtualDentry {
 	// The first mount is special-cased:
 	//
diff --git a/pkg/sentry/vfs/mount_unsafe.go b/pkg/sentry/vfs/mount_unsafe.go
index 70f850ca4c..777d631cb9 100644
--- a/pkg/sentry/vfs/mount_unsafe.go
+++ b/pkg/sentry/vfs/mount_unsafe.go
@@ -217,8 +217,9 @@ func (mt *mountTable) Insert(mount *Mount) {
 
 // insertSeqed inserts the given mount into mt.
 //
-// Preconditions: mt.seq must be in a writer critical section. mt must not
-// already contain a Mount with the same mount point and parent.
+// Preconditions:
+// * mt.seq must be in a writer critical section.
+// * mt must not already contain a Mount with the same mount point and parent.
 func (mt *mountTable) insertSeqed(mount *Mount) {
 	hash := memhash(unsafe.Pointer(&mount.key), uintptr(mt.seed), mountKeyBytes)
 
@@ -269,9 +270,11 @@ func (mt *mountTable) insertSeqed(mount *Mount) {
 	atomic.StorePointer(&mt.slots, newSlots)
 }
 
-// Preconditions: There are no concurrent mutators of the table (slots, cap).
-// If the table is visible to readers, then mt.seq must be in a writer critical
-// section. cap must be a power of 2.
+// Preconditions:
+// * There are no concurrent mutators of the table (slots, cap).
+// * If the table is visible to readers, then mt.seq must be in a writer
+//   critical section.
+// * cap must be a power of 2.
 func mtInsertLocked(slots unsafe.Pointer, cap uintptr, value unsafe.Pointer, hash uintptr) {
 	mask := cap - 1
 	off := (hash & mask) * mountSlotBytes
@@ -313,8 +316,9 @@ func (mt *mountTable) Remove(mount *Mount) {
 
 // removeSeqed removes the given mount from mt.
 //
-// Preconditions: mt.seq must be in a writer critical section. mt must contain
-// mount.
+// Preconditions:
+// * mt.seq must be in a writer critical section.
+// * mt must contain mount.
 func (mt *mountTable) removeSeqed(mount *Mount) {
 	hash := memhash(unsafe.Pointer(&mount.key), uintptr(mt.seed), mountKeyBytes)
 	tcap := uintptr(1) << (mt.size & mtSizeOrderMask)
diff --git a/pkg/syncevent/broadcaster.go b/pkg/syncevent/broadcaster.go
index 4bff59e7d7..dabf08895d 100644
--- a/pkg/syncevent/broadcaster.go
+++ b/pkg/syncevent/broadcaster.go
@@ -111,7 +111,9 @@ func (b *Broadcaster) SubscribeEvents(r *Receiver, filter Set) SubscriptionID {
 	return id
 }
 
-// Preconditions: table must not be full. len(table) is a power of 2.
+// Preconditions:
+// * table must not be full.
+// * len(table) is a power of 2.
 func broadcasterTableInsert(table []broadcasterSlot, id SubscriptionID, r *Receiver, filter Set) {
 	entry := broadcasterSlot{
 		receiver: r,
diff --git a/pkg/syncevent/source.go b/pkg/syncevent/source.go
index ddffb171af..d3d0f34c5d 100644
--- a/pkg/syncevent/source.go
+++ b/pkg/syncevent/source.go
@@ -19,9 +19,11 @@ type Source interface {
 	// SubscribeEvents causes the Source to notify the given Receiver of the
 	// given subset of events.
 	//
-	// Preconditions: r != nil. The ReceiverCallback for r must not take locks
-	// that are ordered prior to the Source; for example, it cannot call any
-	// Source methods.
+	// Preconditions:
+	// * r != nil.
+	// * The ReceiverCallback for r must not take locks that are ordered
+	//   prior to the Source; for example, it cannot call any Source
+	//   methods.
 	SubscribeEvents(r *Receiver, filter Set) SubscriptionID
 
 	// UnsubscribeEvents causes the Source to stop notifying the Receiver
diff --git a/pkg/tcpip/stack/conntrack.go b/pkg/tcpip/stack/conntrack.go
index 7dd344b4f3..836682ea06 100644
--- a/pkg/tcpip/stack/conntrack.go
+++ b/pkg/tcpip/stack/conntrack.go
@@ -572,7 +572,9 @@ func (ct *ConnTrack) reapUnused(start int, prevInterval time.Duration) (int, tim
 // reapTupleLocked tries to remove tuple and its reply from the table. It
 // returns whether the tuple's connection has timed out.
 //
-// Preconditions: ct.mu is locked for reading and bucket is locked.
+// Preconditions:
+// * ct.mu is locked for reading.
+// * bucket is locked.
 func (ct *ConnTrack) reapTupleLocked(tuple *tuple, bucket int, now time.Time) bool {
 	if !tuple.conn.timedOut(now) {
 		return false
diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go
index c37da814f9..41ef4236bc 100644
--- a/pkg/tcpip/stack/iptables.go
+++ b/pkg/tcpip/stack/iptables.go
@@ -315,8 +315,8 @@ func (it *IPTables) startReaper(interval time.Duration) {
 // should not go forward.
 //
 // Preconditions:
-// - pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
-// - pkt.NetworkHeader is not nil.
+// * pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
+// * pkt.NetworkHeader is not nil.
 //
 // NOTE: unlike the Check API the returned map contains packets that should be
 // dropped.
@@ -341,8 +341,8 @@ func (it *IPTables) CheckPackets(hook Hook, pkts PacketBufferList, gso *GSO, r *
 }
 
 // Preconditions:
-// - pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
-// - pkt.NetworkHeader is not nil.
+// * pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
+// * pkt.NetworkHeader is not nil.
 func (it *IPTables) checkChain(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, gso *GSO, r *Route, address tcpip.Address, nicName string) chainVerdict {
 	// Start from ruleIdx and walk the list of rules until a rule gives us
 	// a verdict.
@@ -388,8 +388,8 @@ func (it *IPTables) checkChain(hook Hook, pkt *PacketBuffer, table Table, ruleId
 }
 
 // Preconditions:
-// - pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
-// - pkt.NetworkHeader is not nil.
+// * pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
+// * pkt.NetworkHeader is not nil.
 func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, gso *GSO, r *Route, address tcpip.Address, nicName string) (RuleVerdict, int) {
 	rule := table.Rules[ruleIdx]
 
diff --git a/pkg/unet/unet.go b/pkg/unet/unet.go
index d843f19cfa..c976d72303 100644
--- a/pkg/unet/unet.go
+++ b/pkg/unet/unet.go
@@ -522,7 +522,7 @@ func (s *ServerSocket) Listen() error {
 // This is always blocking.
 //
 // Preconditions:
-//  * ServerSocket is listening (Listen called).
+// * ServerSocket is listening (Listen called).
 func (s *ServerSocket) Accept() (*Socket, error) {
 	fd, ok := s.socket.enterFD()
 	if !ok {
diff --git a/pkg/usermem/addr_range_seq_unsafe.go b/pkg/usermem/addr_range_seq_unsafe.go
index c09337c15d..495896ded1 100644
--- a/pkg/usermem/addr_range_seq_unsafe.go
+++ b/pkg/usermem/addr_range_seq_unsafe.go
@@ -81,8 +81,10 @@ func AddrRangeSeqFromSlice(slice []AddrRange) AddrRangeSeq {
 	return addrRangeSeqFromSliceLimited(slice, limit)
 }
 
-// Preconditions: The combined length of all AddrRanges in slice <= limit.
-// limit >= 0. If len(slice) != 0, then limit > 0.
+// Preconditions:
+// * The combined length of all AddrRanges in slice <= limit.
+// * limit >= 0.
+// * If len(slice) != 0, then limit > 0.
 func addrRangeSeqFromSliceLimited(slice []AddrRange, limit int64) AddrRangeSeq {
 	switch len(slice) {
 	case 0:
diff --git a/pkg/usermem/usermem.go b/pkg/usermem/usermem.go
index cd6a0ea6b7..27279b409e 100644
--- a/pkg/usermem/usermem.go
+++ b/pkg/usermem/usermem.go
@@ -54,8 +54,10 @@ type IO interface {
 	// of bytes zeroed. If the number of bytes zeroed is < toZero, it returns a
 	// non-nil error explaining why.
 	//
-	// Preconditions: The caller must not hold mm.MemoryManager.mappingMu or
-	// any following locks in the lock order. toZero >= 0.
+	// Preconditions:
+	// * The caller must not hold mm.MemoryManager.mappingMu or any
+	//   following locks in the lock order.
+	// * toZero >= 0.
 	ZeroOut(ctx context.Context, addr Addr, toZero int64, opts IOOpts) (int64, error)
 
 	// CopyOutFrom copies ars.NumBytes() bytes from src to the memory mapped at
@@ -66,9 +68,11 @@ type IO interface {
 	//
 	// CopyOutFrom calls src.ReadToBlocks at most once.
 	//
-	// Preconditions: The caller must not hold mm.MemoryManager.mappingMu or
-	// any following locks in the lock order. src.ReadToBlocks must not block
-	// on mm.MemoryManager.activeMu or any preceding locks in the lock order.
+	// Preconditions:
+	// * The caller must not hold mm.MemoryManager.mappingMu or any
+	//   following locks in the lock order.
+	// * src.ReadToBlocks must not block on mm.MemoryManager.activeMu or
+	//   any preceding locks in the lock order.
 	CopyOutFrom(ctx context.Context, ars AddrRangeSeq, src safemem.Reader, opts IOOpts) (int64, error)
 
 	// CopyInTo copies ars.NumBytes() bytes from the memory mapped at ars to
@@ -78,10 +82,11 @@ type IO interface {
 	//
 	// CopyInTo calls dst.WriteFromBlocks at most once.
 	//
-	// Preconditions: The caller must not hold mm.MemoryManager.mappingMu or
-	// any following locks in the lock order. dst.WriteFromBlocks must not
-	// block on mm.MemoryManager.activeMu or any preceding locks in the lock
-	// order.
+	// Preconditions:
+	// * The caller must not hold mm.MemoryManager.mappingMu or any
+	//   following locks in the lock order.
+	// * dst.WriteFromBlocks must not block on mm.MemoryManager.activeMu or
+	//   any preceding locks in the lock order.
 	CopyInTo(ctx context.Context, ars AddrRangeSeq, dst safemem.Writer, opts IOOpts) (int64, error)
 
 	// TODO(jamieliu): The requirement that CopyOutFrom/CopyInTo call src/dst
@@ -93,25 +98,28 @@ type IO interface {
 	// SwapUint32 atomically sets the uint32 value at addr to new and
 	// returns the previous value.
 	//
-	// Preconditions: The caller must not hold mm.MemoryManager.mappingMu or
-	// any following locks in the lock order. addr must be aligned to a 4-byte
-	// boundary.
+	// Preconditions:
+	// * The caller must not hold mm.MemoryManager.mappingMu or any
+	//   following locks in the lock order.
+	// * addr must be aligned to a 4-byte boundary.
 	SwapUint32(ctx context.Context, addr Addr, new uint32, opts IOOpts) (uint32, error)
 
 	// CompareAndSwapUint32 atomically compares the uint32 value at addr to
 	// old; if they are equal, the value in memory is replaced by new. In
 	// either case, the previous value stored in memory is returned.
 	//
-	// Preconditions: The caller must not hold mm.MemoryManager.mappingMu or
-	// any following locks in the lock order. addr must be aligned to a 4-byte
-	// boundary.
+	// Preconditions:
+	// * The caller must not hold mm.MemoryManager.mappingMu or any
+	//   following locks in the lock order.
+	// * addr must be aligned to a 4-byte boundary.
 	CompareAndSwapUint32(ctx context.Context, addr Addr, old, new uint32, opts IOOpts) (uint32, error)
 
 	// LoadUint32 atomically loads the uint32 value at addr and returns it.
 	//
-	// Preconditions: The caller must not hold mm.MemoryManager.mappingMu or
-	// any following locks in the lock order. addr must be aligned to a 4-byte
-	// boundary.
+	// Preconditions:
+	// * The caller must not hold mm.MemoryManager.mappingMu or any
+	//   following locks in the lock order.
+	// * addr must be aligned to a 4-byte boundary.
 	LoadUint32(ctx context.Context, addr Addr, opts IOOpts) (uint32, error)
 }
 
@@ -183,7 +191,7 @@ func (rw *IOReadWriter) Write(src []byte) (int, error) {
 // CopyObjectOut must use reflection to encode src; performance-sensitive
 // clients should do encoding manually and use uio.CopyOut directly.
 //
-// Preconditions: As for IO.CopyOut.
+// Preconditions: Same as IO.CopyOut.
 func CopyObjectOut(ctx context.Context, uio IO, addr Addr, src interface{}, opts IOOpts) (int, error) {
 	w := &IOReadWriter{
 		Ctx:  ctx,
@@ -205,7 +213,7 @@ func CopyObjectOut(ctx context.Context, uio IO, addr Addr, src interface{}, opts
 // CopyObjectIn must use reflection to decode dst; performance-sensitive
 // clients should use uio.CopyIn directly and do decoding manually.
 //
-// Preconditions: As for IO.CopyIn.
+// Preconditions: Same as IO.CopyIn.
 func CopyObjectIn(ctx context.Context, uio IO, addr Addr, dst interface{}, opts IOOpts) (int, error) {
 	r := &IOReadWriter{
 		Ctx:  ctx,
@@ -233,7 +241,8 @@ const (
 // would exceed maxlen, CopyStringIn returns the string truncated to maxlen and
 // ENAMETOOLONG.
 //
-// Preconditions: As for IO.CopyFromUser. maxlen >= 0.
+// Preconditions: Same as IO.CopyFromUser, plus:
+// * maxlen >= 0.
 func CopyStringIn(ctx context.Context, uio IO, addr Addr, maxlen int, opts IOOpts) (string, error) {
 	initLen := maxlen
 	if initLen > copyStringMaxInitBufLen {
@@ -287,7 +296,7 @@ func CopyStringIn(ctx context.Context, uio IO, addr Addr, maxlen int, opts IOOpt
 // less. CopyOutVec returns the number of bytes copied; if this is less than
 // the maximum, it returns a non-nil error explaining why.
 //
-// Preconditions: As for IO.CopyOut.
+// Preconditions: Same as IO.CopyOut.
 func CopyOutVec(ctx context.Context, uio IO, ars AddrRangeSeq, src []byte, opts IOOpts) (int, error) {
 	var done int
 	for !ars.IsEmpty() && done < len(src) {
@@ -311,7 +320,7 @@ func CopyOutVec(ctx context.Context, uio IO, ars AddrRangeSeq, src []byte, opts
 // less. CopyInVec returns the number of bytes copied; if this is less than the
 // maximum, it returns a non-nil error explaining why.
 //
-// Preconditions: As for IO.CopyIn.
+// Preconditions: Same as IO.CopyIn.
 func CopyInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dst []byte, opts IOOpts) (int, error) {
 	var done int
 	for !ars.IsEmpty() && done < len(dst) {
@@ -335,7 +344,7 @@ func CopyInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dst []byte, opts I
 // ZeroOutVec returns the number of bytes written; if this is less than the
 // maximum, it returns a non-nil error explaining why.
 //
-// Preconditions: As for IO.ZeroOut.
+// Preconditions: Same as IO.ZeroOut.
 func ZeroOutVec(ctx context.Context, uio IO, ars AddrRangeSeq, toZero int64, opts IOOpts) (int64, error) {
 	var done int64
 	for !ars.IsEmpty() && done < toZero {
@@ -388,7 +397,7 @@ func isASCIIWhitespace(b byte) bool {
 //
 // - CopyInt32StringsInVec returns EINVAL if ars.NumBytes() == 0.
 //
-// Preconditions: As for CopyInVec.
+// Preconditions: Same as CopyInVec.
 func CopyInt32StringsInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dsts []int32, opts IOOpts) (int64, error) {
 	if len(dsts) == 0 {
 		return 0, nil
@@ -481,28 +490,28 @@ func (s IOSequence) NumBytes() int64 {
 
 // DropFirst returns a copy of s with s.Addrs.DropFirst(n).
 //
-// Preconditions: As for AddrRangeSeq.DropFirst.
+// Preconditions: Same as AddrRangeSeq.DropFirst.
 func (s IOSequence) DropFirst(n int) IOSequence {
 	return IOSequence{s.IO, s.Addrs.DropFirst(n), s.Opts}
 }
 
 // DropFirst64 returns a copy of s with s.Addrs.DropFirst64(n).
 //
-// Preconditions: As for AddrRangeSeq.DropFirst64.
+// Preconditions: Same as AddrRangeSeq.DropFirst64.
 func (s IOSequence) DropFirst64(n int64) IOSequence {
 	return IOSequence{s.IO, s.Addrs.DropFirst64(n), s.Opts}
 }
 
 // TakeFirst returns a copy of s with s.Addrs.TakeFirst(n).
 //
-// Preconditions: As for AddrRangeSeq.TakeFirst.
+// Preconditions: Same as AddrRangeSeq.TakeFirst.
 func (s IOSequence) TakeFirst(n int) IOSequence {
 	return IOSequence{s.IO, s.Addrs.TakeFirst(n), s.Opts}
 }
 
 // TakeFirst64 returns a copy of s with s.Addrs.TakeFirst64(n).
 //
-// Preconditions: As for AddrRangeSeq.TakeFirst64.
+// Preconditions: Same as AddrRangeSeq.TakeFirst64.
 func (s IOSequence) TakeFirst64(n int64) IOSequence {
 	return IOSequence{s.IO, s.Addrs.TakeFirst64(n), s.Opts}
 }
@@ -512,7 +521,7 @@ func (s IOSequence) TakeFirst64(n int64) IOSequence {
 // As with CopyOutVec, if s.NumBytes() < len(src), the copy will be truncated
 // to s.NumBytes(), and a nil error will be returned.
 //
-// Preconditions: As for CopyOutVec.
+// Preconditions: Same as CopyOutVec.
 func (s IOSequence) CopyOut(ctx context.Context, src []byte) (int, error) {
 	return CopyOutVec(ctx, s.IO, s.Addrs, src, s.Opts)
 }
@@ -522,7 +531,7 @@ func (s IOSequence) CopyOut(ctx context.Context, src []byte) (int, error) {
 // As with CopyInVec, if s.NumBytes() < len(dst), the copy will be truncated to
 // s.NumBytes(), and a nil error will be returned.
 //
-// Preconditions: As for CopyInVec.
+// Preconditions: Same as CopyInVec.
 func (s IOSequence) CopyIn(ctx context.Context, dst []byte) (int, error) {
 	return CopyInVec(ctx, s.IO, s.Addrs, dst, s.Opts)
 }
@@ -532,21 +541,21 @@ func (s IOSequence) CopyIn(ctx context.Context, dst []byte) (int, error) {
 // As with ZeroOutVec, if s.NumBytes() < toZero, the write will be truncated
 // to s.NumBytes(), and a nil error will be returned.
 //
-// Preconditions: As for ZeroOutVec.
+// Preconditions: Same as ZeroOutVec.
 func (s IOSequence) ZeroOut(ctx context.Context, toZero int64) (int64, error) {
 	return ZeroOutVec(ctx, s.IO, s.Addrs, toZero, s.Opts)
 }
 
 // CopyOutFrom invokes s.CopyOutFrom over s.Addrs.
 //
-// Preconditions: As for IO.CopyOutFrom.
+// Preconditions: Same as IO.CopyOutFrom.
 func (s IOSequence) CopyOutFrom(ctx context.Context, src safemem.Reader) (int64, error) {
 	return s.IO.CopyOutFrom(ctx, s.Addrs, src, s.Opts)
 }
 
 // CopyInTo invokes s.CopyInTo over s.Addrs.
 //
-// Preconditions: As for IO.CopyInTo.
+// Preconditions: Same as IO.CopyInTo.
 func (s IOSequence) CopyInTo(ctx context.Context, dst safemem.Writer) (int64, error) {
 	return s.IO.CopyInTo(ctx, s.Addrs, dst, s.Opts)
 }

From 76e5213c113a0eefa7341122dc06e4a4191800e8 Mon Sep 17 00:00:00 2001
From: Dean Deng <deandeng@google.com>
Date: Thu, 20 Aug 2020 14:10:46 -0700
Subject: [PATCH 042/211] Add reference count checking to the fsimpl/host
 package.

Includes a minor refactor for inode construction.

Updates #1486.

PiperOrigin-RevId: 327694933
---
 pkg/sentry/fsimpl/host/BUILD     | 25 +++++++++
 pkg/sentry/fsimpl/host/host.go   | 94 ++++++++++++++++++--------------
 pkg/sentry/fsimpl/host/socket.go | 20 +++----
 3 files changed, 88 insertions(+), 51 deletions(-)

diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD
index bd701bbc7a..090ae08042 100644
--- a/pkg/sentry/fsimpl/host/BUILD
+++ b/pkg/sentry/fsimpl/host/BUILD
@@ -1,12 +1,37 @@
 load("//tools:defs.bzl", "go_library")
+load("//tools/go_generics:defs.bzl", "go_template_instance")
 
 licenses(["notice"])
 
+go_template_instance(
+    name = "inode_refs",
+    out = "inode_refs.go",
+    package = "host",
+    prefix = "inode",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "inode",
+    },
+)
+
+go_template_instance(
+    name = "connected_endpoint_refs",
+    out = "connected_endpoint_refs.go",
+    package = "host",
+    prefix = "ConnectedEndpoint",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "ConnectedEndpoint",
+    },
+)
+
 go_library(
     name = "host",
     srcs = [
+        "connected_endpoint_refs.go",
         "control.go",
         "host.go",
+        "inode_refs.go",
         "ioctl_unsafe.go",
         "mmap.go",
         "socket.go",
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index 56869f59a1..2d3821f333 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -27,7 +27,6 @@ import (
 	"gvisor.dev/gvisor/pkg/fdnotifier"
 	"gvisor.dev/gvisor/pkg/fspath"
 	"gvisor.dev/gvisor/pkg/log"
-	"gvisor.dev/gvisor/pkg/refs"
 	fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
 	"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
 	"gvisor.dev/gvisor/pkg/sentry/hostfd"
@@ -41,6 +40,44 @@ import (
 	"gvisor.dev/gvisor/pkg/waiter"
 )
 
+func newInode(fs *filesystem, hostFD int, fileType linux.FileMode, isTTY bool) (*inode, error) {
+	// Determine if hostFD is seekable. If not, this syscall will return ESPIPE
+	// (see fs/read_write.c:llseek), e.g. for pipes, sockets, and some character
+	// devices.
+	_, err := unix.Seek(hostFD, 0, linux.SEEK_CUR)
+	seekable := err != syserror.ESPIPE
+
+	i := &inode{
+		hostFD:     hostFD,
+		ino:        fs.NextIno(),
+		isTTY:      isTTY,
+		wouldBlock: wouldBlock(uint32(fileType)),
+		seekable:   seekable,
+		// NOTE(b/38213152): Technically, some obscure char devices can be memory
+		// mapped, but we only allow regular files.
+		canMap: fileType == linux.S_IFREG,
+	}
+	i.pf.inode = i
+	i.refs.EnableLeakCheck()
+
+	// Non-seekable files can't be memory mapped, assert this.
+	if !i.seekable && i.canMap {
+		panic("files that can return EWOULDBLOCK (sockets, pipes, etc.) cannot be memory mapped")
+	}
+
+	// If the hostFD would block, we must set it to non-blocking and handle
+	// blocking behavior in the sentry.
+	if i.wouldBlock {
+		if err := syscall.SetNonblock(i.hostFD, true); err != nil {
+			return nil, err
+		}
+		if err := fdnotifier.AddFD(int32(i.hostFD), &i.queue); err != nil {
+			return nil, err
+		}
+	}
+	return i, nil
+}
+
 // NewFDOptions contains options to NewFD.
 type NewFDOptions struct {
 	// If IsTTY is true, the file descriptor is a TTY.
@@ -76,44 +113,11 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions)
 		flags = uint32(flagsInt)
 	}
 
-	fileMode := linux.FileMode(s.Mode)
-	fileType := fileMode.FileType()
-
-	// Determine if hostFD is seekable. If not, this syscall will return ESPIPE
-	// (see fs/read_write.c:llseek), e.g. for pipes, sockets, and some character
-	// devices.
-	_, err := unix.Seek(hostFD, 0, linux.SEEK_CUR)
-	seekable := err != syserror.ESPIPE
-
-	i := &inode{
-		hostFD:     hostFD,
-		ino:        fs.NextIno(),
-		isTTY:      opts.IsTTY,
-		wouldBlock: wouldBlock(uint32(fileType)),
-		seekable:   seekable,
-		// NOTE(b/38213152): Technically, some obscure char devices can be memory
-		// mapped, but we only allow regular files.
-		canMap: fileType == linux.S_IFREG,
-	}
-	i.pf.inode = i
-
-	// Non-seekable files can't be memory mapped, assert this.
-	if !i.seekable && i.canMap {
-		panic("files that can return EWOULDBLOCK (sockets, pipes, etc.) cannot be memory mapped")
-	}
-
-	// If the hostFD would block, we must set it to non-blocking and handle
-	// blocking behavior in the sentry.
-	if i.wouldBlock {
-		if err := syscall.SetNonblock(i.hostFD, true); err != nil {
-			return nil, err
-		}
-		if err := fdnotifier.AddFD(int32(i.hostFD), &i.queue); err != nil {
-			return nil, err
-		}
-	}
-
 	d := &kernfs.Dentry{}
+	i, err := newInode(fs, hostFD, linux.FileMode(s.Mode).FileType(), opts.IsTTY)
+	if err != nil {
+		return nil, err
+	}
 	d.Init(i)
 
 	// i.open will take a reference on d.
@@ -188,7 +192,7 @@ type inode struct {
 	locks vfs.FileLocks
 
 	// When the reference count reaches zero, the host fd is closed.
-	refs.AtomicRefCount
+	refs inodeRefs
 
 	// hostFD contains the host fd that this file was originally created from,
 	// which must be available at time of restore.
@@ -430,9 +434,19 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre
 	return nil
 }
 
+// IncRef implements kernfs.Inode.
+func (i *inode) IncRef() {
+	i.refs.IncRef()
+}
+
+// TryIncRef implements kernfs.Inode.
+func (i *inode) TryIncRef() bool {
+	return i.refs.TryIncRef()
+}
+
 // DecRef implements kernfs.Inode.
 func (i *inode) DecRef(ctx context.Context) {
-	i.AtomicRefCount.DecRefWithDestructor(ctx, func(context.Context) {
+	i.refs.DecRef(func() {
 		if i.wouldBlock {
 			fdnotifier.RemoveFD(int32(i.hostFD))
 		}
diff --git a/pkg/sentry/fsimpl/host/socket.go b/pkg/sentry/fsimpl/host/socket.go
index 4979dd0a97..131145b85b 100644
--- a/pkg/sentry/fsimpl/host/socket.go
+++ b/pkg/sentry/fsimpl/host/socket.go
@@ -22,7 +22,6 @@ import (
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/fdnotifier"
 	"gvisor.dev/gvisor/pkg/log"
-	"gvisor.dev/gvisor/pkg/refs"
 	"gvisor.dev/gvisor/pkg/sentry/socket/control"
 	"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
 	"gvisor.dev/gvisor/pkg/sentry/uniqueid"
@@ -59,8 +58,7 @@ func newEndpoint(ctx context.Context, hostFD int, queue *waiter.Queue) (transpor
 //
 // +stateify savable
 type ConnectedEndpoint struct {
-	// ref keeps track of references to a ConnectedEndpoint.
-	ref refs.AtomicRefCount
+	ConnectedEndpointRefs
 
 	// mu protects fd below.
 	mu sync.RWMutex `state:"nosave"`
@@ -132,9 +130,9 @@ func NewConnectedEndpoint(ctx context.Context, hostFD int, addr string, saveable
 		return nil, err
 	}
 
-	// AtomicRefCounters start off with a single reference. We need two.
-	e.ref.IncRef()
-	e.ref.EnableLeakCheck("host.ConnectedEndpoint")
+	// ConnectedEndpointRefs start off with a single reference. We need two.
+	e.IncRef()
+	e.EnableLeakCheck()
 	return &e, nil
 }
 
@@ -318,7 +316,7 @@ func (c *ConnectedEndpoint) destroyLocked() {
 // Release implements transport.ConnectedEndpoint.Release and
 // transport.Receiver.Release.
 func (c *ConnectedEndpoint) Release(ctx context.Context) {
-	c.ref.DecRefWithDestructor(ctx, func(context.Context) {
+	c.DecRef(func() {
 		c.mu.Lock()
 		c.destroyLocked()
 		c.mu.Unlock()
@@ -348,7 +346,7 @@ func (e *SCMConnectedEndpoint) Init() error {
 // Release implements transport.ConnectedEndpoint.Release and
 // transport.Receiver.Release.
 func (e *SCMConnectedEndpoint) Release(ctx context.Context) {
-	e.ref.DecRefWithDestructor(ctx, func(context.Context) {
+	e.DecRef(func() {
 		e.mu.Lock()
 		if err := syscall.Close(e.fd); err != nil {
 			log.Warningf("Failed to close host fd %d: %v", err)
@@ -378,8 +376,8 @@ func NewSCMEndpoint(ctx context.Context, hostFD int, queue *waiter.Queue, addr s
 		return nil, err
 	}
 
-	// AtomicRefCounters start off with a single reference. We need two.
-	e.ref.IncRef()
-	e.ref.EnableLeakCheck("host.SCMConnectedEndpoint")
+	// ConnectedEndpointRefs start off with a single reference. We need two.
+	e.IncRef()
+	e.EnableLeakCheck()
 	return &e, nil
 }

From b17c7094f706ce92eba44e72e2cede8814b29607 Mon Sep 17 00:00:00 2001
From: Ting-Yu Wang <anivia@google.com>
Date: Thu, 20 Aug 2020 15:38:06 -0700
Subject: [PATCH 043/211] stateify: Fix afterLoad not being called for root
 object

PiperOrigin-RevId: 327711264
---
 pkg/state/decode.go          | 6 ++++--
 pkg/state/tests/load_test.go | 8 ++++++++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/pkg/state/decode.go b/pkg/state/decode.go
index c9971cdf64..89467ca8ef 100644
--- a/pkg/state/decode.go
+++ b/pkg/state/decode.go
@@ -584,10 +584,12 @@ func (ds *decodeState) Load(obj reflect.Value) {
 	})
 
 	// Create the root object.
-	ds.objectsByID = append(ds.objectsByID, &objectDecodeState{
+	rootOds := &objectDecodeState{
 		id:  1,
 		obj: obj,
-	})
+	}
+	ds.objectsByID = append(ds.objectsByID, rootOds)
+	ds.pending.PushBack(rootOds)
 
 	// Read the number of objects.
 	lastID, object, err := ReadHeader(ds.r)
diff --git a/pkg/state/tests/load_test.go b/pkg/state/tests/load_test.go
index 1e97942961..3c73ac391b 100644
--- a/pkg/state/tests/load_test.go
+++ b/pkg/state/tests/load_test.go
@@ -20,6 +20,14 @@ import (
 
 func TestLoadHooks(t *testing.T) {
 	runTestCases(t, false, "load-hooks", []interface{}{
+		// Root object being a struct.
+		afterLoadStruct{v: 1},
+		valueLoadStruct{v: 1},
+		genericContainer{v: &afterLoadStruct{v: 1}},
+		genericContainer{v: &valueLoadStruct{v: 1}},
+		sliceContainer{v: []interface{}{&afterLoadStruct{v: 1}}},
+		sliceContainer{v: []interface{}{&valueLoadStruct{v: 1}}},
+		// Root object being a pointer.
 		&afterLoadStruct{v: 1},
 		&valueLoadStruct{v: 1},
 		&genericContainer{v: &afterLoadStruct{v: 1}},

From 4c758df8075628e10a58c0117a17dc5c41d57be9 Mon Sep 17 00:00:00 2001
From: Fabricio Voznika <fvoznika@google.com>
Date: Thu, 20 Aug 2020 16:16:46 -0700
Subject: [PATCH 044/211] Enable strace+debug in syscall tests

This is done to ease troubleshooting when tests fail. runsc
logs are not stored when tests pass, so this will only
affect failing tests and should not increase log storage
too badly.

PiperOrigin-RevId: 327717551
---
 test/perf/BUILD      | 22 ++++++++++++++++++++++
 test/runner/defs.bzl |  6 +++++-
 test/syscalls/BUILD  |  2 ++
 3 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/test/perf/BUILD b/test/perf/BUILD
index 471d8c2ab1..b763be50ef 100644
--- a/test/perf/BUILD
+++ b/test/perf/BUILD
@@ -3,33 +3,40 @@ load("//test/runner:defs.bzl", "syscall_test")
 package(licenses = ["notice"])
 
 syscall_test(
+    debug = False,
     test = "//test/perf/linux:clock_getres_benchmark",
 )
 
 syscall_test(
+    debug = False,
     test = "//test/perf/linux:clock_gettime_benchmark",
 )
 
 syscall_test(
+    debug = False,
     test = "//test/perf/linux:death_benchmark",
 )
 
 syscall_test(
+    debug = False,
     test = "//test/perf/linux:epoll_benchmark",
 )
 
 syscall_test(
     size = "large",
+    debug = False,
     test = "//test/perf/linux:fork_benchmark",
 )
 
 syscall_test(
     size = "large",
+    debug = False,
     test = "//test/perf/linux:futex_benchmark",
 )
 
 syscall_test(
     size = "enormous",
+    debug = False,
     shard_count = 10,
     tags = ["nogotsan"],
     test = "//test/perf/linux:getdents_benchmark",
@@ -37,81 +44,96 @@ syscall_test(
 
 syscall_test(
     size = "large",
+    debug = False,
     test = "//test/perf/linux:getpid_benchmark",
 )
 
 syscall_test(
     size = "enormous",
+    debug = False,
     tags = ["nogotsan"],
     test = "//test/perf/linux:gettid_benchmark",
 )
 
 syscall_test(
     size = "large",
+    debug = False,
     test = "//test/perf/linux:mapping_benchmark",
 )
 
 syscall_test(
     size = "large",
     add_overlay = True,
+    debug = False,
     test = "//test/perf/linux:open_benchmark",
 )
 
 syscall_test(
+    debug = False,
     test = "//test/perf/linux:pipe_benchmark",
 )
 
 syscall_test(
     size = "large",
     add_overlay = True,
+    debug = False,
     test = "//test/perf/linux:randread_benchmark",
 )
 
 syscall_test(
     size = "large",
     add_overlay = True,
+    debug = False,
     test = "//test/perf/linux:read_benchmark",
 )
 
 syscall_test(
     size = "large",
+    debug = False,
     test = "//test/perf/linux:sched_yield_benchmark",
 )
 
 syscall_test(
     size = "large",
+    debug = False,
     test = "//test/perf/linux:send_recv_benchmark",
 )
 
 syscall_test(
     size = "large",
     add_overlay = True,
+    debug = False,
     test = "//test/perf/linux:seqwrite_benchmark",
 )
 
 syscall_test(
     size = "enormous",
+    debug = False,
     test = "//test/perf/linux:signal_benchmark",
 )
 
 syscall_test(
+    debug = False,
     test = "//test/perf/linux:sleep_benchmark",
 )
 
 syscall_test(
     size = "large",
     add_overlay = True,
+    debug = False,
     test = "//test/perf/linux:stat_benchmark",
 )
 
 syscall_test(
     size = "enormous",
     add_overlay = True,
+    debug = False,
     test = "//test/perf/linux:unlink_benchmark",
 )
 
 syscall_test(
     size = "large",
     add_overlay = True,
+    debug = False,
     test = "//test/perf/linux:write_benchmark",
 )
diff --git a/test/runner/defs.bzl b/test/runner/defs.bzl
index 2d64934b09..032ebd04e3 100644
--- a/test/runner/defs.bzl
+++ b/test/runner/defs.bzl
@@ -62,7 +62,8 @@ def _syscall_test(
         overlay = False,
         add_uds_tree = False,
         vfs2 = False,
-        fuse = False):
+        fuse = False,
+        debug = True):
     # Prepend "runsc" to non-native platform names.
     full_platform = platform if platform == "native" else "runsc_" + platform
 
@@ -111,6 +112,8 @@ def _syscall_test(
         "--add-uds-tree=" + str(add_uds_tree),
         "--vfs2=" + str(vfs2),
         "--fuse=" + str(fuse),
+        "--strace=" + str(debug),
+        "--debug=" + str(debug),
     ]
 
     # Call the rule above.
@@ -134,6 +137,7 @@ def syscall_test(
         add_hostinet = False,
         vfs2 = True,
         fuse = False,
+        debug = True,
         tags = None):
     """syscall_test is a macro that will create targets for all platforms.
 
diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD
index d11412c55c..9897946eda 100644
--- a/test/syscalls/BUILD
+++ b/test/syscalls/BUILD
@@ -64,6 +64,8 @@ syscall_test(
 
 syscall_test(
     size = "large",
+    # Produce too many logs in the debug mode.
+    debug = False,
     shard_count = 50,
     # Takes too long for TSAN. Since this is kind of a stress test that doesn't
     # involve much concurrency, TSAN's usefulness here is limited anyway.

From fc68f90fc066473951521b6bdd3adfd6c7dfc61f Mon Sep 17 00:00:00 2001
From: Ayush Ranjan <ayushranjan@google.com>
Date: Thu, 20 Aug 2020 16:25:57 -0700
Subject: [PATCH 045/211] [vfs] Create recursive dir creation util.

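Moved the recursive dir creation util out of runsc/boot/vfs.go
(makeSyntheticMount) into the vfs package as VirtualFilesystem.MkdirAllAt,
which takes the MkdirOptions as a parameter so that other callers can reuse it.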

PiperOrigin-RevId: 327719100
---
 pkg/sentry/vfs/vfs.go | 33 +++++++++++++++++++++++++++++++++
 runsc/boot/vfs.go     | 32 ++------------------------------
 2 files changed, 35 insertions(+), 30 deletions(-)

diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go
index 8a79e13257..ec27562d6c 100644
--- a/pkg/sentry/vfs/vfs.go
+++ b/pkg/sentry/vfs/vfs.go
@@ -36,6 +36,7 @@ package vfs
 
 import (
 	"fmt"
+	"path"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
@@ -782,6 +783,38 @@ func (vfs *VirtualFilesystem) SyncAllFilesystems(ctx context.Context) error {
 	return retErr
 }
 
+// MkdirAllAt recursively creates non-existent directories on the given path
+// (including the last component).
+func (vfs *VirtualFilesystem) MkdirAllAt(ctx context.Context, currentPath string, root VirtualDentry, creds *auth.Credentials, mkdirOpts *MkdirOptions) error {
+	pop := &PathOperation{
+		Root:  root,
+		Start: root,
+		Path:  fspath.Parse(currentPath),
+	}
+	stat, err := vfs.StatAt(ctx, creds, pop, &StatOptions{Mask: linux.STATX_TYPE})
+	switch err {
+	case nil:
+		if stat.Mask&linux.STATX_TYPE == 0 || stat.Mode&linux.FileTypeMask != linux.ModeDirectory {
+			return syserror.ENOTDIR
+		}
+		// Directory already exists.
+		return nil
+	case syserror.ENOENT:
+		// Expected, we will create the dir.
+	default:
+		return fmt.Errorf("stat failed for %q during directory creation: %w", currentPath, err)
+	}
+
+	// Recurse to ensure parent is created and then create the final directory.
+	if err := vfs.MkdirAllAt(ctx, path.Dir(currentPath), root, creds, mkdirOpts); err != nil {
+		return err
+	}
+	if err := vfs.MkdirAt(ctx, creds, pop, mkdirOpts); err != nil {
+		return fmt.Errorf("failed to create directory %q: %w", currentPath, err)
+	}
+	return nil
+}
+
 // A VirtualDentry represents a node in a VFS tree, by combining a Dentry
 // (which represents a node in a Filesystem's tree) and a Mount (which
 // represents the Filesystem's position in a VFS mount tree).
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
index 3da7a64f04..f27a6ff6bd 100644
--- a/runsc/boot/vfs.go
+++ b/runsc/boot/vfs.go
@@ -16,7 +16,6 @@ package boot
 
 import (
 	"fmt"
-	"path"
 	"sort"
 	"strings"
 
@@ -274,7 +273,7 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.C
 		return nil
 	}
 
-	if err := c.makeSyntheticMount(ctx, submount.Destination, root, creds); err != nil {
+	if err := c.k.VFS().MkdirAllAt(ctx, submount.Destination, root, creds, &vfs.MkdirOptions{Mode: 0777, ForSyntheticMountpoint: true}); err != nil {
 		return err
 	}
 	if err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts); err != nil {
@@ -348,33 +347,6 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo
 	return fsName, opts, nil
 }
 
-func (c *containerMounter) makeSyntheticMount(ctx context.Context, currentPath string, root vfs.VirtualDentry, creds *auth.Credentials) error {
-	target := &vfs.PathOperation{
-		Root:  root,
-		Start: root,
-		Path:  fspath.Parse(currentPath),
-	}
-	_, err := c.k.VFS().StatAt(ctx, creds, target, &vfs.StatOptions{})
-	if err == nil {
-		log.Debugf("Mount point %q already exists", currentPath)
-		return nil
-	}
-	if err != syserror.ENOENT {
-		return fmt.Errorf("stat failed for %q during mount point creation: %w", currentPath, err)
-	}
-
-	// Recurse to ensure parent is created and then create the mount point.
-	if err := c.makeSyntheticMount(ctx, path.Dir(currentPath), root, creds); err != nil {
-		return err
-	}
-	log.Debugf("Creating dir %q for mount point", currentPath)
-	mkdirOpts := &vfs.MkdirOptions{Mode: 0777, ForSyntheticMountpoint: true}
-	if err := c.k.VFS().MkdirAt(ctx, creds, target, mkdirOpts); err != nil {
-		return fmt.Errorf("failed to create directory %q for mount: %w", currentPath, err)
-	}
-	return nil
-}
-
 // mountTmpVFS2 mounts an internal tmpfs at '/tmp' if it's safe to do so.
 // Technically we don't have to mount tmpfs at /tmp, as we could just rely on
 // the host /tmp, but this is a nice optimization, and fixes some apps that call
@@ -503,7 +475,7 @@ func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *co
 
 	root := mns.Root()
 	defer root.DecRef(ctx)
-	if err := c.makeSyntheticMount(ctx, mount.Destination, root, creds); err != nil {
+	if err := c.k.VFS().MkdirAllAt(ctx, mount.Destination, root, creds, &vfs.MkdirOptions{Mode: 0777, ForSyntheticMountpoint: true}); err != nil {
 		return err
 	}
 

From 1c994ac2e7e5be11c3e773f04da253d955bb6ed1 Mon Sep 17 00:00:00 2001
From: Nicolas Lacasse <nlacasse@google.com>
Date: Fri, 21 Aug 2020 10:21:17 -0700
Subject: [PATCH 046/211] Skip socket stress tests on KVM platform.

They time out.

PiperOrigin-RevId: 327830892
---
 test/syscalls/linux/socket_generic_stress.cc | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/test/syscalls/linux/socket_generic_stress.cc b/test/syscalls/linux/socket_generic_stress.cc
index 19239e9e94..6cd67123dc 100644
--- a/test/syscalls/linux/socket_generic_stress.cc
+++ b/test/syscalls/linux/socket_generic_stress.cc
@@ -30,6 +30,9 @@ namespace testing {
 using ConnectStressTest = SocketPairTest;
 
 TEST_P(ConnectStressTest, Reset65kTimes) {
+  // TODO(b/165912341): These are too slow on KVM platform with nested virt.
+  SKIP_IF(GvisorPlatform() == Platform::kKVM);
+
   for (int i = 0; i < 1 << 16; ++i) {
     auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
 
@@ -68,6 +71,9 @@ INSTANTIATE_TEST_SUITE_P(
 using PersistentListenerConnectStressTest = SocketPairTest;
 
 TEST_P(PersistentListenerConnectStressTest, 65kTimesShutdownCloseFirst) {
+  // TODO(b/165912341): These are too slow on KVM platform with nested virt.
+  SKIP_IF(GvisorPlatform() == Platform::kKVM);
+
   for (int i = 0; i < 1 << 16; ++i) {
     auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
     ASSERT_THAT(shutdown(sockets->first_fd(), SHUT_RDWR), SyscallSucceeds());
@@ -87,6 +93,9 @@ TEST_P(PersistentListenerConnectStressTest, 65kTimesShutdownCloseFirst) {
 }
 
 TEST_P(PersistentListenerConnectStressTest, 65kTimesShutdownCloseSecond) {
+  // TODO(b/165912341): These are too slow on KVM platform with nested virt.
+  SKIP_IF(GvisorPlatform() == Platform::kKVM);
+
   for (int i = 0; i < 1 << 16; ++i) {
     auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
     ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_RDWR), SyscallSucceeds());
@@ -106,6 +115,9 @@ TEST_P(PersistentListenerConnectStressTest, 65kTimesShutdownCloseSecond) {
 }
 
 TEST_P(PersistentListenerConnectStressTest, 65kTimesClose) {
+  // TODO(b/165912341): These are too slow on KVM platform with nested virt.
+  SKIP_IF(GvisorPlatform() == Platform::kKVM);
+
   for (int i = 0; i < 1 << 16; ++i) {
     auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
   }

From a687a27e25a13128f7f3a914585a8cf8dc816e8f Mon Sep 17 00:00:00 2001
From: Zach Koopmans <zkoopmans@google.com>
Date: Fri, 21 Aug 2020 11:37:12 -0700
Subject: [PATCH 047/211] Skip ElfInterpreterStaticTest for 5.X kernels.

gVisor emulates a 4.6 kernel, but when run natively the test does not work on
5.X kernels (observed on our Ubuntu 18.04 image). Skip it on such hosts.

PiperOrigin-RevId: 327845037
---
 test/syscalls/linux/exec_binary.cc | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/test/syscalls/linux/exec_binary.cc b/test/syscalls/linux/exec_binary.cc
index 18d2f22c15..3797fd4c89 100644
--- a/test/syscalls/linux/exec_binary.cc
+++ b/test/syscalls/linux/exec_binary.cc
@@ -1042,6 +1042,13 @@ class ElfInterpreterStaticTest
 
 // Statically linked ELF with a statically linked ELF interpreter.
 TEST_P(ElfInterpreterStaticTest, Test) {
+  // TODO(gvisor.dev/issue/3721): Test has been observed to segfault on 5.X
+  // kernels.
+  if (!IsRunningOnGvisor()) {
+    auto version = ASSERT_NO_ERRNO_AND_VALUE(GetKernelVersion());
+    SKIP_IF(version.major > 4);
+  }
+
   const std::vector<char> segment_suffix = std::get<0>(GetParam());
   const int expected_errno = std::get<1>(GetParam());
 

From 124b95efc2799b09a8a5b47f0bfa387eaace8cf5 Mon Sep 17 00:00:00 2001
From: Nicolas Lacasse <nlacasse@google.com>
Date: Fri, 21 Aug 2020 12:08:25 -0700
Subject: [PATCH 048/211] Fix parent directory creation in CreateDeviceFile.

It was not properly creating recursive directories. Added tests for this case.

Updates #1196

PiperOrigin-RevId: 327850811
---
 pkg/sentry/fsimpl/devtmpfs/BUILD            |   1 +
 pkg/sentry/fsimpl/devtmpfs/devtmpfs.go      |  13 +-
 pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go | 189 +++++++++++++++-----
 3 files changed, 155 insertions(+), 48 deletions(-)

diff --git a/pkg/sentry/fsimpl/devtmpfs/BUILD b/pkg/sentry/fsimpl/devtmpfs/BUILD
index aa0c2ad8cb..01bbee5adc 100644
--- a/pkg/sentry/fsimpl/devtmpfs/BUILD
+++ b/pkg/sentry/fsimpl/devtmpfs/BUILD
@@ -24,6 +24,7 @@ go_test(
     library = ":devtmpfs",
     deps = [
         "//pkg/abi/linux",
+        "//pkg/context",
         "//pkg/fspath",
         "//pkg/sentry/contexttest",
         "//pkg/sentry/fsimpl/tmpfs",
diff --git a/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go b/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go
index 2ed5fa8a98..52f44f66db 100644
--- a/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go
+++ b/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go
@@ -18,6 +18,7 @@ package devtmpfs
 
 import (
 	"fmt"
+	"path"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
@@ -150,13 +151,11 @@ func (a *Accessor) CreateDeviceFile(ctx context.Context, pathname string, kind v
 
 	// Create any parent directories. See
 	// devtmpfs.c:handle_create()=>path_create().
-	for it := fspath.Parse(pathname).Begin; it.NextOk(); it = it.Next() {
-		pop := a.pathOperationAt(it.String())
-		if err := a.vfsObj.MkdirAt(actx, a.creds, pop, &vfs.MkdirOptions{
-			Mode: 0755,
-		}); err != nil {
-			return fmt.Errorf("failed to create directory %q: %v", it.String(), err)
-		}
+	parent := path.Dir(pathname)
+	if err := a.vfsObj.MkdirAllAt(ctx, parent, a.root, a.creds, &vfs.MkdirOptions{
+		Mode: 0755,
+	}); err != nil {
+		return fmt.Errorf("failed to create device parent directory %q: %v", parent, err)
 	}
 
 	// NOTE: Linux's devtmpfs refuses to automatically delete files it didn't
diff --git a/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go b/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go
index 747867cca1..6b56c5e71d 100644
--- a/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go
+++ b/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go
@@ -15,9 +15,11 @@
 package devtmpfs
 
 import (
+	"path"
 	"testing"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/fspath"
 	"gvisor.dev/gvisor/pkg/sentry/contexttest"
 	"gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
@@ -25,10 +27,13 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 )
 
-func TestDevtmpfs(t *testing.T) {
+const devPath = "/dev"
+
+func setupDevtmpfs(t *testing.T) (context.Context, *auth.Credentials, *vfs.VirtualFilesystem, vfs.VirtualDentry, func()) {
+	t.Helper()
+
 	ctx := contexttest.Context(t)
 	creds := auth.CredentialsFromContext(ctx)
-
 	vfsObj := &vfs.VirtualFilesystem{}
 	if err := vfsObj.Init(ctx); err != nil {
 		t.Fatalf("VFS init: %v", err)
@@ -43,14 +48,11 @@ func TestDevtmpfs(t *testing.T) {
 	})
 
 	// Create a test mount namespace with devtmpfs mounted at "/dev".
-	const devPath = "/dev"
 	mntns, err := vfsObj.NewMountNamespace(ctx, creds, "tmpfs" /* source */, "tmpfs" /* fsTypeName */, &vfs.GetFilesystemOptions{})
 	if err != nil {
 		t.Fatalf("failed to create tmpfs root mount: %v", err)
 	}
-	defer mntns.DecRef(ctx)
 	root := mntns.Root()
-	defer root.DecRef(ctx)
 	devpop := vfs.PathOperation{
 		Root:  root,
 		Start: root,
@@ -65,6 +67,16 @@ func TestDevtmpfs(t *testing.T) {
 		t.Fatalf("failed to mount devtmpfs: %v", err)
 	}
 
+	return ctx, creds, vfsObj, root, func() {
+		root.DecRef(ctx)
+		mntns.DecRef(ctx)
+	}
+}
+
+func TestUserspaceInit(t *testing.T) {
+	ctx, creds, vfsObj, root, cleanup := setupDevtmpfs(t)
+	defer cleanup()
+
 	a, err := NewAccessor(ctx, vfsObj, creds, "devtmpfs")
 	if err != nil {
 		t.Fatalf("failed to create devtmpfs.Accessor: %v", err)
@@ -75,48 +87,143 @@ func TestDevtmpfs(t *testing.T) {
 	if err := a.UserspaceInit(ctx); err != nil {
 		t.Fatalf("failed to userspace-initialize devtmpfs: %v", err)
 	}
+
 	// Created files should be visible in the test mount namespace.
-	abspath := devPath + "/fd"
-	target, err := vfsObj.ReadlinkAt(ctx, creds, &vfs.PathOperation{
-		Root:  root,
-		Start: root,
-		Path:  fspath.Parse(abspath),
-	})
-	if want := "/proc/self/fd"; err != nil || target != want {
-		t.Fatalf("readlink(%q): got (%q, %v), wanted (%q, nil)", abspath, target, err, want)
+	links := []struct {
+		source string
+		target string
+	}{
+		{
+			source: "fd",
+			target: "/proc/self/fd",
+		},
+		{
+			source: "stdin",
+			target: "/proc/self/fd/0",
+		},
+		{
+			source: "stdout",
+			target: "/proc/self/fd/1",
+		},
+		{
+			source: "stderr",
+			target: "/proc/self/fd/2",
+		},
+		{
+			source: "ptmx",
+			target: "pts/ptmx",
+		},
 	}
 
-	// Create a dummy device special file using a devtmpfs.Accessor.
-	const (
-		pathInDev = "dummy"
-		kind      = vfs.CharDevice
-		major     = 12
-		minor     = 34
-		perms     = 0600
-		wantMode  = linux.S_IFCHR | perms
-	)
-	if err := a.CreateDeviceFile(ctx, pathInDev, kind, major, minor, perms); err != nil {
-		t.Fatalf("failed to create device file: %v", err)
+	for _, link := range links {
+		abspath := path.Join(devPath, link.source)
+		if gotTarget, err := vfsObj.ReadlinkAt(ctx, creds, &vfs.PathOperation{
+			Root:  root,
+			Start: root,
+			Path:  fspath.Parse(abspath),
+		}); err != nil || gotTarget != link.target {
+			t.Errorf("readlink(%q): got (%q, %v), wanted (%q, nil)", abspath, gotTarget, err, link.target)
+		}
 	}
-	// The device special file should be visible in the test mount namespace.
-	abspath = devPath + "/" + pathInDev
-	stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{
-		Root:  root,
-		Start: root,
-		Path:  fspath.Parse(abspath),
-	}, &vfs.StatOptions{
-		Mask: linux.STATX_TYPE | linux.STATX_MODE,
-	})
-	if err != nil {
-		t.Fatalf("failed to stat device file at %q: %v", abspath, err)
+
+	dirs := []string{"shm", "pts"}
+	for _, dir := range dirs {
+		abspath := path.Join(devPath, dir)
+		statx, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{
+			Root:  root,
+			Start: root,
+			Path:  fspath.Parse(abspath),
+		}, &vfs.StatOptions{
+			Mask: linux.STATX_MODE,
+		})
+		if err != nil {
+			t.Errorf("stat(%q): got error %v ", abspath, err)
+			continue
+		}
+		if want := uint16(0755) | linux.S_IFDIR; statx.Mode != want {
+			t.Errorf("stat(%q): got mode %x, want %x", abspath, statx.Mode, want)
+		}
 	}
-	if stat.Mode != wantMode {
-		t.Errorf("device file mode: got %v, wanted %v", stat.Mode, wantMode)
+}
+
+func TestCreateDeviceFile(t *testing.T) {
+	ctx, creds, vfsObj, root, cleanup := setupDevtmpfs(t)
+	defer cleanup()
+
+	a, err := NewAccessor(ctx, vfsObj, creds, "devtmpfs")
+	if err != nil {
+		t.Fatalf("failed to create devtmpfs.Accessor: %v", err)
 	}
-	if stat.RdevMajor != major {
-		t.Errorf("major device number: got %v, wanted %v", stat.RdevMajor, major)
+	defer a.Release(ctx)
+
+	devFiles := []struct {
+		path  string
+		kind  vfs.DeviceKind
+		major uint32
+		minor uint32
+		perms uint16
+	}{
+		{
+			path:  "dummy",
+			kind:  vfs.CharDevice,
+			major: 12,
+			minor: 34,
+			perms: 0600,
+		},
+		{
+			path:  "foo/bar",
+			kind:  vfs.BlockDevice,
+			major: 13,
+			minor: 35,
+			perms: 0660,
+		},
+		{
+			path:  "foo/baz",
+			kind:  vfs.CharDevice,
+			major: 12,
+			minor: 40,
+			perms: 0666,
+		},
+		{
+			path:  "a/b/c/d/e",
+			kind:  vfs.BlockDevice,
+			major: 12,
+			minor: 34,
+			perms: 0600,
+		},
 	}
-	if stat.RdevMinor != minor {
-		t.Errorf("minor device number: got %v, wanted %v", stat.RdevMinor, minor)
+
+	for _, f := range devFiles {
+		if err := a.CreateDeviceFile(ctx, f.path, f.kind, f.major, f.minor, f.perms); err != nil {
+			t.Fatalf("failed to create device file: %v", err)
+		}
+		// The device special file should be visible in the test mount namespace.
+		abspath := path.Join(devPath, f.path)
+		stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{
+			Root:  root,
+			Start: root,
+			Path:  fspath.Parse(abspath),
+		}, &vfs.StatOptions{
+			Mask: linux.STATX_TYPE | linux.STATX_MODE,
+		})
+		if err != nil {
+			t.Fatalf("failed to stat device file at %q: %v", abspath, err)
+		}
+		if stat.RdevMajor != f.major {
+			t.Errorf("major device number: got %v, wanted %v", stat.RdevMajor, f.major)
+		}
+		if stat.RdevMinor != f.minor {
+			t.Errorf("minor device number: got %v, wanted %v", stat.RdevMinor, f.minor)
+		}
+		wantMode := f.perms
+		switch f.kind {
+		case vfs.CharDevice:
+			wantMode |= linux.S_IFCHR
+		case vfs.BlockDevice:
+			wantMode |= linux.S_IFBLK
+		}
+		if stat.Mode != wantMode {
+			t.Errorf("device file mode: got %v, wanted %v", stat.Mode, wantMode)
+		}
 	}
 }

From b3f271479b0de5876e46e0afba473f6888b6ce50 Mon Sep 17 00:00:00 2001
From: Zach Koopmans <zkoopmans@google.com>
Date: Fri, 21 Aug 2020 14:19:29 -0700
Subject: [PATCH 049/211] Fix Inotify tests in open source.

The order of unlink events (dir event/file event) is undefined,
so make tests accept both orderings.

PiperOrigin-RevId: 327873316
---
 test/syscalls/linux/inotify.cc | 56 ++++++++++++++++++++++------------
 1 file changed, 36 insertions(+), 20 deletions(-)

diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc
index 5cb325a9e8..a5c421118f 100644
--- a/test/syscalls/linux/inotify.cc
+++ b/test/syscalls/linux/inotify.cc
@@ -1371,9 +1371,10 @@ TEST(Inotify, HardlinksReuseSameWatch) {
   // that now and drain the resulting events.
   file1_fd.reset();
   events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
-  ASSERT_THAT(events,
-              Are({Event(IN_CLOSE_WRITE, root_wd, Basename(file1.path())),
-                   Event(IN_CLOSE_WRITE, file1_wd)}));
+  ASSERT_THAT(
+      events,
+      AreUnordered({Event(IN_CLOSE_WRITE, root_wd, Basename(file1.path())),
+                    Event(IN_CLOSE_WRITE, file1_wd)}));
 
   // Try removing the link and let's see what events show up. Note that after
   // this, we still have a link to the file so the watch shouldn't be
@@ -1381,8 +1382,9 @@ TEST(Inotify, HardlinksReuseSameWatch) {
   const std::string link1_path = link1.reset();
 
   events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
-  ASSERT_THAT(events, Are({Event(IN_ATTRIB, link1_wd),
-                           Event(IN_DELETE, root_wd, Basename(link1_path))}));
+  ASSERT_THAT(events,
+              AreUnordered({Event(IN_ATTRIB, link1_wd),
+                            Event(IN_DELETE, root_wd, Basename(link1_path))}));
 
   // Now remove the other link. Since this is the last link to the file, the
   // watch should be automatically removed.
@@ -1934,14 +1936,22 @@ TEST(Inotify, IncludeUnlinkedFile_NoRandomSave) {
   ASSERT_THAT(write(fd.get(), &val, sizeof(val)), SyscallSucceeds());
   std::vector<Event> events =
       ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
-  EXPECT_THAT(events, Are({
-                          Event(IN_ATTRIB, file_wd),
-                          Event(IN_DELETE, dir_wd, Basename(file.path())),
-                          Event(IN_ACCESS, dir_wd, Basename(file.path())),
-                          Event(IN_ACCESS, file_wd),
-                          Event(IN_MODIFY, dir_wd, Basename(file.path())),
-                          Event(IN_MODIFY, file_wd),
-                      }));
+  EXPECT_THAT(events, AnyOf(Are({
+                                Event(IN_ATTRIB, file_wd),
+                                Event(IN_DELETE, dir_wd, Basename(file.path())),
+                                Event(IN_ACCESS, dir_wd, Basename(file.path())),
+                                Event(IN_ACCESS, file_wd),
+                                Event(IN_MODIFY, dir_wd, Basename(file.path())),
+                                Event(IN_MODIFY, file_wd),
+                            }),
+                            Are({
+                                Event(IN_DELETE, dir_wd, Basename(file.path())),
+                                Event(IN_ATTRIB, file_wd),
+                                Event(IN_ACCESS, dir_wd, Basename(file.path())),
+                                Event(IN_ACCESS, file_wd),
+                                Event(IN_MODIFY, dir_wd, Basename(file.path())),
+                                Event(IN_MODIFY, file_wd),
+                            })));
 
   fd.reset();
   events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
@@ -1984,7 +1994,7 @@ TEST(Inotify, ExcludeUnlink_NoRandomSave) {
   ASSERT_THAT(read(fd.get(), &val, sizeof(val)), SyscallSucceeds());
   std::vector<Event> events =
       ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
-  EXPECT_THAT(events, Are({
+  EXPECT_THAT(events, AreUnordered({
                           Event(IN_ATTRIB, file_wd),
                           Event(IN_DELETE, dir_wd, Basename(file.path())),
                       }));
@@ -2127,12 +2137,18 @@ TEST(Inotify, ExcludeUnlinkInodeEvents_NoRandomSave) {
   ASSERT_THAT(ftruncate(fd.get(), 12345), SyscallSucceeds());
   std::vector<Event> events =
       ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
-  EXPECT_THAT(events, Are({
-                          Event(IN_ATTRIB, file_wd),
-                          Event(IN_DELETE, dir_wd, Basename(file.path())),
-                          Event(IN_MODIFY, dir_wd, Basename(file.path())),
-                          Event(IN_MODIFY, file_wd),
-                      }));
+  EXPECT_THAT(events, AnyOf(Are({
+                                Event(IN_ATTRIB, file_wd),
+                                Event(IN_DELETE, dir_wd, Basename(file.path())),
+                                Event(IN_MODIFY, dir_wd, Basename(file.path())),
+                                Event(IN_MODIFY, file_wd),
+                            }),
+                            Are({
+                                Event(IN_DELETE, dir_wd, Basename(file.path())),
+                                Event(IN_ATTRIB, file_wd),
+                                Event(IN_MODIFY, dir_wd, Basename(file.path())),
+                                Event(IN_MODIFY, file_wd),
+                            })));
 
   const struct timeval times[2] = {{1, 0}, {2, 0}};
   ASSERT_THAT(futimes(fd.get(), times), SyscallSucceeds());

From 1666c8919d9d4ced966977f23e2905ff835eaaa0 Mon Sep 17 00:00:00 2001
From: Nicolas Lacasse <nlacasse@google.com>
Date: Fri, 21 Aug 2020 14:28:27 -0700
Subject: [PATCH 050/211] Make mounts ReadWrite first, then later change to
 ReadOnly.

This lets us create "synthetic" mountpoint directories in ReadOnly mounts
during VFS setup.

Also add vfs.WithMountNamespace (in pkg/sentry/vfs/context.go), as some
filesystems (like overlay) require a MountNamespace on ctx to handle
vfs.Filesystem operations.

PiperOrigin-RevId: 327874971
---
 pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go   |  2 +-
 .../fsimpl/ext/benchmark/benchmark_test.go    |  2 +-
 pkg/sentry/fsimpl/proc/tasks_test.go          |  2 +-
 pkg/sentry/fsimpl/tmpfs/benchmark_test.go     |  2 +-
 pkg/sentry/syscalls/linux/vfs2/mount.go       |  4 +-
 pkg/sentry/vfs/context.go                     | 24 ++++++++
 pkg/sentry/vfs/mount.go                       | 19 +++++--
 runsc/boot/vfs.go                             | 55 +++++++++++++------
 8 files changed, 83 insertions(+), 27 deletions(-)

diff --git a/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go b/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go
index 6b56c5e71d..827a608cb8 100644
--- a/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go
+++ b/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go
@@ -63,7 +63,7 @@ func setupDevtmpfs(t *testing.T) (context.Context, *auth.Credentials, *vfs.Virtu
 	}); err != nil {
 		t.Fatalf("failed to create mount point: %v", err)
 	}
-	if err := vfsObj.MountAt(ctx, creds, "devtmpfs" /* source */, &devpop, "devtmpfs" /* fsTypeName */, &vfs.MountOptions{}); err != nil {
+	if _, err := vfsObj.MountAt(ctx, creds, "devtmpfs" /* source */, &devpop, "devtmpfs" /* fsTypeName */, &vfs.MountOptions{}); err != nil {
 		t.Fatalf("failed to mount devtmpfs: %v", err)
 	}
 
diff --git a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go
index 8f7d5a9bb6..a2cc9b59f7 100644
--- a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go
+++ b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go
@@ -90,7 +90,7 @@ func mount(b *testing.B, imagePath string, vfsfs *vfs.VirtualFilesystem, pop *vf
 	ctx := contexttest.Context(b)
 	creds := auth.CredentialsFromContext(ctx)
 
-	if err := vfsfs.MountAt(ctx, creds, imagePath, pop, "extfs", &vfs.MountOptions{
+	if _, err := vfsfs.MountAt(ctx, creds, imagePath, pop, "extfs", &vfs.MountOptions{
 		GetFilesystemOptions: vfs.GetFilesystemOptions{
 			InternalData: int(f.Fd()),
 		},
diff --git a/pkg/sentry/fsimpl/proc/tasks_test.go b/pkg/sentry/fsimpl/proc/tasks_test.go
index 3c9297dee7..d82b3d2f37 100644
--- a/pkg/sentry/fsimpl/proc/tasks_test.go
+++ b/pkg/sentry/fsimpl/proc/tasks_test.go
@@ -132,7 +132,7 @@ func setup(t *testing.T) *testutil.System {
 			},
 		},
 	}
-	if err := k.VFS().MountAt(ctx, creds, "", pop, Name, mntOpts); err != nil {
+	if _, err := k.VFS().MountAt(ctx, creds, "", pop, Name, mntOpts); err != nil {
 		t.Fatalf("MountAt(/proc): %v", err)
 	}
 	return testutil.NewSystem(ctx, t, k.VFS(), mntns)
diff --git a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
index d263147c24..e5a4218e86 100644
--- a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
+++ b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
@@ -405,7 +405,7 @@ func BenchmarkVFS2TmpfsMountStat(b *testing.B) {
 			}
 			defer mountPoint.DecRef(ctx)
 			// Create and mount the submount.
-			if err := vfsObj.MountAt(ctx, creds, "", &pop, "tmpfs", &vfs.MountOptions{}); err != nil {
+			if _, err := vfsObj.MountAt(ctx, creds, "", &pop, "tmpfs", &vfs.MountOptions{}); err != nil {
 				b.Fatalf("failed to mount tmpfs submount: %v", err)
 			}
 			filePathBuilder.WriteString(mountPointName)
diff --git a/pkg/sentry/syscalls/linux/vfs2/mount.go b/pkg/sentry/syscalls/linux/vfs2/mount.go
index 4bd5c7ca2e..769c9b92fc 100644
--- a/pkg/sentry/syscalls/linux/vfs2/mount.go
+++ b/pkg/sentry/syscalls/linux/vfs2/mount.go
@@ -109,8 +109,8 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 		return 0, nil, err
 	}
 	defer target.Release(t)
-
-	return 0, nil, t.Kernel().VFS().MountAt(t, creds, source, &target.pop, fsType, &opts)
+	_, err = t.Kernel().VFS().MountAt(t, creds, source, &target.pop, fsType, &opts)
+	return 0, nil, err
 }
 
 // Umount2 implements Linux syscall umount2(2).
diff --git a/pkg/sentry/vfs/context.go b/pkg/sentry/vfs/context.go
index c9e724fef3..97018651f2 100644
--- a/pkg/sentry/vfs/context.go
+++ b/pkg/sentry/vfs/context.go
@@ -40,6 +40,30 @@ func MountNamespaceFromContext(ctx context.Context) *MountNamespace {
 	return nil
 }
 
+type mountNamespaceContext struct {
+	context.Context
+	mntns *MountNamespace
+}
+
+// Value implements Context.Value.
+func (mc mountNamespaceContext) Value(key interface{}) interface{} {
+	switch key {
+	case CtxMountNamespace:
+		mc.mntns.IncRef()
+		return mc.mntns
+	default:
+		return mc.Context.Value(key)
+	}
+}
+
+// WithMountNamespace returns a copy of ctx with the given MountNamespace.
+func WithMountNamespace(ctx context.Context, mntns *MountNamespace) context.Context {
+	return &mountNamespaceContext{
+		Context: ctx,
+		mntns:   mntns,
+	}
+}
+
 // RootFromContext returns the VFS root used by ctx. It takes a reference on
 // the returned VirtualDentry. If ctx does not have a specific VFS root,
 // RootFromContext returns a zero-value VirtualDentry.
diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go
index 714af69072..09fea3628c 100644
--- a/pkg/sentry/vfs/mount.go
+++ b/pkg/sentry/vfs/mount.go
@@ -263,16 +263,20 @@ func (vfs *VirtualFilesystem) ConnectMountAt(ctx context.Context, creds *auth.Cr
 }
 
 // MountAt creates and mounts a Filesystem configured by the given arguments.
-func (vfs *VirtualFilesystem) MountAt(ctx context.Context, creds *auth.Credentials, source string, target *PathOperation, fsTypeName string, opts *MountOptions) error {
+// The VirtualFilesystem will hold a reference to the Mount until it is unmounted.
+//
+// This method returns the mounted Mount without a reference, for convenience
+// during VFS setup when there is no chance of racing with unmount.
+func (vfs *VirtualFilesystem) MountAt(ctx context.Context, creds *auth.Credentials, source string, target *PathOperation, fsTypeName string, opts *MountOptions) (*Mount, error) {
 	mnt, err := vfs.MountDisconnected(ctx, creds, source, fsTypeName, opts)
 	if err != nil {
-		return err
+		return nil, err
 	}
 	defer mnt.DecRef(ctx)
 	if err := vfs.ConnectMountAt(ctx, creds, mnt, target); err != nil {
-		return err
+		return nil, err
 	}
-	return nil
+	return mnt, nil
 }
 
 // UmountAt removes the Mount at the given path.
@@ -657,6 +661,13 @@ retryFirst:
 	return VirtualDentry{mnt, d}
 }
 
+// SetMountReadOnly sets the mount as ReadOnly.
+func (vfs *VirtualFilesystem) SetMountReadOnly(mnt *Mount, ro bool) error {
+	vfs.mountMu.Lock()
+	defer vfs.mountMu.Unlock()
+	return mnt.setReadOnlyLocked(ro)
+}
+
 // CheckBeginWrite increments the counter of in-progress write operations on
 // mnt. If mnt is mounted MS_RDONLY, CheckBeginWrite does nothing and returns
 // EROFS.
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
index f27a6ff6bd..fb200e9887 100644
--- a/runsc/boot/vfs.go
+++ b/runsc/boot/vfs.go
@@ -205,15 +205,34 @@ func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *config.
 	for i := range mounts {
 		submount := &mounts[i]
 		log.Debugf("Mounting %q to %q, type: %s, options: %s", submount.Source, submount.Destination, submount.Type, submount.Options)
+		var (
+			mnt *vfs.Mount
+			err error
+		)
+
 		if hint := c.hints.findMount(submount.Mount); hint != nil && hint.isSupported() {
-			if err := c.mountSharedSubmountVFS2(ctx, conf, mns, creds, submount.Mount, hint); err != nil {
+			mnt, err = c.mountSharedSubmountVFS2(ctx, conf, mns, creds, submount.Mount, hint)
+			if err != nil {
 				return fmt.Errorf("mount shared mount %q to %q: %v", hint.name, submount.Destination, err)
 			}
 		} else {
-			if err := c.mountSubmountVFS2(ctx, conf, mns, creds, submount); err != nil {
+			mnt, err = c.mountSubmountVFS2(ctx, conf, mns, creds, submount)
+			if err != nil {
 				return fmt.Errorf("mount submount %q: %w", submount.Destination, err)
 			}
 		}
+
+		if mnt != nil && mnt.ReadOnly() {
+			// Switch to ReadWrite while we setup submounts.
+			if err := c.k.VFS().SetMountReadOnly(mnt, false); err != nil {
+				return fmt.Errorf("failed to set mount at %q readwrite: %v", submount.Destination, err)
+			}
+			defer func() {
+				if err := c.k.VFS().SetMountReadOnly(mnt, true); err != nil {
+					panic(fmt.Sprintf("failed to restore mount at %q back to readonly: %v", submount.Destination, err))
+				}
+			}()
+		}
 	}
 
 	if err := c.mountTmpVFS2(ctx, conf, creds, mns); err != nil {
@@ -256,7 +275,7 @@ func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) {
 	return mounts, nil
 }
 
-func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountAndFD) error {
+func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountAndFD) (*vfs.Mount, error) {
 	root := mns.Root()
 	defer root.DecRef(ctx)
 	target := &vfs.PathOperation{
@@ -266,21 +285,22 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.C
 	}
 	fsName, opts, err := c.getMountNameAndOptionsVFS2(conf, submount)
 	if err != nil {
-		return fmt.Errorf("mountOptions failed: %w", err)
+		return nil, fmt.Errorf("mountOptions failed: %w", err)
 	}
 	if len(fsName) == 0 {
 		// Filesystem is not supported (e.g. cgroup), just skip it.
-		return nil
+		return nil, nil
 	}
 
 	if err := c.k.VFS().MkdirAllAt(ctx, submount.Destination, root, creds, &vfs.MkdirOptions{Mode: 0777, ForSyntheticMountpoint: true}); err != nil {
-		return err
+		return nil, err
 	}
-	if err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts); err != nil {
-		return fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.Destination, submount.Type, err, opts)
+	mnt, err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts)
+	if err != nil {
+		return nil, fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.Destination, submount.Type, err, opts)
 	}
 	log.Infof("Mounted %q to %q type: %s, internal-options: %q", submount.Source, submount.Destination, submount.Type, opts.GetFilesystemOptions.Data)
-	return nil
+	return mnt, nil
 }
 
 // getMountNameAndOptionsVFS2 retrieves the fsName, opts, and useOverlay values
@@ -407,7 +427,8 @@ func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *config.Config
 			// another user. This is normally done for /tmp.
 			Options: []string{"mode=01777"},
 		}
-		return c.mountSubmountVFS2(ctx, conf, mns, creds, &mountAndFD{Mount: tmpMount})
+		_, err := c.mountSubmountVFS2(ctx, conf, mns, creds, &mountAndFD{Mount: tmpMount})
+		return err
 
 	case syserror.ENOTDIR:
 		// Not a dir?! Let it be.
@@ -458,25 +479,25 @@ func (c *containerMounter) mountSharedMasterVFS2(ctx context.Context, conf *conf
 
 // mountSharedSubmount binds mount to a previously mounted volume that is shared
 // among containers in the same pod.
-func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, mount specs.Mount, source *mountHint) error {
+func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, mount specs.Mount, source *mountHint) (*vfs.Mount, error) {
 	if err := source.checkCompatible(mount); err != nil {
-		return err
+		return nil, err
 	}
 
 	_, opts, err := c.getMountNameAndOptionsVFS2(conf, &mountAndFD{Mount: mount})
 	if err != nil {
-		return err
+		return nil, err
 	}
 	newMnt, err := c.k.VFS().NewDisconnectedMount(source.vfsMount.Filesystem(), source.vfsMount.Root(), opts)
 	if err != nil {
-		return err
+		return nil, err
 	}
 	defer newMnt.DecRef(ctx)
 
 	root := mns.Root()
 	defer root.DecRef(ctx)
 	if err := c.k.VFS().MkdirAllAt(ctx, mount.Destination, root, creds, &vfs.MkdirOptions{Mode: 0777, ForSyntheticMountpoint: true}); err != nil {
-		return err
+		return nil, err
 	}
 
 	target := &vfs.PathOperation{
@@ -485,8 +506,8 @@ func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *co
 		Path:  fspath.Parse(mount.Destination),
 	}
 	if err := c.k.VFS().ConnectMountAt(ctx, creds, newMnt, target); err != nil {
-		return err
+		return nil, err
 	}
 	log.Infof("Mounted %q type shared bind to %q", mount.Destination, source.name)
-	return nil
+	return newMnt, nil
 }

From edf3d6c9e6730d246fd7f26925fbfec8823638d2 Mon Sep 17 00:00:00 2001
From: Nicolas Lacasse <nlacasse@google.com>
Date: Fri, 21 Aug 2020 15:04:24 -0700
Subject: [PATCH 051/211] Pass overlay credentials via context in copy up.

Some VFS operations (those which operate on FDs) get their credentials via the
context instead of via an explicit creds param. For these cases, we must pass
the overlay credentials on the context.

PiperOrigin-RevId: 327881259
---
 pkg/sentry/contexttest/contexttest.go | 22 +---------------------
 pkg/sentry/fsimpl/overlay/copy_up.go  |  5 +++++
 pkg/sentry/kernel/auth/context.go     | 20 ++++++++++++++++++++
 3 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/pkg/sentry/contexttest/contexttest.go b/pkg/sentry/contexttest/contexttest.go
index 8e5658c7a0..dfd195a235 100644
--- a/pkg/sentry/contexttest/contexttest.go
+++ b/pkg/sentry/contexttest/contexttest.go
@@ -144,27 +144,7 @@ func (t *TestContext) MemoryFile() *pgalloc.MemoryFile {
 // RootContext returns a Context that may be used in tests that need root
 // credentials. Uses ptrace as the platform.Platform.
 func RootContext(tb testing.TB) context.Context {
-	return WithCreds(Context(tb), auth.NewRootCredentials(auth.NewRootUserNamespace()))
-}
-
-// WithCreds returns a copy of ctx carrying creds.
-func WithCreds(ctx context.Context, creds *auth.Credentials) context.Context {
-	return &authContext{ctx, creds}
-}
-
-type authContext struct {
-	context.Context
-	creds *auth.Credentials
-}
-
-// Value implements context.Context.
-func (ac *authContext) Value(key interface{}) interface{} {
-	switch key {
-	case auth.CtxCredentials:
-		return ac.creds
-	default:
-		return ac.Context.Value(key)
-	}
+	return auth.ContextWithCredentials(Context(tb), auth.NewRootCredentials(auth.NewRootUserNamespace()))
 }
 
 // WithLimitSet returns a copy of ctx carrying l.
diff --git a/pkg/sentry/fsimpl/overlay/copy_up.go b/pkg/sentry/fsimpl/overlay/copy_up.go
index b3d19ff827..13735eb052 100644
--- a/pkg/sentry/fsimpl/overlay/copy_up.go
+++ b/pkg/sentry/fsimpl/overlay/copy_up.go
@@ -22,6 +22,7 @@ import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/fspath"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
@@ -40,6 +41,10 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
 		return nil
 	}
 
+	// Attach our credentials to the context, as some VFS operations use
+	// credentials from context rather than take an explicit creds parameter.
+	ctx = auth.ContextWithCredentials(ctx, d.fs.creds)
+
 	ftype := atomic.LoadUint32(&d.mode) & linux.S_IFMT
 	switch ftype {
 	case linux.S_IFREG, linux.S_IFDIR, linux.S_IFLNK, linux.S_IFBLK, linux.S_IFCHR:
diff --git a/pkg/sentry/kernel/auth/context.go b/pkg/sentry/kernel/auth/context.go
index ef57231271..c08d477871 100644
--- a/pkg/sentry/kernel/auth/context.go
+++ b/pkg/sentry/kernel/auth/context.go
@@ -34,3 +34,23 @@ func CredentialsFromContext(ctx context.Context) *Credentials {
 	}
 	return NewAnonymousCredentials()
 }
+
+// ContextWithCredentials returns a copy of ctx carrying creds.
+func ContextWithCredentials(ctx context.Context, creds *Credentials) context.Context {
+	return &authContext{ctx, creds}
+}
+
+type authContext struct {
+	context.Context
+	creds *Credentials
+}
+
+// Value implements context.Context.
+func (ac *authContext) Value(key interface{}) interface{} {
+	switch key {
+	case CtxCredentials:
+		return ac.creds
+	default:
+		return ac.Context.Value(key)
+	}
+}

From 3810a62b3a2e6bb55c3d030e15ba09665f2f91b3 Mon Sep 17 00:00:00 2001
From: Rahat Mahmood <rahat@google.com>
Date: Fri, 21 Aug 2020 16:03:38 -0700
Subject: [PATCH 052/211] Clarify seek behaviour for kernfs.GenericDirectoryFD.

- Remove comment about GenericDirectoryFD not being compatible with
  dynamic directories. It is currently being used to implement dynamic
  directories.

- Try to handle SEEK_END better than setting the offset to
  infinity. SEEK_END is poorly defined for dynamic directories
  anyways, so at least try to make it work correctly for the static
  entries.

Updates #1193.

PiperOrigin-RevId: 327890128
---
 pkg/sentry/fsimpl/devpts/devpts.go          |  4 +-
 pkg/sentry/fsimpl/fuse/fusefs.go            |  4 +-
 pkg/sentry/fsimpl/kernfs/fd_impl_util.go    | 46 +++++++++++++++++----
 pkg/sentry/fsimpl/kernfs/inode_impl_util.go | 12 +++---
 pkg/sentry/fsimpl/kernfs/kernfs_test.go     |  8 +++-
 pkg/sentry/fsimpl/proc/filesystem.go        |  6 +++
 pkg/sentry/fsimpl/proc/subtasks.go          |  4 +-
 pkg/sentry/fsimpl/proc/task.go              |  8 +++-
 pkg/sentry/fsimpl/proc/task_fds.go          |  8 +++-
 pkg/sentry/fsimpl/proc/tasks.go             |  4 +-
 pkg/sentry/fsimpl/proc/tasks_sys.go         | 12 +++---
 pkg/sentry/fsimpl/sys/sys.go                |  4 +-
 12 files changed, 89 insertions(+), 31 deletions(-)

diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go
index 7169e91af7..3f3a099bd1 100644
--- a/pkg/sentry/fsimpl/devpts/devpts.go
+++ b/pkg/sentry/fsimpl/devpts/devpts.go
@@ -185,7 +185,9 @@ func (i *rootInode) masterClose(t *Terminal) {
 
 // Open implements kernfs.Inode.Open.
 func (i *rootInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
-	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts)
+	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts, kernfs.GenericDirectoryFDOptions{
+		SeekEnd: kernfs.SeekEndStaticEntries,
+	})
 	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index 83c24ec250..44021ee4bc 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -220,7 +220,9 @@ func (fs *filesystem) newInode(creds *auth.Credentials, mode linux.FileMode) *ke
 
 // Open implements kernfs.Inode.Open.
 func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
-	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts)
+	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts, kernfs.GenericDirectoryFDOptions{
+		SeekEnd: kernfs.SeekEndStaticEntries,
+	})
 	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
index fcee6200a1..6518ff5cd0 100644
--- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
@@ -15,7 +15,7 @@
 package kernfs
 
 import (
-	"math"
+	"fmt"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
@@ -28,9 +28,25 @@ import (
 	"gvisor.dev/gvisor/pkg/usermem"
 )
 
+// SeekEndConfig describes the SEEK_END behaviour for FDs.
+type SeekEndConfig int
+
+// Constants related to SEEK_END behaviour for FDs.
+const (
+	// Consider the end of the file to be after the final static entry. This is
+	// the default option.
+	SeekEndStaticEntries = iota
+	// Consider the end of the file to be at offset 0.
+	SeekEndZero
+)
+
+// GenericDirectoryFDOptions contains configuration for a GenericDirectoryFD.
+type GenericDirectoryFDOptions struct {
+	SeekEnd SeekEndConfig
+}
+
 // GenericDirectoryFD implements vfs.FileDescriptionImpl for a generic directory
-// inode that uses OrderChildren to track child nodes. GenericDirectoryFD is not
-// compatible with dynamic directories.
+// inode that uses OrderedChildren to track child nodes.
 //
 // Note that GenericDirectoryFD holds a lock over OrderedChildren while calling
 // IterDirents callback. The IterDirents callback therefore cannot hash or
@@ -45,6 +61,9 @@ type GenericDirectoryFD struct {
 	vfs.DirectoryFileDescriptionDefaultImpl
 	vfs.LockFD
 
+	// Immutable.
+	seekEnd SeekEndConfig
+
 	vfsfd    vfs.FileDescription
 	children *OrderedChildren
 
@@ -57,9 +76,9 @@ type GenericDirectoryFD struct {
 
 // NewGenericDirectoryFD creates a new GenericDirectoryFD and returns its
 // dentry.
-func NewGenericDirectoryFD(m *vfs.Mount, d *vfs.Dentry, children *OrderedChildren, locks *vfs.FileLocks, opts *vfs.OpenOptions) (*GenericDirectoryFD, error) {
+func NewGenericDirectoryFD(m *vfs.Mount, d *vfs.Dentry, children *OrderedChildren, locks *vfs.FileLocks, opts *vfs.OpenOptions, fdOpts GenericDirectoryFDOptions) (*GenericDirectoryFD, error) {
 	fd := &GenericDirectoryFD{}
-	if err := fd.Init(children, locks, opts); err != nil {
+	if err := fd.Init(children, locks, opts, fdOpts); err != nil {
 		return nil, err
 	}
 	if err := fd.vfsfd.Init(fd, opts.Flags, m, d, &vfs.FileDescriptionOptions{}); err != nil {
@@ -71,12 +90,13 @@ func NewGenericDirectoryFD(m *vfs.Mount, d *vfs.Dentry, children *OrderedChildre
 // Init initializes a GenericDirectoryFD. Use it when overriding
 // GenericDirectoryFD. Caller must call fd.VFSFileDescription.Init() with the
 // correct implementation.
-func (fd *GenericDirectoryFD) Init(children *OrderedChildren, locks *vfs.FileLocks, opts *vfs.OpenOptions) error {
+func (fd *GenericDirectoryFD) Init(children *OrderedChildren, locks *vfs.FileLocks, opts *vfs.OpenOptions, fdOpts GenericDirectoryFDOptions) error {
 	if vfs.AccessTypesForOpenFlags(opts)&vfs.MayWrite != 0 {
 		// Can't open directories for writing.
 		return syserror.EISDIR
 	}
 	fd.LockFD.Init(locks)
+	fd.seekEnd = fdOpts.SeekEnd
 	fd.children = children
 	return nil
 }
@@ -209,9 +229,17 @@ func (fd *GenericDirectoryFD) Seek(ctx context.Context, offset int64, whence int
 	case linux.SEEK_CUR:
 		offset += fd.off
 	case linux.SEEK_END:
-		// TODO(gvisor.dev/issue/1193): This can prevent new files from showing up
-		// if they are added after SEEK_END.
-		offset = math.MaxInt64
+		switch fd.seekEnd {
+		case SeekEndStaticEntries:
+			fd.children.mu.RLock()
+			offset += int64(len(fd.children.set))
+			offset += 2 // '.' and '..' aren't tracked in children.
+			fd.children.mu.RUnlock()
+		case SeekEndZero:
+			// No-op: offset += 0.
+		default:
+			panic(fmt.Sprintf("Invalid GenericDirectoryFD.seekEnd = %v", fd.seekEnd))
+		}
 	default:
 		return 0, syserror.EINVAL
 	}
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index fe8a1e7100..885856868f 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -555,15 +555,16 @@ type StaticDirectory struct {
 	InodeNoDynamicLookup
 	OrderedChildren
 
-	locks vfs.FileLocks
+	locks  vfs.FileLocks
+	fdOpts GenericDirectoryFDOptions
 }
 
 var _ Inode = (*StaticDirectory)(nil)
 
 // NewStaticDir creates a new static directory and returns its dentry.
-func NewStaticDir(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode, children map[string]*Dentry) *Dentry {
+func NewStaticDir(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode, children map[string]*Dentry, fdOpts GenericDirectoryFDOptions) *Dentry {
 	inode := &StaticDirectory{}
-	inode.Init(creds, devMajor, devMinor, ino, perm)
+	inode.Init(creds, devMajor, devMinor, ino, perm, fdOpts)
 
 	dentry := &Dentry{}
 	dentry.Init(inode)
@@ -576,16 +577,17 @@ func NewStaticDir(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64
 }
 
 // Init initializes StaticDirectory.
-func (s *StaticDirectory) Init(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode) {
+func (s *StaticDirectory) Init(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode, fdOpts GenericDirectoryFDOptions) {
 	if perm&^linux.PermissionsMask != 0 {
 		panic(fmt.Sprintf("Only permission mask must be set: %x", perm&linux.PermissionsMask))
 	}
+	s.fdOpts = fdOpts
 	s.InodeAttrs.Init(creds, devMajor, devMinor, ino, linux.ModeDirectory|perm)
 }
 
 // Open implements kernfs.Inode.
 func (s *StaticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
-	fd, err := NewGenericDirectoryFD(rp.Mount(), vfsd, &s.OrderedChildren, &s.locks, &opts)
+	fd, err := NewGenericDirectoryFD(rp.Mount(), vfsd, &s.OrderedChildren, &s.locks, &opts, s.fdOpts)
 	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
index c5d5afedf0..e5c28c0e4c 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
@@ -119,7 +119,9 @@ func (fs *filesystem) newReadonlyDir(creds *auth.Credentials, mode linux.FileMod
 }
 
 func (d *readonlyDir) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
-	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &d.OrderedChildren, &d.locks, &opts)
+	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &d.OrderedChildren, &d.locks, &opts, kernfs.GenericDirectoryFDOptions{
+		SeekEnd: kernfs.SeekEndStaticEntries,
+	})
 	if err != nil {
 		return nil, err
 	}
@@ -151,7 +153,9 @@ func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, conte
 }
 
 func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
-	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &d.OrderedChildren, &d.locks, &opts)
+	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &d.OrderedChildren, &d.locks, &opts, kernfs.GenericDirectoryFDOptions{
+		SeekEnd: kernfs.SeekEndStaticEntries,
+	})
 	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go
index 2463d51cd8..c350ec1271 100644
--- a/pkg/sentry/fsimpl/proc/filesystem.go
+++ b/pkg/sentry/fsimpl/proc/filesystem.go
@@ -110,6 +110,12 @@ func newStaticFile(data string) *staticFile {
 	return &staticFile{StaticData: vfs.StaticData{Data: data}}
 }
 
+func newStaticDir(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode, children map[string]*kernfs.Dentry) *kernfs.Dentry {
+	return kernfs.NewStaticDir(creds, devMajor, devMinor, ino, perm, children, kernfs.GenericDirectoryFDOptions{
+		SeekEnd: kernfs.SeekEndZero,
+	})
+}
+
 // InternalData contains internal data passed in to the procfs mount via
 // vfs.GetFilesystemOptions.InternalData.
 type InternalData struct {
diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go
index 79c2725f3b..f25747da32 100644
--- a/pkg/sentry/fsimpl/proc/subtasks.go
+++ b/pkg/sentry/fsimpl/proc/subtasks.go
@@ -155,7 +155,9 @@ func (fd *subtasksFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) erro
 // Open implements kernfs.Inode.
 func (i *subtasksInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
 	fd := &subtasksFD{task: i.task}
-	if err := fd.Init(&i.OrderedChildren, &i.locks, &opts); err != nil {
+	if err := fd.Init(&i.OrderedChildren, &i.locks, &opts, kernfs.GenericDirectoryFDOptions{
+		SeekEnd: kernfs.SeekEndZero,
+	}); err != nil {
 		return nil, err
 	}
 	if err := fd.VFSFileDescription().Init(fd, opts.Flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{}); err != nil {
diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go
index a5c7aa4708..109b31b4cd 100644
--- a/pkg/sentry/fsimpl/proc/task.go
+++ b/pkg/sentry/fsimpl/proc/task.go
@@ -105,7 +105,9 @@ func (i *taskInode) Valid(ctx context.Context) bool {
 
 // Open implements kernfs.Inode.
 func (i *taskInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
-	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts)
+	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts, kernfs.GenericDirectoryFDOptions{
+		SeekEnd: kernfs.SeekEndZero,
+	})
 	if err != nil {
 		return nil, err
 	}
@@ -142,7 +144,9 @@ func (fs *filesystem) newTaskOwnedDir(task *kernel.Task, ino uint64, perm linux.
 	dir := &kernfs.StaticDirectory{}
 
 	// Note: credentials are overridden by taskOwnedInode.
-	dir.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, perm)
+	dir.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, perm, kernfs.GenericDirectoryFDOptions{
+		SeekEnd: kernfs.SeekEndZero,
+	})
 
 	inode := &taskOwnedInode{Inode: dir, owner: task}
 	d := &kernfs.Dentry{}
diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go
index f0d3f7f5ef..e8fcb9aa11 100644
--- a/pkg/sentry/fsimpl/proc/task_fds.go
+++ b/pkg/sentry/fsimpl/proc/task_fds.go
@@ -144,7 +144,9 @@ func (i *fdDirInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, erro
 
 // Open implements kernfs.Inode.
 func (i *fdDirInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
-	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts)
+	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts, kernfs.GenericDirectoryFDOptions{
+		SeekEnd: kernfs.SeekEndZero,
+	})
 	if err != nil {
 		return nil, err
 	}
@@ -271,7 +273,9 @@ func (i *fdInfoDirInode) Lookup(ctx context.Context, name string) (*vfs.Dentry,
 
 // Open implements kernfs.Inode.
 func (i *fdInfoDirInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
-	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts)
+	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts, kernfs.GenericDirectoryFDOptions{
+		SeekEnd: kernfs.SeekEndZero,
+	})
 	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go
index 6d2b90a8b1..1391992b7e 100644
--- a/pkg/sentry/fsimpl/proc/tasks.go
+++ b/pkg/sentry/fsimpl/proc/tasks.go
@@ -199,7 +199,9 @@ func (i *tasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback
 
 // Open implements kernfs.Inode.
 func (i *tasksInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
-	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts)
+	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts, kernfs.GenericDirectoryFDOptions{
+		SeekEnd: kernfs.SeekEndZero,
+	})
 	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go
index 6435385efe..038a194c70 100644
--- a/pkg/sentry/fsimpl/proc/tasks_sys.go
+++ b/pkg/sentry/fsimpl/proc/tasks_sys.go
@@ -39,14 +39,14 @@ const (
 
 // newSysDir returns the dentry corresponding to /proc/sys directory.
 func (fs *filesystem) newSysDir(root *auth.Credentials, k *kernel.Kernel) *kernfs.Dentry {
-	return kernfs.NewStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
-		"kernel": kernfs.NewStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
+	return newStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
+		"kernel": newStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
 			"hostname": fs.newDentry(root, fs.NextIno(), 0444, &hostnameData{}),
 			"shmall":   fs.newDentry(root, fs.NextIno(), 0444, shmData(linux.SHMALL)),
 			"shmmax":   fs.newDentry(root, fs.NextIno(), 0444, shmData(linux.SHMMAX)),
 			"shmmni":   fs.newDentry(root, fs.NextIno(), 0444, shmData(linux.SHMMNI)),
 		}),
-		"vm": kernfs.NewStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
+		"vm": newStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
 			"mmap_min_addr":     fs.newDentry(root, fs.NextIno(), 0444, &mmapMinAddrData{k: k}),
 			"overcommit_memory": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0\n")),
 		}),
@@ -62,7 +62,7 @@ func (fs *filesystem) newSysNetDir(root *auth.Credentials, k *kernel.Kernel) *ke
 	// network namespace of the calling process.
 	if stack := k.RootNetworkNamespace().Stack(); stack != nil {
 		contents = map[string]*kernfs.Dentry{
-			"ipv4": kernfs.NewStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
+			"ipv4": newStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
 				"tcp_recovery": fs.newDentry(root, fs.NextIno(), 0644, &tcpRecoveryData{stack: stack}),
 				"tcp_rmem":     fs.newDentry(root, fs.NextIno(), 0644, &tcpMemData{stack: stack, dir: tcpRMem}),
 				"tcp_sack":     fs.newDentry(root, fs.NextIno(), 0644, &tcpSackData{stack: stack}),
@@ -109,7 +109,7 @@ func (fs *filesystem) newSysNetDir(root *auth.Credentials, k *kernel.Kernel) *ke
 				"tcp_syn_retries":           fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("3")),
 				"tcp_timestamps":            fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("1")),
 			}),
-			"core": kernfs.NewStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
+			"core": newStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
 				"default_qdisc": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("pfifo_fast")),
 				"message_burst": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("10")),
 				"message_cost":  fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("5")),
@@ -123,7 +123,7 @@ func (fs *filesystem) newSysNetDir(root *auth.Credentials, k *kernel.Kernel) *ke
 		}
 	}
 
-	return kernfs.NewStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, contents)
+	return newStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, contents)
 }
 
 // mmapMinAddrData implements vfs.DynamicBytesSource for
diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go
index 0401726b66..393feb8022 100644
--- a/pkg/sentry/fsimpl/sys/sys.go
+++ b/pkg/sentry/fsimpl/sys/sys.go
@@ -131,7 +131,9 @@ func (*dir) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.Set
 
 // Open implements kernfs.Inode.Open.
 func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
-	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &d.OrderedChildren, &d.locks, &opts)
+	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &d.OrderedChildren, &d.locks, &opts, kernfs.GenericDirectoryFDOptions{
+		SeekEnd: kernfs.SeekEndStaticEntries,
+	})
 	if err != nil {
 		return nil, err
 	}

From ac83a6a5ed237e0ddcb473b1cbc2e30d8e6c6740 Mon Sep 17 00:00:00 2001
From: gVisor bot <gvisor-bot@google.com>
Date: Fri, 21 Aug 2020 16:18:31 -0700
Subject: [PATCH 053/211] Internal change.

PiperOrigin-RevId: 327892274
---
 pkg/sentry/limits/context.go  | 9 +++++++++
 pkg/sentry/vfs/permissions.go | 2 +-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/pkg/sentry/limits/context.go b/pkg/sentry/limits/context.go
index 77e1fe217c..0bade6e570 100644
--- a/pkg/sentry/limits/context.go
+++ b/pkg/sentry/limits/context.go
@@ -33,3 +33,12 @@ func FromContext(ctx context.Context) *LimitSet {
 	}
 	return nil
 }
+
+// FromContextOrDie returns FromContext(ctx) if the latter is not nil.
+// Otherwise, panic is triggered.
+func FromContextOrDie(ctx context.Context) *LimitSet {
+	if v := ctx.Value(CtxLimits); v != nil {
+		return v.(*LimitSet)
+	}
+	panic("failed to create limit set from context")
+}
diff --git a/pkg/sentry/vfs/permissions.go b/pkg/sentry/vfs/permissions.go
index 33389c1df3..014b928eda 100644
--- a/pkg/sentry/vfs/permissions.go
+++ b/pkg/sentry/vfs/permissions.go
@@ -271,7 +271,7 @@ func HasCapabilityOnFile(creds *auth.Credentials, cp linux.Capability, kuid auth
 // operation must not proceed. Otherwise it returns the max length allowed to
 // without violating the limit.
 func CheckLimit(ctx context.Context, offset, size int64) (int64, error) {
-	fileSizeLimit := limits.FromContext(ctx).Get(limits.FileSize).Cur
+	fileSizeLimit := limits.FromContextOrDie(ctx).Get(limits.FileSize).Cur
 	if fileSizeLimit > math.MaxInt64 {
 		return size, nil
 	}

From ec987e01ce48d909cb124cc90c416a7c1f03b547 Mon Sep 17 00:00:00 2001
From: Ting-Yu Wang <anivia@google.com>
Date: Fri, 21 Aug 2020 16:33:04 -0700
Subject: [PATCH 054/211] Move udp port exhaustion tests to a 'nogotsan' one.

It frequently times out under GoTSAN.

PiperOrigin-RevId: 327894343
---
 test/syscalls/BUILD                           |  7 ++
 test/syscalls/linux/BUILD                     | 17 ++++
 .../syscalls/linux/socket_ipv4_udp_unbound.cc | 56 -----------
 ...cket_ipv4_udp_unbound_loopback_nogotsan.cc | 94 +++++++++++++++++++
 4 files changed, 118 insertions(+), 56 deletions(-)
 create mode 100644 test/syscalls/linux/socket_ipv4_udp_unbound_loopback_nogotsan.cc

diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD
index 9897946eda..ad53e92e5e 100644
--- a/test/syscalls/BUILD
+++ b/test/syscalls/BUILD
@@ -667,6 +667,13 @@ syscall_test(
     test = "//test/syscalls/linux:socket_ipv4_udp_unbound_loopback_test",
 )
 
+syscall_test(
+    size = "medium",
+    # Takes too long under gotsan to run.
+    tags = ["nogotsan"],
+    test = "//test/syscalls/linux:socket_ipv4_udp_unbound_loopback_nogotsan_test",
+)
+
 syscall_test(
     test = "//test/syscalls/linux:socket_ip_unbound_test",
 )
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index 3009f5cada..ecd2d8d2a7 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -2738,6 +2738,23 @@ cc_binary(
     ],
 )
 
+cc_binary(
+    name = "socket_ipv4_udp_unbound_loopback_nogotsan_test",
+    testonly = 1,
+    srcs = [
+        "socket_ipv4_udp_unbound_loopback_nogotsan.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_test_util",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "@com_google_absl//absl/memory",
+    ],
+)
+
 cc_binary(
     name = "socket_ip_unbound_test",
     testonly = 1,
diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.cc b/test/syscalls/linux/socket_ipv4_udp_unbound.cc
index cdc9c22666..02ea05e22e 100644
--- a/test/syscalls/linux/socket_ipv4_udp_unbound.cc
+++ b/test/syscalls/linux/socket_ipv4_udp_unbound.cc
@@ -2121,62 +2121,6 @@ TEST_P(IPv4UDPUnboundSocketTest, ReuseAddrReusePortDistribution) {
               SyscallSucceedsWithValue(kMessageSize));
 }
 
-// Check that connect returns EAGAIN when out of local ephemeral ports.
-// We disable S/R because this test creates a large number of sockets.
-TEST_P(IPv4UDPUnboundSocketTest, UDPConnectPortExhaustion_NoRandomSave) {
-  auto receiver1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
-  constexpr int kClients = 65536;
-  // Bind the first socket to the loopback and take note of the selected port.
-  auto addr = V4Loopback();
-  ASSERT_THAT(bind(receiver1->get(), reinterpret_cast<sockaddr*>(&addr.addr),
-                   addr.addr_len),
-              SyscallSucceeds());
-  socklen_t addr_len = addr.addr_len;
-  ASSERT_THAT(getsockname(receiver1->get(),
-                          reinterpret_cast<sockaddr*>(&addr.addr), &addr_len),
-              SyscallSucceeds());
-  EXPECT_EQ(addr_len, addr.addr_len);
-
-  // Disable cooperative S/R as we are making too many syscalls.
-  DisableSave ds;
-  std::vector<std::unique_ptr<FileDescriptor>> sockets;
-  for (int i = 0; i < kClients; i++) {
-    auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
-
-    int ret = connect(s->get(), reinterpret_cast<sockaddr*>(&addr.addr),
-                      addr.addr_len);
-    if (ret == 0) {
-      sockets.push_back(std::move(s));
-      continue;
-    }
-    ASSERT_THAT(ret, SyscallFailsWithErrno(EAGAIN));
-    break;
-  }
-}
-
-// Check that bind returns EADDRINUSE when out of local ephemeral ports.
-// We disable S/R because this test creates a large number of sockets.
-TEST_P(IPv4UDPUnboundSocketTest, UDPBindPortExhaustion_NoRandomSave) {
-  auto receiver1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
-  constexpr int kClients = 65536;
-  auto addr = V4Loopback();
-  // Disable cooperative S/R as we are making too many syscalls.
-  DisableSave ds;
-  std::vector<std::unique_ptr<FileDescriptor>> sockets;
-  for (int i = 0; i < kClients; i++) {
-    auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
-
-    int ret =
-        bind(s->get(), reinterpret_cast<sockaddr*>(&addr.addr), addr.addr_len);
-    if (ret == 0) {
-      sockets.push_back(std::move(s));
-      continue;
-    }
-    ASSERT_THAT(ret, SyscallFailsWithErrno(EADDRINUSE));
-    break;
-  }
-}
-
 // Test that socket will receive packet info control message.
 TEST_P(IPv4UDPUnboundSocketTest, SetAndReceiveIPPKTINFO) {
   // TODO(gvisor.dev/issue/1202): ioctl() is not supported by hostinet.
diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_loopback_nogotsan.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_loopback_nogotsan.cc
new file mode 100644
index 0000000000..bcbd2feace
--- /dev/null
+++ b/test/syscalls/linux/socket_ipv4_udp_unbound_loopback_nogotsan.cc
@@ -0,0 +1,94 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "gtest/gtest.h"
+#include "absl/memory/memory.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to IPv4 UDP sockets.
+using IPv4UDPUnboundSocketNogotsanTest = SimpleSocketTest;
+
+// Check that connect returns EAGAIN when out of local ephemeral ports.
+// We disable S/R because this test creates a large number of sockets.
+TEST_P(IPv4UDPUnboundSocketNogotsanTest,
+       UDPConnectPortExhaustion_NoRandomSave) {
+  auto receiver1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  constexpr int kClients = 65536;
+  // Bind the first socket to the loopback and take note of the selected port.
+  auto addr = V4Loopback();
+  ASSERT_THAT(bind(receiver1->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+  socklen_t addr_len = addr.addr_len;
+  ASSERT_THAT(getsockname(receiver1->get(),
+                          reinterpret_cast<sockaddr*>(&addr.addr), &addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(addr_len, addr.addr_len);
+
+  // Disable cooperative S/R as we are making too many syscalls.
+  DisableSave ds;
+  std::vector<std::unique_ptr<FileDescriptor>> sockets;
+  for (int i = 0; i < kClients; i++) {
+    auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+    int ret = connect(s->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                      addr.addr_len);
+    if (ret == 0) {
+      sockets.push_back(std::move(s));
+      continue;
+    }
+    ASSERT_THAT(ret, SyscallFailsWithErrno(EAGAIN));
+    break;
+  }
+}
+
+// Check that bind returns EADDRINUSE when out of local ephemeral ports.
+// We disable S/R because this test creates a large number of sockets.
+TEST_P(IPv4UDPUnboundSocketNogotsanTest, UDPBindPortExhaustion_NoRandomSave) {
+  auto receiver1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  constexpr int kClients = 65536;
+  auto addr = V4Loopback();
+  // Disable cooperative S/R as we are making too many syscalls.
+  DisableSave ds;
+  std::vector<std::unique_ptr<FileDescriptor>> sockets;
+  for (int i = 0; i < kClients; i++) {
+    auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+    int ret =
+        bind(s->get(), reinterpret_cast<sockaddr*>(&addr.addr), addr.addr_len);
+    if (ret == 0) {
+      sockets.push_back(std::move(s));
+      continue;
+    }
+    ASSERT_THAT(ret, SyscallFailsWithErrno(EADDRINUSE));
+    break;
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    IPv4UDPSockets, IPv4UDPUnboundSocketNogotsanTest,
+    ::testing::ValuesIn(ApplyVec<SocketKind>(IPv4UDPUnboundSocket,
+                                             AllBitwiseCombinations(List<int>{
+                                                 0, SOCK_NONBLOCK}))));
+
+}  // namespace testing
+}  // namespace gvisor

From 8bf0bd8ab97958bb43bb0388f1f40965cf989207 Mon Sep 17 00:00:00 2001
From: gVisor bot <gvisor-bot@google.com>
Date: Fri, 21 Aug 2020 17:24:45 -0700
Subject: [PATCH 055/211] Provide fdReader/Writer for FileDescription

fdReader/Writer implement io.Reader/Writer so that they can be passed
to the Merkle tree library.

PiperOrigin-RevId: 327901376
---
 pkg/sentry/vfs/file_description.go | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go
index d3abe28eed..33910e0950 100644
--- a/pkg/sentry/vfs/file_description.go
+++ b/pkg/sentry/vfs/file_description.go
@@ -847,3 +847,31 @@ func (fd *FileDescription) SetAsyncHandler(newHandler func() FileAsync) FileAsyn
 	}
 	return fd.asyncHandler
 }
+
+// FileReadWriteSeeker is a helper struct to pass a FileDescription as
+// io.Reader/io.Writer/io.ReadSeeker/etc.
+type FileReadWriteSeeker struct {
+	Fd    *FileDescription
+	Ctx   context.Context
+	ROpts ReadOptions
+	WOpts WriteOptions
+}
+
+// Read implements io.ReadWriteSeeker.Read.
+func (f *FileReadWriteSeeker) Read(p []byte) (int, error) {
+	dst := usermem.BytesIOSequence(p)
+	ret, err := f.Fd.Read(f.Ctx, dst, f.ROpts)
+	return int(ret), err
+}
+
+// Seek implements io.ReadWriteSeeker.Seek.
+func (f *FileReadWriteSeeker) Seek(offset int64, whence int) (int64, error) {
+	return f.Fd.Seek(f.Ctx, offset, int32(whence))
+}
+
+// Write implements io.ReadWriteSeeker.Write.
+func (f *FileReadWriteSeeker) Write(p []byte) (int, error) {
+	buf := usermem.BytesIOSequence(p)
+	ret, err := f.Fd.Write(f.Ctx, buf, f.WOpts)
+	return int(ret), err
+}

From b9aa0fd7dacc84bbaffbda41e3b40aa4e876b3c1 Mon Sep 17 00:00:00 2001
From: Ting-Yu Wang <anivia@google.com>
Date: Fri, 21 Aug 2020 17:32:19 -0700
Subject: [PATCH 056/211] stateify: Fix pretty print not printing odd numbered
 fields.

PiperOrigin-RevId: 327902182
---
 pkg/state/pretty/pretty.go | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pkg/state/pretty/pretty.go b/pkg/state/pretty/pretty.go
index cf37aaa498..1375fcc38d 100644
--- a/pkg/state/pretty/pretty.go
+++ b/pkg/state/pretty/pretty.go
@@ -148,7 +148,6 @@ func format(graph uint64, depth int, encoded wire.Object, html bool) (string, bo
 			element, ok := format(graph, depth+1, *x.Field(i), html)
 			allZero = allZero && !ok
 			items = append(items, fmt.Sprintf("\t%d: %s,", i, element))
-			i++
 		}
 		items = append(items, "}")
 		return strings.Join(items, tabs), !allZero

From 4459eb7bb42c1f920760d2ca5e147b81d04fdc00 Mon Sep 17 00:00:00 2001
From: Ayush Ranjan <ayushranjan@google.com>
Date: Fri, 21 Aug 2020 20:04:31 -0700
Subject: [PATCH 057/211] [vfs] Allow mountpoint to be an existing
 non-directory.

Unlike linux mount(2), OCI spec allows mounting on top of an existing
non-directory file.

PiperOrigin-RevId: 327914342
---
 pkg/sentry/vfs/mount.go | 26 ++++++++++++++++++++++++++
 runsc/boot/vfs.go       |  4 ++--
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go
index 09fea3628c..cd5456eefa 100644
--- a/pkg/sentry/vfs/mount.go
+++ b/pkg/sentry/vfs/mount.go
@@ -18,12 +18,14 @@ import (
 	"bytes"
 	"fmt"
 	"math"
+	"path"
 	"sort"
 	"strings"
 	"sync/atomic"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/fspath"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/syserror"
 )
@@ -888,6 +890,30 @@ func (vfs *VirtualFilesystem) GenerateProcMountInfo(ctx context.Context, taskRoo
 	}
 }
 
+// MakeSyntheticMountpoint creates parent directories of target if they do not
+// exist and attempts to create a directory for the mountpoint. If a
+// non-directory file already exists there then we allow it.
+func (vfs *VirtualFilesystem) MakeSyntheticMountpoint(ctx context.Context, target string, root VirtualDentry, creds *auth.Credentials) error {
+	mkdirOpts := &MkdirOptions{Mode: 0777, ForSyntheticMountpoint: true}
+
+	// Make sure the parent directory of target exists.
+	if err := vfs.MkdirAllAt(ctx, path.Dir(target), root, creds, mkdirOpts); err != nil {
+		return fmt.Errorf("failed to create parent directory of mountpoint %q: %w", target, err)
+	}
+
+	// Attempt to mkdir the final component. If a file (of any type) exists
+	// then we allow mounting on top of that because we do not require the
+	// target to be an existing directory, unlike Linux mount(2).
+	if err := vfs.MkdirAt(ctx, creds, &PathOperation{
+		Root:  root,
+		Start: root,
+		Path:  fspath.Parse(target),
+	}, mkdirOpts); err != nil && err != syserror.EEXIST {
+		return fmt.Errorf("failed to create mountpoint %q: %w", target, err)
+	}
+	return nil
+}
+
 // manglePath replaces ' ', '\t', '\n', and '\\' with their octal equivalents.
 // See Linux fs/seq_file.c:mangle_path.
 func manglePath(p string) string {
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
index fb200e9887..66b6cf19b9 100644
--- a/runsc/boot/vfs.go
+++ b/runsc/boot/vfs.go
@@ -292,7 +292,7 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.C
 		return nil, nil
 	}
 
-	if err := c.k.VFS().MkdirAllAt(ctx, submount.Destination, root, creds, &vfs.MkdirOptions{Mode: 0777, ForSyntheticMountpoint: true}); err != nil {
+	if err := c.k.VFS().MakeSyntheticMountpoint(ctx, submount.Destination, root, creds); err != nil {
 		return nil, err
 	}
 	mnt, err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts)
@@ -496,7 +496,7 @@ func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *co
 
 	root := mns.Root()
 	defer root.DecRef(ctx)
-	if err := c.k.VFS().MkdirAllAt(ctx, mount.Destination, root, creds, &vfs.MkdirOptions{Mode: 0777, ForSyntheticMountpoint: true}); err != nil {
+	if err := c.k.VFS().MakeSyntheticMountpoint(ctx, mount.Destination, root, creds); err != nil {
 		return nil, err
 	}
 

From 431d6f2aa82dd62c1498518d5c25515fc775178f Mon Sep 17 00:00:00 2001
From: Mithun Iyer <iyerm@google.com>
Date: Fri, 21 Aug 2020 22:47:06 -0700
Subject: [PATCH 058/211] Add syscall tests for SO_REUSEADDR.

Add tests for socket re-bind/listen of client and server sockets
with the older connection still in TIME_WAIT state and with
SO_REUSEADDR enabled.

PiperOrigin-RevId: 327924702
---
 test/syscalls/linux/socket_inet_loopback.cc | 244 ++++++++++----------
 1 file changed, 128 insertions(+), 116 deletions(-)

diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc
index a62a10088a..bd30fb86b8 100644
--- a/test/syscalls/linux/socket_inet_loopback.cc
+++ b/test/syscalls/linux/socket_inet_loopback.cc
@@ -861,36 +861,38 @@ TEST_P(SocketInetLoopbackTest, TCPResetAfterClose) {
               SyscallSucceedsWithValue(0));
 }
 
-// This test is disabled under random save as the the restore run
-// results in the stack.Seed() being different which can cause
-// sequence number of final connect to be one that is considered
-// old and can cause the test to be flaky.
-TEST_P(SocketInetLoopbackTest, TCPPassiveCloseNoTimeWaitTest_NoRandomSave) {
-  auto const& param = GetParam();
-  TestAddress const& listener = param.listener;
-  TestAddress const& connector = param.connector;
-
+// setupTimeWaitClose sets up a socket endpoint in TIME_WAIT state.
+// Callers can choose to perform active close on either end of the connection
+// and also specify whether they want to enable SO_REUSEADDR.
+void setupTimeWaitClose(const TestAddress* listener,
+                        const TestAddress* connector, bool reuse,
+                        bool accept_close, sockaddr_storage* listen_addr,
+                        sockaddr_storage* conn_bound_addr) {
   // Create the listening socket.
-  const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
-      Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
-  sockaddr_storage listen_addr = listener.addr;
-  ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
-                   listener.addr_len),
+  FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(listener->family(), SOCK_STREAM, IPPROTO_TCP));
+  if (reuse) {
+    ASSERT_THAT(setsockopt(listen_fd.get(), SOL_SOCKET, SO_REUSEADDR,
+                           &kSockOptOn, sizeof(kSockOptOn)),
+                SyscallSucceeds());
+  }
+  ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(listen_addr),
+                   listener->addr_len),
               SyscallSucceeds());
   ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds());
 
   // Get the port bound by the listening socket.
-  socklen_t addrlen = listener.addr_len;
+  socklen_t addrlen = listener->addr_len;
   ASSERT_THAT(getsockname(listen_fd.get(),
-                          reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+                          reinterpret_cast<sockaddr*>(listen_addr), &addrlen),
               SyscallSucceeds());
 
   uint16_t const port =
-      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener->family(), *listen_addr));
 
   // Connect to the listening socket.
   FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE(
-      Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+      Socket(connector->family(), SOCK_STREAM, IPPROTO_TCP));
 
   // We disable saves after this point as a S/R causes the netstack seed
   // to be regenerated which changes what ports/ISN is picked for a given
@@ -901,11 +903,12 @@ TEST_P(SocketInetLoopbackTest, TCPPassiveCloseNoTimeWaitTest_NoRandomSave) {
   //
   // TODO(gvisor.dev/issue/940): S/R portSeed/portHint
   DisableSave ds;
-  sockaddr_storage conn_addr = connector.addr;
-  ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+
+  sockaddr_storage conn_addr = connector->addr;
+  ASSERT_NO_ERRNO(SetAddrPort(connector->family(), &conn_addr, port));
   ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(),
                                   reinterpret_cast<sockaddr*>(&conn_addr),
-                                  connector.addr_len),
+                                  connector->addr_len),
               SyscallSucceeds());
 
   // Accept the connection.
@@ -913,136 +916,145 @@ TEST_P(SocketInetLoopbackTest, TCPPassiveCloseNoTimeWaitTest_NoRandomSave) {
       ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
 
   // Get the address/port bound by the connecting socket.
-  sockaddr_storage conn_bound_addr;
-  socklen_t conn_addrlen = connector.addr_len;
+  socklen_t conn_addrlen = connector->addr_len;
   ASSERT_THAT(
-      getsockname(conn_fd.get(), reinterpret_cast<sockaddr*>(&conn_bound_addr),
+      getsockname(conn_fd.get(), reinterpret_cast<sockaddr*>(conn_bound_addr),
                   &conn_addrlen),
       SyscallSucceeds());
 
-  // shutdown the accept FD to trigger TIME_WAIT on the accepted socket which
-  // should cause the conn_fd to follow CLOSE_WAIT->LAST_ACK->CLOSED instead of
-  // TIME_WAIT.
-  ASSERT_THAT(shutdown(accepted.get(), SHUT_RDWR), SyscallSucceeds());
+  FileDescriptor active_closefd, passive_closefd;
+  if (accept_close) {
+    active_closefd = std::move(accepted);
+    passive_closefd = std::move(conn_fd);
+  } else {
+    active_closefd = std::move(conn_fd);
+    passive_closefd = std::move(accepted);
+  }
+
+  // shutdown to trigger TIME_WAIT.
+  ASSERT_THAT(shutdown(active_closefd.get(), SHUT_RDWR), SyscallSucceeds());
   {
     const int kTimeout = 10000;
     struct pollfd pfd = {
-        .fd = conn_fd.get(),
+        .fd = passive_closefd.get(),
         .events = POLLIN,
     };
     ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1));
     ASSERT_EQ(pfd.revents, POLLIN);
   }
+  ScopedThread t([&]() {
+    constexpr int kTimeout = 10000;
+    constexpr int16_t want_events = POLLHUP;
+    struct pollfd pfd = {
+        .fd = active_closefd.get(),
+        .events = want_events,
+    };
+    ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1));
+  });
 
-  conn_fd.reset();
-  // This sleep is required to give conn_fd time to transition to TIME-WAIT.
+  passive_closefd.reset();
+  t.Join();
+  active_closefd.reset();
+  // This sleep is needed to reduce flakiness: it gives the passive-close
+  // endpoint time to transition from LAST_ACK to CLOSED.
   absl::SleepFor(absl::Seconds(1));
+}
 
-  // At this point conn_fd should be the one that moved to CLOSE_WAIT and
-  // eventually to CLOSED.
-
-  // Now bind and connect a new socket and verify that we can immediately
-  // rebind the address bound by the conn_fd as it never entered TIME_WAIT.
-  const FileDescriptor conn_fd2 = ASSERT_NO_ERRNO_AND_VALUE(
-      Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+// These tests are disabled under random save as the restore run
+// results in the stack.Seed() being different which can cause
+// sequence number of final connect to be one that is considered
+// old and can cause the test to be flaky.
+//
+// Test re-binding of client and server bound addresses when the older
+// connection is in TIME_WAIT.
+TEST_P(SocketInetLoopbackTest, TCPPassiveCloseNoTimeWaitTest_NoRandomSave) {
+  auto const& param = GetParam();
+  sockaddr_storage listen_addr, conn_bound_addr;
+  listen_addr = param.listener.addr;
+  setupTimeWaitClose(&param.listener, &param.connector, false /*reuse*/,
+                     true /*accept_close*/, &listen_addr, &conn_bound_addr);
 
-  ASSERT_THAT(bind(conn_fd2.get(),
-                   reinterpret_cast<sockaddr*>(&conn_bound_addr), conn_addrlen),
-              SyscallSucceeds());
-  ASSERT_THAT(RetryEINTR(connect)(conn_fd2.get(),
-                                  reinterpret_cast<sockaddr*>(&conn_addr),
-                                  conn_addrlen),
+  // Now bind a new socket and verify that we can immediately rebind the address
+  // bound by the conn_fd as it never entered TIME_WAIT.
+  const FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(param.connector.family(), SOCK_STREAM, IPPROTO_TCP));
+  ASSERT_THAT(bind(conn_fd.get(), reinterpret_cast<sockaddr*>(&conn_bound_addr),
+                   param.connector.addr_len),
               SyscallSucceeds());
+
+  FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(param.listener.family(), SOCK_STREAM, IPPROTO_TCP));
+  ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
+                   param.listener.addr_len),
+              SyscallFailsWithErrno(EADDRINUSE));
 }
 
-TEST_P(SocketInetLoopbackTest, TCPActiveCloseTimeWaitTest_NoRandomSave) {
+TEST_P(SocketInetLoopbackTest,
+       TCPPassiveCloseNoTimeWaitReuseTest_NoRandomSave) {
   auto const& param = GetParam();
-  TestAddress const& listener = param.listener;
-  TestAddress const& connector = param.connector;
+  sockaddr_storage listen_addr, conn_bound_addr;
+  listen_addr = param.listener.addr;
+  setupTimeWaitClose(&param.listener, &param.connector, true /*reuse*/,
+                     true /*accept_close*/, &listen_addr, &conn_bound_addr);
 
-  // Create the listening socket.
-  const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
-      Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
-  sockaddr_storage listen_addr = listener.addr;
+  FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(param.listener.family(), SOCK_STREAM, IPPROTO_TCP));
+  ASSERT_THAT(setsockopt(listen_fd.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
   ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
-                   listener.addr_len),
+                   param.listener.addr_len),
               SyscallSucceeds());
   ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds());
 
-  // Get the port bound by the listening socket.
-  socklen_t addrlen = listener.addr_len;
-  ASSERT_THAT(getsockname(listen_fd.get(),
-                          reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+  // Now bind and connect a new socket and verify that we can immediately rebind
+  // the address bound by the conn_fd as it never entered TIME_WAIT.
+  const FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(param.connector.family(), SOCK_STREAM, IPPROTO_TCP));
+  ASSERT_THAT(setsockopt(conn_fd.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(conn_fd.get(), reinterpret_cast<sockaddr*>(&conn_bound_addr),
+                   param.connector.addr_len),
               SyscallSucceeds());
 
   uint16_t const port =
-      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
-
-  // Connect to the listening socket.
-  FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE(
-      Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
-
-  // We disable saves after this point as a S/R causes the netstack seed
-  // to be regenerated which changes what ports/ISN is picked for a given
-  // tuple (src ip,src port, dst ip, dst port). This can cause the final
-  // SYN to use a sequence number that looks like one from the current
-  // connection in TIME_WAIT and will not be accepted causing the test
-  // to timeout.
-  //
-  // TODO(gvisor.dev/issue/940): S/R portSeed/portHint
-  DisableSave ds;
-
-  sockaddr_storage conn_addr = connector.addr;
-  ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(param.listener.family(), listen_addr));
+  sockaddr_storage conn_addr = param.connector.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(param.connector.family(), &conn_addr, port));
   ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(),
                                   reinterpret_cast<sockaddr*>(&conn_addr),
-                                  connector.addr_len),
+                                  param.connector.addr_len),
               SyscallSucceeds());
+}
 
-  // Accept the connection.
-  auto accepted =
-      ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
-
-  // Get the address/port bound by the connecting socket.
-  sockaddr_storage conn_bound_addr;
-  socklen_t conn_addrlen = connector.addr_len;
-  ASSERT_THAT(
-      getsockname(conn_fd.get(), reinterpret_cast<sockaddr*>(&conn_bound_addr),
-                  &conn_addrlen),
-      SyscallSucceeds());
-
-  // shutdown the conn FD to trigger TIME_WAIT on the connect socket.
-  ASSERT_THAT(shutdown(conn_fd.get(), SHUT_RDWR), SyscallSucceeds());
-  {
-    const int kTimeout = 10000;
-    struct pollfd pfd = {
-        .fd = accepted.get(),
-        .events = POLLIN,
-    };
-    ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1));
-    ASSERT_EQ(pfd.revents, POLLIN);
-  }
-  ScopedThread t([&]() {
-    constexpr int kTimeout = 10000;
-    constexpr int16_t want_events = POLLHUP;
-    struct pollfd pfd = {
-        .fd = conn_fd.get(),
-        .events = want_events,
-    };
-    ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1));
-  });
-
-  accepted.reset();
-  t.Join();
-  conn_fd.reset();
+TEST_P(SocketInetLoopbackTest, TCPActiveCloseTimeWaitTest_NoRandomSave) {
+  auto const& param = GetParam();
+  sockaddr_storage listen_addr, conn_bound_addr;
+  listen_addr = param.listener.addr;
+  setupTimeWaitClose(&param.listener, &param.connector, false /*reuse*/,
+                     false /*accept_close*/, &listen_addr, &conn_bound_addr);
+  FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(param.connector.family(), SOCK_STREAM, IPPROTO_TCP));
 
-  // Now bind and connect a new socket and verify that we can't immediately
-  // rebind the address bound by the conn_fd as it is in TIME_WAIT.
-  conn_fd = ASSERT_NO_ERRNO_AND_VALUE(
-      Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+  ASSERT_THAT(bind(conn_fd.get(), reinterpret_cast<sockaddr*>(&conn_bound_addr),
+                   param.connector.addr_len),
+              SyscallFailsWithErrno(EADDRINUSE));
+}
 
+TEST_P(SocketInetLoopbackTest, TCPActiveCloseTimeWaitReuseTest_NoRandomSave) {
+  auto const& param = GetParam();
+  sockaddr_storage listen_addr, conn_bound_addr;
+  listen_addr = param.listener.addr;
+  setupTimeWaitClose(&param.listener, &param.connector, true /*reuse*/,
+                     false /*accept_close*/, &listen_addr, &conn_bound_addr);
+  FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(param.connector.family(), SOCK_STREAM, IPPROTO_TCP));
+  ASSERT_THAT(setsockopt(conn_fd.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
   ASSERT_THAT(bind(conn_fd.get(), reinterpret_cast<sockaddr*>(&conn_bound_addr),
-                   conn_addrlen),
+                   param.connector.addr_len),
               SyscallFailsWithErrno(EADDRINUSE));
 }
 

From 442af00e8cb678859e44a149a97885d102f94edb Mon Sep 17 00:00:00 2001
From: gVisor bot <gvisor-bot@google.com>
Date: Sat, 22 Aug 2020 09:53:11 -0700
Subject: [PATCH 059/211] Implement GetFilesystem for verity fs

verity GetFilesystem is implemented by mounting the underlying file
system, saving the mount, and storing both the underlying root dentry and
the root Merkle file dentry in verity's root dentry.

PiperOrigin-RevId: 327959334
---
 pkg/sentry/fsimpl/verity/verity.go | 128 ++++++++++++++++++++++++++---
 1 file changed, 118 insertions(+), 10 deletions(-)

diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go
index cb29d33a58..1c5b07aa5f 100644
--- a/pkg/sentry/fsimpl/verity/verity.go
+++ b/pkg/sentry/fsimpl/verity/verity.go
@@ -26,6 +26,7 @@ import (
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/fspath"
 	fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
@@ -36,10 +37,16 @@ import (
 // Name is the default filesystem name.
 const Name = "verity"
 
-// testOnlyDebugging allows verity file system to return error instead of
-// crashing the application when a malicious action is detected. This should
-// only be set for tests.
-var testOnlyDebugging bool
+// merklePrefix is the prefix of the Merkle tree files. For example, the Merkle
+// tree file for "/foo" is "/.merkle.verity.foo".
+const merklePrefix = ".merkle.verity."
+
+// noCrashOnVerificationFailure indicates whether the sandbox should panic
+// whenever verification fails. If true, an error is returned instead of
+// panicking. This should only be set for tests.
+// TODO(b/165661693): Decide whether to panic or return error based on this
+// flag.
+var noCrashOnVerificationFailure bool
 
 // FilesystemType implements vfs.FilesystemType.
 type FilesystemType struct{}
@@ -93,10 +100,10 @@ type InternalFilesystemOptions struct {
 	// system wrapped by verity file system.
 	LowerGetFSOptions vfs.GetFilesystemOptions
 
-	// TestOnlyDebugging allows verity file system to return error instead
-	// of crashing the application when a malicious action is detected. This
-	// should only be set for tests.
-	TestOnlyDebugging bool
+	// NoCrashOnVerificationFailure indicates whether the sandbox should
+	// panic whenever verification fails. If true, an error is returned
+	// instead of panicking. This should only be set for tests.
+	NoCrashOnVerificationFailure bool
 }
 
 // Name implements vfs.FilesystemType.Name.
@@ -106,8 +113,109 @@ func (FilesystemType) Name() string {
 
 // GetFilesystem implements vfs.FilesystemType.GetFilesystem.
 func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
-	//TODO(b/159261227): Implement GetFilesystem.
-	return nil, nil, nil
+	iopts, ok := opts.InternalData.(InternalFilesystemOptions)
+	if !ok {
+		ctx.Warningf("verity.FilesystemType.GetFilesystem: missing verity configs")
+		return nil, nil, syserror.EINVAL
+	}
+	noCrashOnVerificationFailure = iopts.NoCrashOnVerificationFailure
+
+	// Mount the lower file system. The lower file system is wrapped inside
+	// verity, and should not be exposed or connected.
+	mopts := &vfs.MountOptions{
+		GetFilesystemOptions: iopts.LowerGetFSOptions,
+	}
+	mnt, err := vfsObj.MountDisconnected(ctx, creds, "", iopts.LowerName, mopts)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	fs := &filesystem{
+		creds:              creds.Fork(),
+		lowerMount:         mnt,
+		allowRuntimeEnable: iopts.AllowRuntimeEnable,
+	}
+	fs.vfsfs.Init(vfsObj, &fstype, fs)
+
+	// Construct the root dentry.
+	d := fs.newDentry()
+	d.refs = 1
+	lowerVD := vfs.MakeVirtualDentry(mnt, mnt.Root())
+	lowerVD.IncRef()
+	d.lowerVD = lowerVD
+
+	rootMerkleName := merklePrefix + iopts.RootMerkleFileName
+
+	lowerMerkleVD, err := vfsObj.GetDentryAt(ctx, fs.creds, &vfs.PathOperation{
+		Root:  lowerVD,
+		Start: lowerVD,
+		Path:  fspath.Parse(rootMerkleName),
+	}, &vfs.GetDentryOptions{})
+
+	// If runtime enable is allowed, the root merkle tree may be absent. We
+	// should create the tree file.
+	if err == syserror.ENOENT && fs.allowRuntimeEnable {
+		lowerMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{
+			Root:  lowerVD,
+			Start: lowerVD,
+			Path:  fspath.Parse(rootMerkleName),
+		}, &vfs.OpenOptions{
+			Flags: linux.O_RDWR | linux.O_CREAT,
+			Mode:  0644,
+		})
+		if err != nil {
+			fs.vfsfs.DecRef(ctx)
+			d.DecRef(ctx)
+			return nil, nil, err
+		}
+		lowerMerkleFD.DecRef(ctx)
+		lowerMerkleVD, err = vfsObj.GetDentryAt(ctx, fs.creds, &vfs.PathOperation{
+			Root:  lowerVD,
+			Start: lowerVD,
+			Path:  fspath.Parse(rootMerkleName),
+		}, &vfs.GetDentryOptions{})
+		if err != nil {
+			fs.vfsfs.DecRef(ctx)
+			d.DecRef(ctx)
+			return nil, nil, err
+		}
+	} else if err != nil {
+		// Failed to get dentry for the root Merkle file. This indicates
+		// an attack that removed/renamed the root Merkle file, or that
+		// the file was never generated.
+		if noCrashOnVerificationFailure {
+			fs.vfsfs.DecRef(ctx)
+			d.DecRef(ctx)
+			return nil, nil, err
+		}
+		panic("Failed to find root Merkle file")
+	}
+	d.lowerMerkleVD = lowerMerkleVD
+
+	// Get metadata from the underlying file system.
+	const statMask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID
+	stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{
+		Root:  lowerVD,
+		Start: lowerVD,
+	}, &vfs.StatOptions{
+		Mask: statMask,
+	})
+	if err != nil {
+		fs.vfsfs.DecRef(ctx)
+		d.DecRef(ctx)
+		return nil, nil, err
+	}
+
+	// TODO(b/162788573): Verify Metadata.
+	d.mode = uint32(stat.Mode)
+	d.uid = stat.UID
+	d.gid = stat.GID
+
+	d.rootHash = make([]byte, len(iopts.RootHash))
+	copy(d.rootHash, iopts.RootHash)
+	d.vfsd.Init(d)
+
+	return &fs.vfsfs, &d.vfsd, nil
 }
 
 // Release implements vfs.FilesystemImpl.Release.

From 194bcb6ca3c3a6ef59a471b5d46886b41575d1bc Mon Sep 17 00:00:00 2001
From: gVisor bot <gvisor-bot@google.com>
Date: Mon, 24 Aug 2020 09:49:46 -0700
Subject: [PATCH 060/211] Internal change.

PiperOrigin-RevId: 328157101
---
 test/packetimpact/dut/posix_server.cc | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/test/packetimpact/dut/posix_server.cc b/test/packetimpact/dut/posix_server.cc
index 29d4cc6fee..76ba701dad 100644
--- a/test/packetimpact/dut/posix_server.cc
+++ b/test/packetimpact/dut/posix_server.cc
@@ -28,6 +28,7 @@
 
 #include "include/grpcpp/security/server_credentials.h"
 #include "include/grpcpp/server_builder.h"
+#include "include/grpcpp/server_context.h"
 #include "test/packetimpact/proto/posix_server.grpc.pb.h"
 #include "test/packetimpact/proto/posix_server.pb.h"
 
@@ -108,7 +109,7 @@ ::grpc::Status proto_to_sockaddr(const posix_server::Sockaddr &sockaddr_proto,
 }
 
 class PosixImpl final : public posix_server::Posix::Service {
-  ::grpc::Status Accept(grpc_impl::ServerContext *context,
+  ::grpc::Status Accept(grpc::ServerContext *context,
                         const ::posix_server::AcceptRequest *request,
                         ::posix_server::AcceptResponse *response) override {
     sockaddr_storage addr;
@@ -119,7 +120,7 @@ class PosixImpl final : public posix_server::Posix::Service {
     return sockaddr_to_proto(addr, addrlen, response->mutable_addr());
   }
 
-  ::grpc::Status Bind(grpc_impl::ServerContext *context,
+  ::grpc::Status Bind(grpc::ServerContext *context,
                       const ::posix_server::BindRequest *request,
                       ::posix_server::BindResponse *response) override {
     if (!request->has_addr()) {
@@ -140,7 +141,7 @@ class PosixImpl final : public posix_server::Posix::Service {
     return ::grpc::Status::OK;
   }
 
-  ::grpc::Status Close(grpc_impl::ServerContext *context,
+  ::grpc::Status Close(grpc::ServerContext *context,
                        const ::posix_server::CloseRequest *request,
                        ::posix_server::CloseResponse *response) override {
     response->set_ret(close(request->fd()));
@@ -148,7 +149,7 @@ class PosixImpl final : public posix_server::Posix::Service {
     return ::grpc::Status::OK;
   }
 
-  ::grpc::Status Connect(grpc_impl::ServerContext *context,
+  ::grpc::Status Connect(grpc::ServerContext *context,
                          const ::posix_server::ConnectRequest *request,
                          ::posix_server::ConnectResponse *response) override {
     if (!request->has_addr()) {
@@ -168,7 +169,7 @@ class PosixImpl final : public posix_server::Posix::Service {
     return ::grpc::Status::OK;
   }
 
-  ::grpc::Status Fcntl(grpc_impl::ServerContext *context,
+  ::grpc::Status Fcntl(grpc::ServerContext *context,
                        const ::posix_server::FcntlRequest *request,
                        ::posix_server::FcntlResponse *response) override {
     response->set_ret(::fcntl(request->fd(), request->cmd(), request->arg()));
@@ -177,7 +178,7 @@ class PosixImpl final : public posix_server::Posix::Service {
   }
 
   ::grpc::Status GetSockName(
-      grpc_impl::ServerContext *context,
+      grpc::ServerContext *context,
       const ::posix_server::GetSockNameRequest *request,
       ::posix_server::GetSockNameResponse *response) override {
     sockaddr_storage addr;
@@ -189,7 +190,7 @@ class PosixImpl final : public posix_server::Posix::Service {
   }
 
   ::grpc::Status GetSockOpt(
-      grpc_impl::ServerContext *context,
+      grpc::ServerContext *context,
       const ::posix_server::GetSockOptRequest *request,
       ::posix_server::GetSockOptResponse *response) override {
     switch (request->type()) {
@@ -230,7 +231,7 @@ class PosixImpl final : public posix_server::Posix::Service {
     return ::grpc::Status::OK;
   }
 
-  ::grpc::Status Listen(grpc_impl::ServerContext *context,
+  ::grpc::Status Listen(grpc::ServerContext *context,
                         const ::posix_server::ListenRequest *request,
                         ::posix_server::ListenResponse *response) override {
     response->set_ret(listen(request->sockfd(), request->backlog()));
@@ -269,7 +270,7 @@ class PosixImpl final : public posix_server::Posix::Service {
   }
 
   ::grpc::Status SetSockOpt(
-      grpc_impl::ServerContext *context,
+      grpc::ServerContext *context,
       const ::posix_server::SetSockOptRequest *request,
       ::posix_server::SetSockOptResponse *response) override {
     switch (request->optval().val_case()) {
@@ -302,7 +303,7 @@ class PosixImpl final : public posix_server::Posix::Service {
     return ::grpc::Status::OK;
   }
 
-  ::grpc::Status Socket(grpc_impl::ServerContext *context,
+  ::grpc::Status Socket(grpc::ServerContext *context,
                         const ::posix_server::SocketRequest *request,
                         ::posix_server::SocketResponse *response) override {
     response->set_fd(

From 78c8c9e4c8a47818df14aac192a33a0f7b9f9006 Mon Sep 17 00:00:00 2001
From: Dean Deng <deandeng@google.com>
Date: Mon, 24 Aug 2020 11:28:28 -0700
Subject: [PATCH 061/211] Update inotify documentation for gofer filesystem.

We now allow hard links to be created within gofer fs (see
github.com/google/gvisor/commit/f20e63e31b56784c596897e86f03441f9d05f567).
Update the inotify documentation accordingly.

PiperOrigin-RevId: 328177485
---
 pkg/sentry/fsimpl/gofer/gofer.go     |  7 +++++++
 pkg/sentry/syscalls/linux/linux64.go | 14 +++++++-------
 pkg/sentry/vfs/g3doc/inotify.md      | 18 +++++++++---------
 3 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index c6696b9d82..81d34cfe36 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -703,6 +703,13 @@ type dentry struct {
 	locks vfs.FileLocks
 
 	// Inotify watches for this dentry.
+	//
+	// Note that inotify may behave unexpectedly in the presence of hard links,
+	// because dentries corresponding to the same file have separate inotify
+	// watches when they should share the same set. This is the case because it is
+	// impossible for us to know for sure whether two dentries correspond to the
+	// same underlying file (see the gofer filesystem section of vfs/inotify.md for
+	// a more in-depth discussion on this matter).
 	watches vfs.Watches
 }
 
diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go
index 80c65164a0..da6bd85e1e 100644
--- a/pkg/sentry/syscalls/linux/linux64.go
+++ b/pkg/sentry/syscalls/linux/linux64.go
@@ -305,9 +305,9 @@ var AMD64 = &kernel.SyscallTable{
 		250: syscalls.Error("keyctl", syserror.EACCES, "Not available to user.", nil),
 		251: syscalls.CapError("ioprio_set", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending)
 		252: syscalls.CapError("ioprio_get", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending)
-		253: syscalls.PartiallySupported("inotify_init", InotifyInit, "inotify events are only available inside the sandbox.", nil),
-		254: syscalls.PartiallySupported("inotify_add_watch", InotifyAddWatch, "inotify events are only available inside the sandbox.", nil),
-		255: syscalls.PartiallySupported("inotify_rm_watch", InotifyRmWatch, "inotify events are only available inside the sandbox.", nil),
+		253: syscalls.PartiallySupported("inotify_init", InotifyInit, "Inotify events are only available inside the sandbox. Hard links are treated as different watch targets in gofer fs.", nil),
+		254: syscalls.PartiallySupported("inotify_add_watch", InotifyAddWatch, "Inotify events are only available inside the sandbox. Hard links are treated as different watch targets in gofer fs.", nil),
+		255: syscalls.PartiallySupported("inotify_rm_watch", InotifyRmWatch, "Inotify events are only available inside the sandbox. Hard links are treated as different watch targets in gofer fs.", nil),
 		256: syscalls.CapError("migrate_pages", linux.CAP_SYS_NICE, "", nil),
 		257: syscalls.Supported("openat", Openat),
 		258: syscalls.Supported("mkdirat", Mkdirat),
@@ -346,7 +346,7 @@ var AMD64 = &kernel.SyscallTable{
 		291: syscalls.Supported("epoll_create1", EpollCreate1),
 		292: syscalls.Supported("dup3", Dup3),
 		293: syscalls.Supported("pipe2", Pipe2),
-		294: syscalls.Supported("inotify_init1", InotifyInit1),
+		294: syscalls.PartiallySupported("inotify_init1", InotifyInit1, "Inotify events are only available inside the sandbox. Hard links are treated as different watch targets in gofer fs.", nil),
 		295: syscalls.Supported("preadv", Preadv),
 		296: syscalls.Supported("pwritev", Pwritev),
 		297: syscalls.Supported("rt_tgsigqueueinfo", RtTgsigqueueinfo),
@@ -454,9 +454,9 @@ var ARM64 = &kernel.SyscallTable{
 		23:  syscalls.Supported("dup", Dup),
 		24:  syscalls.Supported("dup3", Dup3),
 		25:  syscalls.PartiallySupported("fcntl", Fcntl, "Not all options are supported.", nil),
-		26:  syscalls.Supported("inotify_init1", InotifyInit1),
-		27:  syscalls.PartiallySupported("inotify_add_watch", InotifyAddWatch, "inotify events are only available inside the sandbox.", nil),
-		28:  syscalls.PartiallySupported("inotify_rm_watch", InotifyRmWatch, "inotify events are only available inside the sandbox.", nil),
+		26:  syscalls.PartiallySupported("inotify_init1", InotifyInit1, "Inotify events are only available inside the sandbox. Hard links are treated as different watch targets in gofer fs.", nil),
+		27:  syscalls.PartiallySupported("inotify_add_watch", InotifyAddWatch, "Inotify events are only available inside the sandbox. Hard links are treated as different watch targets in gofer fs.", nil),
+		28:  syscalls.PartiallySupported("inotify_rm_watch", InotifyRmWatch, "Inotify events are only available inside the sandbox. Hard links are treated as different watch targets in gofer fs.", nil),
 		29:  syscalls.PartiallySupported("ioctl", Ioctl, "Only a few ioctls are implemented for backing devices and file systems.", nil),
 		30:  syscalls.CapError("ioprio_set", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending)
 		31:  syscalls.CapError("ioprio_get", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending)
diff --git a/pkg/sentry/vfs/g3doc/inotify.md b/pkg/sentry/vfs/g3doc/inotify.md
index e7da49faab..833db213fd 100644
--- a/pkg/sentry/vfs/g3doc/inotify.md
+++ b/pkg/sentry/vfs/g3doc/inotify.md
@@ -28,9 +28,9 @@ The set of all watches held on a single file (i.e., the watch target) is stored
 in vfs.Watches. Each watch will belong to a different inotify instance (an
 instance can only have one watch on any watch target). The watches are stored in
 a map indexed by their vfs.Inotify owner’s id. Hard links and file descriptions
-to a single file will all share the same vfs.Watches. Activity on the target
-causes its vfs.Watches to generate notifications on its watches’ inotify
-instances.
+to a single file will all share the same vfs.Watches (with the exception of the
+gofer filesystem, described in a later section). Activity on the target causes
+its vfs.Watches to generate notifications on its watches’ inotify instances.
 
 ### vfs.Watch
 
@@ -103,12 +103,12 @@ inotify:
     unopened p9 file (and possibly an open FID), through which the Sentry
     interacts with the gofer.
     *   *Solution:* Because there is no inode structure stored in the sandbox,
-        inotify watches must be held on the dentry. This would be an issue in
-        the presence of hard links, where multiple dentries would need to share
-        the same set of watches, but in VFS2, we do not support the internal
-        creation of hard links on gofer fs. As a result, we make the assumption
-        that every dentry corresponds to a unique inode. However, the next point
-        raises an issue with this assumption:
+        inotify watches must be held on the dentry. For the purposes of inotify,
+        we assume that every dentry corresponds to a unique inode, which may
+        cause unexpected behavior in the presence of hard links, where multiple
+        dentries should share the same set of watches. Indeed, it is impossible
+        for us to be absolutely sure whether dentries correspond to the same
+        file or not, due to the following point:
 *   **The Sentry cannot always be aware of hard links on the remote
     filesystem.** There is no way for us to confirm whether two files on the
     remote filesystem are actually links to the same inode. QIDs and inodes are

From 901de6dc776c00bbdd60c12e800c6b10839b1466 Mon Sep 17 00:00:00 2001
From: Ghanan Gowripalan <ghanan@google.com>
Date: Mon, 24 Aug 2020 12:27:01 -0700
Subject: [PATCH 062/211] Consider loopback bound to all addresses in subnet

When a loopback interface is configured with an address and associated
subnet, the loopback should treat all addresses in that subnet as
addresses it owns.

This mimics Linux behaviour, as seen below:
```
$ ip addr show dev lo
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group ...
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
$ ping 192.0.2.1
PING 192.0.2.1 (192.0.2.1) 56(84) bytes of data.
^C
--- 192.0.2.1 ping statistics ---
2 packets transmitted, 0 received, 100% packet loss, time 1018ms

$ ping 192.0.2.2
PING 192.0.2.2 (192.0.2.2) 56(84) bytes of data.
^C
--- 192.0.2.2 ping statistics ---
3 packets transmitted, 0 received, 100% packet loss, time 2039ms

$ sudo ip addr add 192.0.2.1/24 dev lo
$ ip addr show dev lo
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group ...
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet 192.0.2.1/24 scope global lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
$ ping 192.0.2.1
PING 192.0.2.1 (192.0.2.1) 56(84) bytes of data.
64 bytes from 192.0.2.1: icmp_seq=1 ttl=64 time=0.131 ms
64 bytes from 192.0.2.1: icmp_seq=2 ttl=64 time=0.046 ms
64 bytes from 192.0.2.1: icmp_seq=3 ttl=64 time=0.048 ms
^C
--- 192.0.2.1 ping statistics ---
3 packets transmitted, 3 received, 0% packet loss, time 2042ms
rtt min/avg/max/mdev = 0.046/0.075/0.131/0.039 ms
$ ping 192.0.2.2
PING 192.0.2.2 (192.0.2.2) 56(84) bytes of data.
64 bytes from 192.0.2.2: icmp_seq=1 ttl=64 time=0.131 ms
64 bytes from 192.0.2.2: icmp_seq=2 ttl=64 time=0.069 ms
64 bytes from 192.0.2.2: icmp_seq=3 ttl=64 time=0.049 ms
64 bytes from 192.0.2.2: icmp_seq=4 ttl=64 time=0.035 ms
^C
--- 192.0.2.2 ping statistics ---
4 packets transmitted, 4 received, 0% packet loss, time 3049ms
rtt min/avg/max/mdev = 0.035/0.071/0.131/0.036 ms
```

Test: integration_test.TestLoopbackAcceptAllInSubnet
PiperOrigin-RevId: 328188546
---
 pkg/tcpip/stack/nic.go                        |  13 +-
 pkg/tcpip/tests/integration/BUILD             |   6 +-
 pkg/tcpip/tests/integration/loopback_test.go  | 229 ++++++++++++++++++
 .../integration/multicast_broadcast_test.go   |   2 +-
 test/syscalls/BUILD                           |   8 +
 test/syscalls/linux/BUILD                     |  83 +++++++
 .../socket_ip_udp_unbound_netlink_util.cc     |  58 +++++
 .../socket_ip_udp_unbound_netlink_util.h      |  34 +++
 ...ocket_ipv4_udp_unbound_loopback_netlink.cc |  32 +++
 .../linux/socket_ipv4_udp_unbound_netlink.cc  |  60 +++++
 .../linux/socket_ipv4_udp_unbound_netlink.h   |  29 +++
 ...ocket_ipv6_udp_unbound_loopback_netlink.cc |  32 +++
 .../linux/socket_ipv6_udp_unbound_netlink.cc  |  60 +++++
 .../linux/socket_ipv6_udp_unbound_netlink.h   |  29 +++
 14 files changed, 672 insertions(+), 3 deletions(-)
 create mode 100644 pkg/tcpip/tests/integration/loopback_test.go
 create mode 100644 test/syscalls/linux/socket_ip_udp_unbound_netlink_util.cc
 create mode 100644 test/syscalls/linux/socket_ip_udp_unbound_netlink_util.h
 create mode 100644 test/syscalls/linux/socket_ipv4_udp_unbound_loopback_netlink.cc
 create mode 100644 test/syscalls/linux/socket_ipv4_udp_unbound_netlink.cc
 create mode 100644 test/syscalls/linux/socket_ipv4_udp_unbound_netlink.h
 create mode 100644 test/syscalls/linux/socket_ipv6_udp_unbound_loopback_netlink.cc
 create mode 100644 test/syscalls/linux/socket_ipv6_udp_unbound_netlink.cc
 create mode 100644 test/syscalls/linux/socket_ipv6_udp_unbound_netlink.h

diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index 7282927820..aff29f9cca 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -666,8 +666,19 @@ func (n *NIC) getRefOrCreateTemp(protocol tcpip.NetworkProtocolNumber, address t
 	}
 
 	// A usable reference was not found, create a temporary one if requested by
-	// the caller or if the address is found in the NIC's subnets.
+	// the caller or if the address is found in the NIC's subnets and the NIC is
+	// a loopback interface.
 	createTempEP := spoofingOrPromiscuous
+	if !createTempEP && n.isLoopback() {
+		for _, r := range n.mu.endpoints {
+			addr := r.addrWithPrefix()
+			subnet := addr.Subnet()
+			if subnet.Contains(address) {
+				createTempEP = true
+				break
+			}
+		}
+	}
 	n.mu.RUnlock()
 
 	if !createTempEP {
diff --git a/pkg/tcpip/tests/integration/BUILD b/pkg/tcpip/tests/integration/BUILD
index 6d52af98a6..06c7a3cd35 100644
--- a/pkg/tcpip/tests/integration/BUILD
+++ b/pkg/tcpip/tests/integration/BUILD
@@ -5,12 +5,16 @@ package(licenses = ["notice"])
 go_test(
     name = "integration_test",
     size = "small",
-    srcs = ["multicast_broadcast_test.go"],
+    srcs = [
+        "loopback_test.go",
+        "multicast_broadcast_test.go",
+    ],
     deps = [
         "//pkg/tcpip",
         "//pkg/tcpip/buffer",
         "//pkg/tcpip/header",
         "//pkg/tcpip/link/channel",
+        "//pkg/tcpip/link/loopback",
         "//pkg/tcpip/network/ipv4",
         "//pkg/tcpip/network/ipv6",
         "//pkg/tcpip/stack",
diff --git a/pkg/tcpip/tests/integration/loopback_test.go b/pkg/tcpip/tests/integration/loopback_test.go
new file mode 100644
index 0000000000..3a2f758377
--- /dev/null
+++ b/pkg/tcpip/tests/integration/loopback_test.go
@@ -0,0 +1,229 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package integration_test
+
+import (
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"gvisor.dev/gvisor/pkg/tcpip"
+	"gvisor.dev/gvisor/pkg/tcpip/buffer"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
+	"gvisor.dev/gvisor/pkg/tcpip/link/loopback"
+	"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+	"gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+	"gvisor.dev/gvisor/pkg/tcpip/stack"
+	"gvisor.dev/gvisor/pkg/tcpip/transport/udp"
+	"gvisor.dev/gvisor/pkg/waiter"
+)
+
+// TestLoopbackAcceptAllInSubnet tests that a loopback interface considers
+// itself bound to all addresses in the subnet of an assigned address.
+func TestLoopbackAcceptAllInSubnet(t *testing.T) {
+	const (
+		nicID     = 1
+		localPort = 80
+	)
+
+	data := []byte{1, 2, 3, 4}
+
+	ipv4ProtocolAddress := tcpip.ProtocolAddress{
+		Protocol:          header.IPv4ProtocolNumber,
+		AddressWithPrefix: ipv4Addr,
+	}
+	ipv4Bytes := []byte(ipv4Addr.Address)
+	ipv4Bytes[len(ipv4Bytes)-1]++
+	otherIPv4Address := tcpip.Address(ipv4Bytes)
+
+	ipv6ProtocolAddress := tcpip.ProtocolAddress{
+		Protocol:          header.IPv6ProtocolNumber,
+		AddressWithPrefix: ipv6Addr,
+	}
+	ipv6Bytes := []byte(ipv6Addr.Address)
+	ipv6Bytes[len(ipv6Bytes)-1]++
+	otherIPv6Address := tcpip.Address(ipv6Bytes)
+
+	tests := []struct {
+		name       string
+		addAddress tcpip.ProtocolAddress
+		bindAddr   tcpip.Address
+		dstAddr    tcpip.Address
+		expectRx   bool
+	}{
+		{
+			name:       "IPv4 bind to wildcard and send to assigned address",
+			addAddress: ipv4ProtocolAddress,
+			dstAddr:    ipv4Addr.Address,
+			expectRx:   true,
+		},
+		{
+			name:       "IPv4 bind to wildcard and send to other subnet-local address",
+			addAddress: ipv4ProtocolAddress,
+			dstAddr:    otherIPv4Address,
+			expectRx:   true,
+		},
+		{
+			name:       "IPv4 bind to wildcard send to other address",
+			addAddress: ipv4ProtocolAddress,
+			dstAddr:    remoteIPv4Addr,
+			expectRx:   false,
+		},
+		{
+			name:       "IPv4 bind to other subnet-local address and send to assigned address",
+			addAddress: ipv4ProtocolAddress,
+			bindAddr:   otherIPv4Address,
+			dstAddr:    ipv4Addr.Address,
+			expectRx:   false,
+		},
+		{
+			name:       "IPv4 bind and send to other subnet-local address",
+			addAddress: ipv4ProtocolAddress,
+			bindAddr:   otherIPv4Address,
+			dstAddr:    otherIPv4Address,
+			expectRx:   true,
+		},
+		{
+			name:       "IPv4 bind to assigned address and send to other subnet-local address",
+			addAddress: ipv4ProtocolAddress,
+			bindAddr:   ipv4Addr.Address,
+			dstAddr:    otherIPv4Address,
+			expectRx:   false,
+		},
+
+		{
+			name:       "IPv6 bind and send to assigned address",
+			addAddress: ipv6ProtocolAddress,
+			bindAddr:   ipv6Addr.Address,
+			dstAddr:    ipv6Addr.Address,
+			expectRx:   true,
+		},
+		{
+			name:       "IPv6 bind to wildcard and send to assigned address",
+			addAddress: ipv6ProtocolAddress,
+			dstAddr:    ipv6Addr.Address,
+			expectRx:   true,
+		},
+		{
+			name:       "IPv6 bind to wildcard and send to other subnet-local address",
+			addAddress: ipv6ProtocolAddress,
+			dstAddr:    otherIPv6Address,
+			expectRx:   true,
+		},
+		{
+			name:       "IPv6 bind to wildcard send to other address",
+			addAddress: ipv6ProtocolAddress,
+			dstAddr:    remoteIPv6Addr,
+			expectRx:   false,
+		},
+		{
+			name:       "IPv6 bind to other subnet-local address and send to assigned address",
+			addAddress: ipv6ProtocolAddress,
+			bindAddr:   otherIPv6Address,
+			dstAddr:    ipv6Addr.Address,
+			expectRx:   false,
+		},
+		{
+			name:       "IPv6 bind and send to other subnet-local address",
+			addAddress: ipv6ProtocolAddress,
+			bindAddr:   otherIPv6Address,
+			dstAddr:    otherIPv6Address,
+			expectRx:   true,
+		},
+		{
+			name:       "IPv6 bind to assigned address and send to other subnet-local address",
+			addAddress: ipv6ProtocolAddress,
+			bindAddr:   ipv6Addr.Address,
+			dstAddr:    otherIPv6Address,
+			expectRx:   false,
+		},
+		{
+			name:       "IPv6 bind and send to assigned address",
+			addAddress: ipv6ProtocolAddress,
+			bindAddr:   ipv6Addr.Address,
+			dstAddr:    ipv6Addr.Address,
+			expectRx:   true,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			s := stack.New(stack.Options{
+				NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
+				TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
+			})
+			if err := s.CreateNIC(nicID, loopback.New()); err != nil {
+				t.Fatalf("CreateNIC(%d, _): %s", nicID, err)
+			}
+			if err := s.AddProtocolAddress(nicID, test.addAddress); err != nil {
+				t.Fatalf("AddProtocolAddress(%d, %+v): %s", nicID, test.addAddress, err)
+			}
+			s.SetRouteTable([]tcpip.Route{
+				tcpip.Route{
+					Destination: header.IPv4EmptySubnet,
+					NIC:         nicID,
+				},
+				tcpip.Route{
+					Destination: header.IPv6EmptySubnet,
+					NIC:         nicID,
+				},
+			})
+
+			wq := waiter.Queue{}
+			rep, err := s.NewEndpoint(udp.ProtocolNumber, test.addAddress.Protocol, &wq)
+			if err != nil {
+				t.Fatalf("NewEndpoint(%d, %d, _): %s", udp.ProtocolNumber, test.addAddress.Protocol, err)
+			}
+			defer rep.Close()
+
+			bindAddr := tcpip.FullAddress{Addr: test.bindAddr, Port: localPort}
+			if err := rep.Bind(bindAddr); err != nil {
+				t.Fatalf("rep.Bind(%+v): %s", bindAddr, err)
+			}
+
+			sep, err := s.NewEndpoint(udp.ProtocolNumber, test.addAddress.Protocol, &wq)
+			if err != nil {
+				t.Fatalf("NewEndpoint(%d, %d, _): %s", udp.ProtocolNumber, test.addAddress.Protocol, err)
+			}
+			defer sep.Close()
+
+			wopts := tcpip.WriteOptions{
+				To: &tcpip.FullAddress{
+					Addr: test.dstAddr,
+					Port: localPort,
+				},
+			}
+			n, _, err := sep.Write(tcpip.SlicePayload(data), wopts)
+			if err != nil {
+				t.Fatalf("sep.Write(_, _): %s", err)
+			}
+			if want := int64(len(data)); n != want {
+				t.Fatalf("got sep.Write(_, _) = (%d, _, nil), want = (%d, _, nil)", n, want)
+			}
+
+			if gotPayload, _, err := rep.Read(nil); test.expectRx {
+				if err != nil {
+					t.Fatalf("reep.Read(nil): %s", err)
+				}
+				if diff := cmp.Diff(buffer.View(data), gotPayload); diff != "" {
+					t.Errorf("got UDP payload mismatch (-want +got):\n%s", diff)
+				}
+			} else {
+				if err != tcpip.ErrWouldBlock {
+					t.Fatalf("got rep.Read(nil) = (%x, _, %s), want = (_, _, %s)", gotPayload, err, tcpip.ErrWouldBlock)
+				}
+			}
+		})
+	}
+}
diff --git a/pkg/tcpip/tests/integration/multicast_broadcast_test.go b/pkg/tcpip/tests/integration/multicast_broadcast_test.go
index 9f0dd4d6d3..52c27e0454 100644
--- a/pkg/tcpip/tests/integration/multicast_broadcast_test.go
+++ b/pkg/tcpip/tests/integration/multicast_broadcast_test.go
@@ -430,7 +430,7 @@ func TestIncomingMulticastAndBroadcast(t *testing.T) {
 				}
 			} else {
 				if err != tcpip.ErrWouldBlock {
-					t.Fatalf("got Read(nil) = (%x, _, %v), want = (_, _, %s)", gotPayload, err, tcpip.ErrWouldBlock)
+					t.Fatalf("got Read(nil) = (%x, _, %s), want = (_, _, %s)", gotPayload, err, tcpip.ErrWouldBlock)
 				}
 			}
 		})
diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD
index ad53e92e5e..eea1401acf 100644
--- a/test/syscalls/BUILD
+++ b/test/syscalls/BUILD
@@ -674,6 +674,14 @@ syscall_test(
     test = "//test/syscalls/linux:socket_ipv4_udp_unbound_loopback_nogotsan_test",
 )
 
+syscall_test(
+    test = "//test/syscalls/linux:socket_ipv4_udp_unbound_loopback_netlink_test",
+)
+
+syscall_test(
+    test = "//test/syscalls/linux:socket_ipv6_udp_unbound_loopback_netlink_test",
+)
+
 syscall_test(
     test = "//test/syscalls/linux:socket_ip_unbound_test",
 )
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index ecd2d8d2a7..ed0b6ecf4a 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -2402,6 +2402,57 @@ cc_library(
     alwayslink = 1,
 )
 
+cc_library(
+    name = "socket_ip_udp_unbound_netlink_test_utils",
+    testonly = 1,
+    srcs = [
+        "socket_ip_udp_unbound_netlink_util.cc",
+    ],
+    hdrs = [
+        "socket_ip_udp_unbound_netlink_util.h",
+    ],
+    deps = [
+        ":socket_test_util",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "socket_ipv4_udp_unbound_netlink_test_cases",
+    testonly = 1,
+    srcs = [
+        "socket_ipv4_udp_unbound_netlink.cc",
+    ],
+    hdrs = [
+        "socket_ipv4_udp_unbound_netlink.h",
+    ],
+    deps = [
+        ":socket_ip_udp_unbound_netlink_test_utils",
+        ":socket_netlink_route_util",
+        "//test/util:capability_util",
+        gtest,
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "socket_ipv6_udp_unbound_netlink_test_cases",
+    testonly = 1,
+    srcs = [
+        "socket_ipv6_udp_unbound_netlink.cc",
+    ],
+    hdrs = [
+        "socket_ipv6_udp_unbound_netlink.h",
+    ],
+    deps = [
+        ":socket_ip_udp_unbound_netlink_test_utils",
+        ":socket_netlink_route_util",
+        "//test/util:capability_util",
+        gtest,
+    ],
+    alwayslink = 1,
+)
+
 cc_library(
     name = "socket_ipv4_udp_unbound_external_networking_test_cases",
     testonly = 1,
@@ -2755,6 +2806,38 @@ cc_binary(
     ],
 )
 
+cc_binary(
+    name = "socket_ipv4_udp_unbound_loopback_netlink_test",
+    testonly = 1,
+    srcs = [
+        "socket_ipv4_udp_unbound_loopback_netlink.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_ipv4_udp_unbound_netlink_test_cases",
+        ":socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_ipv6_udp_unbound_loopback_netlink_test",
+    testonly = 1,
+    srcs = [
+        "socket_ipv6_udp_unbound_loopback_netlink.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_ipv6_udp_unbound_netlink_test_cases",
+        ":socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
 cc_binary(
     name = "socket_ip_unbound_test",
     testonly = 1,
diff --git a/test/syscalls/linux/socket_ip_udp_unbound_netlink_util.cc b/test/syscalls/linux/socket_ip_udp_unbound_netlink_util.cc
new file mode 100644
index 0000000000..13ffafde72
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_udp_unbound_netlink_util.cc
@@ -0,0 +1,58 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_ip_udp_unbound_netlink_util.h"
+
+namespace gvisor {
+namespace testing {
+
+const size_t kSendBufSize = 200;
+
+void IPUDPUnboundSocketNetlinkTest::TestSendRecv(TestAddress sender_addr,
+                                                 TestAddress receiver_addr) {
+  auto snd_sock = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto rcv_sock = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  EXPECT_THAT(
+      bind(snd_sock->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr),
+           sender_addr.addr_len),
+      SyscallSucceeds());
+
+  EXPECT_THAT(
+      bind(rcv_sock->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(rcv_sock->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+  char send_buf[kSendBufSize];
+  RandomizeBuffer(send_buf, kSendBufSize);
+  EXPECT_THAT(
+      RetryEINTR(sendto)(snd_sock->get(), send_buf, kSendBufSize, 0,
+                         reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                         receiver_addr.addr_len),
+      SyscallSucceedsWithValue(kSendBufSize));
+
+  // Check that we received the packet.
+  char recv_buf[kSendBufSize] = {};
+  ASSERT_THAT(RetryEINTR(recv)(rcv_sock->get(), recv_buf, kSendBufSize, 0),
+              SyscallSucceedsWithValue(kSendBufSize));
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, kSendBufSize));
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_udp_unbound_netlink_util.h b/test/syscalls/linux/socket_ip_udp_unbound_netlink_util.h
new file mode 100644
index 0000000000..157fb0939b
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_udp_unbound_netlink_util.h
@@ -0,0 +1,34 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_UDP_UNBOUND_NETLINK_UTIL_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_UDP_UNBOUND_NETLINK_UTIL_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to IP UDP sockets.
+class IPUDPUnboundSocketNetlinkTest : public SimpleSocketTest {
+ public:
+  // TestSendRecv tests sending and receiving a UDP packet from |sender_addr| to
+  // |receiver_addr|.
+  void TestSendRecv(TestAddress sender_addr, TestAddress receiver_addr);
+};
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_UDP_UNBOUND_NETLINK_UTIL_H_
diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_loopback_netlink.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_loopback_netlink.cc
new file mode 100644
index 0000000000..8052bf4046
--- /dev/null
+++ b/test/syscalls/linux/socket_ipv4_udp_unbound_loopback_netlink.cc
@@ -0,0 +1,32 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_ipv4_udp_unbound_netlink.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+INSTANTIATE_TEST_SUITE_P(
+    IPv4UDPSockets, IPv4UDPUnboundSocketNetlinkTest,
+    ::testing::ValuesIn(ApplyVec<SocketKind>(IPv4UDPUnboundSocket,
+                                             AllBitwiseCombinations(List<int>{
+                                                 0, SOCK_NONBLOCK}))));
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.cc
new file mode 100644
index 0000000000..696fbb1898
--- /dev/null
+++ b/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.cc
@@ -0,0 +1,60 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_ipv4_udp_unbound_netlink.h"
+
+#include <arpa/inet.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_netlink_route_util.h"
+#include "test/util/capability_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Checks that the loopback interface considers itself bound to all IPs in an
+// associated subnet.
+TEST_P(IPv4UDPUnboundSocketNetlinkTest, JoinSubnet) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
+
+  // Add an IP address to the loopback interface.
+  Link loopback_link = ASSERT_NO_ERRNO_AND_VALUE(LoopbackLink());
+  struct in_addr addr;
+  EXPECT_EQ(1, inet_pton(AF_INET, "192.0.2.1", &addr));
+  EXPECT_NO_ERRNO(LinkAddLocalAddr(loopback_link.index, AF_INET,
+                                   /*prefixlen=*/24, &addr, sizeof(addr)));
+
+  // Send from an unassigned address but an address that is in the subnet
+  // associated with the loopback interface.
+  TestAddress sender_addr("V4NotAssignd1");
+  sender_addr.addr.ss_family = AF_INET;
+  sender_addr.addr_len = sizeof(sockaddr_in);
+  EXPECT_EQ(1, inet_pton(AF_INET, "192.0.2.2",
+                         &(reinterpret_cast<sockaddr_in*>(&sender_addr.addr)
+                               ->sin_addr.s_addr)));
+
+  // Send the packet to an unassigned address but an address that is in the
+  // subnet associated with the loopback interface.
+  TestAddress receiver_addr("V4NotAssigned2");
+  receiver_addr.addr.ss_family = AF_INET;
+  receiver_addr.addr_len = sizeof(sockaddr_in);
+  EXPECT_EQ(1, inet_pton(AF_INET, "192.0.2.254",
+                         &(reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)
+                               ->sin_addr.s_addr)));
+
+  TestSendRecv(sender_addr, receiver_addr);
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.h b/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.h
new file mode 100644
index 0000000000..fcfb3318e2
--- /dev/null
+++ b/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.h
@@ -0,0 +1,29 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_NETLINK_UTIL_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_NETLINK_UTIL_H_
+
+#include "test/syscalls/linux/socket_ip_udp_unbound_netlink_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to IPv4 UDP sockets.
+using IPv4UDPUnboundSocketNetlinkTest = IPUDPUnboundSocketNetlinkTest;
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_NETLINK_UTIL_H_
diff --git a/test/syscalls/linux/socket_ipv6_udp_unbound_loopback_netlink.cc b/test/syscalls/linux/socket_ipv6_udp_unbound_loopback_netlink.cc
new file mode 100644
index 0000000000..17021ff82c
--- /dev/null
+++ b/test/syscalls/linux/socket_ipv6_udp_unbound_loopback_netlink.cc
@@ -0,0 +1,32 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_ipv6_udp_unbound_netlink.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+INSTANTIATE_TEST_SUITE_P(
+    IPv6UDPSockets, IPv6UDPUnboundSocketNetlinkTest,
+    ::testing::ValuesIn(ApplyVec<SocketKind>(IPv6UDPUnboundSocket,
+                                             AllBitwiseCombinations(List<int>{
+                                                 0, SOCK_NONBLOCK}))));
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.cc b/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.cc
new file mode 100644
index 0000000000..6275b5aed3
--- /dev/null
+++ b/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.cc
@@ -0,0 +1,60 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_ipv6_udp_unbound_netlink.h"
+
+#include <arpa/inet.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_netlink_route_util.h"
+#include "test/util/capability_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Checks that the loopback interface considers itself bound to all IPs in an
+// associated subnet.
+TEST_P(IPv6UDPUnboundSocketNetlinkTest, JoinSubnet) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
+
+  // Add an IP address to the loopback interface.
+  Link loopback_link = ASSERT_NO_ERRNO_AND_VALUE(LoopbackLink());
+  struct in6_addr addr;
+  EXPECT_EQ(1, inet_pton(AF_INET6, "2001:db8::1", &addr));
+  EXPECT_NO_ERRNO(LinkAddLocalAddr(loopback_link.index, AF_INET6,
+                                   /*prefixlen=*/64, &addr, sizeof(addr)));
+
+  // Send from an unassigned address but an address that is in the subnet
+  // associated with the loopback interface.
+  TestAddress sender_addr("V6NotAssignd1");
+  sender_addr.addr.ss_family = AF_INET6;
+  sender_addr.addr_len = sizeof(sockaddr_in6);
+  EXPECT_EQ(1, inet_pton(AF_INET6, "2001:db8::2",
+                         reinterpret_cast<sockaddr_in6*>(&sender_addr.addr)
+                             ->sin6_addr.s6_addr));
+
+  // Send the packet to an unassigned address but an address that is in the
+  // subnet associated with the loopback interface.
+  TestAddress receiver_addr("V6NotAssigned2");
+  receiver_addr.addr.ss_family = AF_INET6;
+  receiver_addr.addr_len = sizeof(sockaddr_in6);
+  EXPECT_EQ(1, inet_pton(AF_INET6, "2001:db8::ffff:ffff:ffff:ffff",
+                         reinterpret_cast<sockaddr_in6*>(&receiver_addr.addr)
+                             ->sin6_addr.s6_addr));
+
+  TestSendRecv(sender_addr, receiver_addr);
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.h b/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.h
new file mode 100644
index 0000000000..6a2b0a5be4
--- /dev/null
+++ b/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.h
@@ -0,0 +1,29 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV6_UDP_UNBOUND_NETLINK_UTIL_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV6_UDP_UNBOUND_NETLINK_UTIL_H_
+
+#include "test/syscalls/linux/socket_ip_udp_unbound_netlink_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to IPv6 UDP sockets.
+using IPv6UDPUnboundSocketNetlinkTest = IPUDPUnboundSocketNetlinkTest;
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV6_UDP_UNBOUND_NETLINK_UTIL_H_

From 7eb284eca20b46570c3bd4e9a49113ac5165afbd Mon Sep 17 00:00:00 2001
From: Michael Pratt <mpratt@google.com>
Date: Mon, 24 Aug 2020 12:56:58 -0700
Subject: [PATCH 063/211] Bump build constraints to 1.17

This enables pre-release testing with 1.16. The intention is to replace these
with a nogo check before the next release.

PiperOrigin-RevId: 328193911
---
 pkg/procid/procid_amd64.s                           | 2 +-
 pkg/procid/procid_arm64.s                           | 2 +-
 pkg/sentry/platform/kvm/bluepill_unsafe.go          | 2 +-
 pkg/sentry/platform/kvm/machine_unsafe.go           | 2 +-
 pkg/sentry/platform/ptrace/subprocess_unsafe.go     | 2 +-
 pkg/sentry/vfs/mount_unsafe.go                      | 2 +-
 pkg/sleep/sleep_unsafe.go                           | 2 +-
 pkg/sync/memmove_unsafe.go                          | 2 +-
 pkg/sync/mutex_unsafe.go                            | 2 +-
 pkg/sync/rwmutex_unsafe.go                          | 2 +-
 pkg/syncevent/waiter_unsafe.go                      | 2 +-
 pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go | 2 +-
 pkg/tcpip/time_unsafe.go                            | 2 +-
 13 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/pkg/procid/procid_amd64.s b/pkg/procid/procid_amd64.s
index 7c622e5d77..a45920040b 100644
--- a/pkg/procid/procid_amd64.s
+++ b/pkg/procid/procid_amd64.s
@@ -14,7 +14,7 @@
 
 // +build amd64
 // +build go1.8
-// +build !go1.16
+// +build !go1.17
 
 #include "textflag.h"
 
diff --git a/pkg/procid/procid_arm64.s b/pkg/procid/procid_arm64.s
index 48ebb5fd16..9d3b0666d2 100644
--- a/pkg/procid/procid_arm64.s
+++ b/pkg/procid/procid_arm64.s
@@ -14,7 +14,7 @@
 
 // +build arm64
 // +build go1.8
-// +build !go1.16
+// +build !go1.17
 
 #include "textflag.h"
 
diff --git a/pkg/sentry/platform/kvm/bluepill_unsafe.go b/pkg/sentry/platform/kvm/bluepill_unsafe.go
index bf357de1ac..979be5d892 100644
--- a/pkg/sentry/platform/kvm/bluepill_unsafe.go
+++ b/pkg/sentry/platform/kvm/bluepill_unsafe.go
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 // +build go1.12
-// +build !go1.16
+// +build !go1.17
 
 // Check go:linkname function signatures when updating Go version.
 
diff --git a/pkg/sentry/platform/kvm/machine_unsafe.go b/pkg/sentry/platform/kvm/machine_unsafe.go
index 9f86f6a7a1..607c82156f 100644
--- a/pkg/sentry/platform/kvm/machine_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_unsafe.go
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 // +build go1.12
-// +build !go1.16
+// +build !go1.17
 
 // Check go:linkname function signatures when updating Go version.
 
diff --git a/pkg/sentry/platform/ptrace/subprocess_unsafe.go b/pkg/sentry/platform/ptrace/subprocess_unsafe.go
index 0bee995e45..7ee20d89ae 100644
--- a/pkg/sentry/platform/ptrace/subprocess_unsafe.go
+++ b/pkg/sentry/platform/ptrace/subprocess_unsafe.go
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 // +build go1.12
-// +build !go1.16
+// +build !go1.17
 
 // Check go:linkname function signatures when updating Go version.
 
diff --git a/pkg/sentry/vfs/mount_unsafe.go b/pkg/sentry/vfs/mount_unsafe.go
index 777d631cb9..da2a2e9c4e 100644
--- a/pkg/sentry/vfs/mount_unsafe.go
+++ b/pkg/sentry/vfs/mount_unsafe.go
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 // +build go1.12
-// +build !go1.16
+// +build !go1.17
 
 // Check go:linkname function signatures when updating Go version.
 
diff --git a/pkg/sleep/sleep_unsafe.go b/pkg/sleep/sleep_unsafe.go
index 118805492b..19bce2afb0 100644
--- a/pkg/sleep/sleep_unsafe.go
+++ b/pkg/sleep/sleep_unsafe.go
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 // +build go1.11
-// +build !go1.16
+// +build !go1.17
 
 // Check go:linkname function signatures when updating Go version.
 
diff --git a/pkg/sync/memmove_unsafe.go b/pkg/sync/memmove_unsafe.go
index 1d77806950..f5e6300098 100644
--- a/pkg/sync/memmove_unsafe.go
+++ b/pkg/sync/memmove_unsafe.go
@@ -4,7 +4,7 @@
 // license that can be found in the LICENSE file.
 
 // +build go1.12
-// +build !go1.16
+// +build !go1.17
 
 // Check go:linkname function signatures when updating Go version.
 
diff --git a/pkg/sync/mutex_unsafe.go b/pkg/sync/mutex_unsafe.go
index dc034d5612..f4c2e96420 100644
--- a/pkg/sync/mutex_unsafe.go
+++ b/pkg/sync/mutex_unsafe.go
@@ -4,7 +4,7 @@
 // license that can be found in the LICENSE file.
 
 // +build go1.13
-// +build !go1.16
+// +build !go1.17
 
 // When updating the build constraint (above), check that syncMutex matches the
 // standard library sync.Mutex definition.
diff --git a/pkg/sync/rwmutex_unsafe.go b/pkg/sync/rwmutex_unsafe.go
index 995c0346e6..b3b4dee78d 100644
--- a/pkg/sync/rwmutex_unsafe.go
+++ b/pkg/sync/rwmutex_unsafe.go
@@ -4,7 +4,7 @@
 // license that can be found in the LICENSE file.
 
 // +build go1.13
-// +build !go1.16
+// +build !go1.17
 
 // Check go:linkname function signatures when updating Go version.
 
diff --git a/pkg/syncevent/waiter_unsafe.go b/pkg/syncevent/waiter_unsafe.go
index ad271e1a07..518f18479a 100644
--- a/pkg/syncevent/waiter_unsafe.go
+++ b/pkg/syncevent/waiter_unsafe.go
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 // +build go1.11
-// +build !go1.16
+// +build !go1.17
 
 // Check go:linkname function signatures when updating Go version.
 
diff --git a/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go b/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go
index 99313ee25e..5db4bf12be 100644
--- a/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go
+++ b/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go
@@ -14,7 +14,7 @@
 
 // +build linux,amd64 linux,arm64
 // +build go1.12
-// +build !go1.16
+// +build !go1.17
 
 // Check go:linkname function signatures when updating Go version.
 
diff --git a/pkg/tcpip/time_unsafe.go b/pkg/tcpip/time_unsafe.go
index f32d580914..6063635676 100644
--- a/pkg/tcpip/time_unsafe.go
+++ b/pkg/tcpip/time_unsafe.go
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 // +build go1.9
-// +build !go1.16
+// +build !go1.17
 
 // Check go:linkname function signatures when updating Go version.
 

From 13d63f13f3b28e35c182c674c904520d7bd577db Mon Sep 17 00:00:00 2001
From: Zach Koopmans <zkoopmans@google.com>
Date: Mon, 24 Aug 2020 13:50:56 -0700
Subject: [PATCH 064/211] Remove go profiling flag from dockerutil.

Go profiling was removed from runsc debug in a previous change.

PiperOrigin-RevId: 328203826
---
 pkg/test/dockerutil/dockerutil.go | 1 -
 pkg/test/dockerutil/profile.go    | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/pkg/test/dockerutil/dockerutil.go b/pkg/test/dockerutil/dockerutil.go
index 952871f956..7027df1a5d 100644
--- a/pkg/test/dockerutil/dockerutil.go
+++ b/pkg/test/dockerutil/dockerutil.go
@@ -60,7 +60,6 @@ var (
 	// enabled for each run.
 	pprofBlock = flag.Bool("pprof-block", false, "enables block profiling with runsc debug")
 	pprofCPU   = flag.Bool("pprof-cpu", false, "enables CPU profiling with runsc debug")
-	pprofGo    = flag.Bool("pprof-go", false, "enables goroutine profiling with runsc debug")
 	pprofHeap  = flag.Bool("pprof-heap", false, "enables heap profiling with runsc debug")
 	pprofMutex = flag.Bool("pprof-mutex", false, "enables mutex profiling with runsc debug")
 )
diff --git a/pkg/test/dockerutil/profile.go b/pkg/test/dockerutil/profile.go
index f0396ef248..55f9496cde 100644
--- a/pkg/test/dockerutil/profile.go
+++ b/pkg/test/dockerutil/profile.go
@@ -63,7 +63,7 @@ type Pprof struct {
 
 // MakePprofFromFlags makes a Pprof profile from flags.
 func MakePprofFromFlags(c *Container) *Pprof {
-	if !(*pprofBlock || *pprofCPU || *pprofGo || *pprofHeap || *pprofMutex) {
+	if !(*pprofBlock || *pprofCPU || *pprofHeap || *pprofMutex) {
 		return nil
 	}
 	return &Pprof{

From 1ea284305f0aea9452dc590023b271f66a46e0b5 Mon Sep 17 00:00:00 2001
From: gVisor bot <gvisor-bot@google.com>
Date: Mon, 24 Aug 2020 16:32:26 -0700
Subject: [PATCH 065/211] Add check for same source in merkle tree lib

If the data is in the same Reader as the merkle tree, we should verify
from the first layer in the tree, instead of from the beginning.

PiperOrigin-RevId: 328230988
---
 pkg/merkletree/merkletree.go      |  61 +++++++-
 pkg/merkletree/merkletree_test.go | 251 ++++++++++++++++++------------
 2 files changed, 205 insertions(+), 107 deletions(-)

diff --git a/pkg/merkletree/merkletree.go b/pkg/merkletree/merkletree.go
index 955c9c473b..1a0477c6a3 100644
--- a/pkg/merkletree/merkletree.go
+++ b/pkg/merkletree/merkletree.go
@@ -45,12 +45,25 @@ type Layout struct {
 
 // InitLayout initializes and returns a new Layout object describing the structure
 // of a tree. dataSize specifies the size of input data in bytes.
-func InitLayout(dataSize int64) Layout {
+func InitLayout(dataSize int64, dataAndTreeInSameFile bool) Layout {
 	layout := Layout{
 		blockSize: usermem.PageSize,
 		// TODO(b/156980949): Allow config other hash methods (SHA384/SHA512).
 		digestSize: sha256DigestSize,
 	}
+
+	// treeStart is the offset (in bytes) of the first level of the tree in
+	// the file. If data and tree are in different files, treeStart should
+	// be zero. If data is in the same file as the tree, treeStart points
+	// to the block after the last data block (which may be zero-padded).
+	var treeStart int64
+	if dataAndTreeInSameFile {
+		treeStart = dataSize
+		if dataSize%layout.blockSize != 0 {
+			treeStart += layout.blockSize - dataSize%layout.blockSize
+		}
+	}
+
 	numBlocks := (dataSize + layout.blockSize - 1) / layout.blockSize
 	level := 0
 	offset := int64(0)
@@ -60,14 +73,15 @@ func InitLayout(dataSize int64) Layout {
 	// contain the hashes of the data blocks, while level numLevels - 1 is
 	// the root.
 	for numBlocks > 1 {
-		layout.levelOffset = append(layout.levelOffset, offset*layout.blockSize)
+		layout.levelOffset = append(layout.levelOffset, treeStart+offset*layout.blockSize)
 		// Round numBlocks up to fill up a block.
 		numBlocks += (layout.hashesPerBlock() - numBlocks%layout.hashesPerBlock()) % layout.hashesPerBlock()
 		offset += numBlocks / layout.hashesPerBlock()
 		numBlocks = numBlocks / layout.hashesPerBlock()
 		level++
 	}
-	layout.levelOffset = append(layout.levelOffset, offset*layout.blockSize)
+	layout.levelOffset = append(layout.levelOffset, treeStart+offset*layout.blockSize)
+
 	return layout
 }
 
@@ -107,11 +121,44 @@ func (layout Layout) blockOffset(level int, index int64) int64 {
 // written to treeWriter. The treeReader should be able to read the tree after
 // it has been written. That is, treeWriter and treeReader should point to the
 // same underlying data but have separate cursors.
-func Generate(data io.Reader, dataSize int64, treeReader io.Reader, treeWriter io.Writer) ([]byte, error) {
-	layout := InitLayout(dataSize)
+// Generate will modify the cursor for data, but always restores it to its
+// original position upon exit. The cursor for tree is modified and not
+// restored.
+func Generate(data io.ReadSeeker, dataSize int64, treeReader io.ReadSeeker, treeWriter io.WriteSeeker, dataAndTreeInSameFile bool) ([]byte, error) {
+	layout := InitLayout(dataSize, dataAndTreeInSameFile)
 
 	numBlocks := (dataSize + layout.blockSize - 1) / layout.blockSize
 
+	// If the data is in the same file as the tree, zero pad the last data
+	// block.
+	bytesInLastBlock := dataSize % layout.blockSize
+	if dataAndTreeInSameFile && bytesInLastBlock != 0 {
+		zeroBuf := make([]byte, layout.blockSize-bytesInLastBlock)
+		if _, err := treeWriter.Seek(0, io.SeekEnd); err != nil && err != io.EOF {
+			return nil, err
+		}
+		if _, err := treeWriter.Write(zeroBuf); err != nil {
+			return nil, err
+		}
+	}
+
+	// Store the current offset, so we can set it back once generation
+	// finishes.
+	origOffset, err := data.Seek(0, io.SeekCurrent)
+	if err != nil {
+		return nil, err
+	}
+	defer data.Seek(origOffset, io.SeekStart)
+
+	// Read from the beginning of both data and treeReader.
+	if _, err := data.Seek(0, io.SeekStart); err != nil && err != io.EOF {
+		return nil, err
+	}
+
+	if _, err := treeReader.Seek(0, io.SeekStart); err != nil && err != io.EOF {
+		return nil, err
+	}
+
 	var root []byte
 	for level := 0; level < layout.numLevels(); level++ {
 		for i := int64(0); i < numBlocks; i++ {
@@ -172,11 +219,11 @@ func Generate(data io.Reader, dataSize int64, treeReader io.Reader, treeWriter i
 // Verify will modify the cursor for data, but always restores it to its
 // original position upon exit. The cursor for tree is modified and not
 // restored.
-func Verify(w io.Writer, data, tree io.ReadSeeker, dataSize int64, readOffset int64, readSize int64, expectedRoot []byte) error {
+func Verify(w io.Writer, data, tree io.ReadSeeker, dataSize int64, readOffset int64, readSize int64, expectedRoot []byte, dataAndTreeInSameFile bool) error {
 	if readSize <= 0 {
 		return fmt.Errorf("Unexpected read size: %d", readSize)
 	}
-	layout := InitLayout(int64(dataSize))
+	layout := InitLayout(int64(dataSize), dataAndTreeInSameFile)
 
 	// Calculate the index of blocks that includes the target range in input
 	// data.
diff --git a/pkg/merkletree/merkletree_test.go b/pkg/merkletree/merkletree_test.go
index 911f61df96..ad50ba5f66 100644
--- a/pkg/merkletree/merkletree_test.go
+++ b/pkg/merkletree/merkletree_test.go
@@ -27,80 +27,58 @@ import (
 
 func TestLayout(t *testing.T) {
 	testCases := []struct {
-		dataSize            int64
-		expectedLevelOffset []int64
+		dataSize              int64
+		dataAndTreeInSameFile bool
+		expectedLevelOffset   []int64
 	}{
 		{
-			dataSize:            100,
-			expectedLevelOffset: []int64{0},
+			dataSize:              100,
+			dataAndTreeInSameFile: false,
+			expectedLevelOffset:   []int64{0},
 		},
 		{
-			dataSize:            1000000,
-			expectedLevelOffset: []int64{0, 2 * usermem.PageSize, 3 * usermem.PageSize},
+			dataSize:              100,
+			dataAndTreeInSameFile: true,
+			expectedLevelOffset:   []int64{usermem.PageSize},
 		},
 		{
-			dataSize:            4096 * int64(usermem.PageSize),
-			expectedLevelOffset: []int64{0, 32 * usermem.PageSize, 33 * usermem.PageSize},
+			dataSize:              1000000,
+			dataAndTreeInSameFile: false,
+			expectedLevelOffset:   []int64{0, 2 * usermem.PageSize, 3 * usermem.PageSize},
 		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(fmt.Sprintf("%d", tc.dataSize), func(t *testing.T) {
-			p := InitLayout(tc.dataSize)
-			if p.blockSize != int64(usermem.PageSize) {
-				t.Errorf("got blockSize %d, want %d", p.blockSize, usermem.PageSize)
-			}
-			if p.digestSize != sha256DigestSize {
-				t.Errorf("got digestSize %d, want %d", p.digestSize, sha256DigestSize)
-			}
-			if p.numLevels() != len(tc.expectedLevelOffset) {
-				t.Errorf("got levels %d, want %d", p.numLevels(), len(tc.expectedLevelOffset))
-			}
-			for i := 0; i < p.numLevels() && i < len(tc.expectedLevelOffset); i++ {
-				if p.levelOffset[i] != tc.expectedLevelOffset[i] {
-					t.Errorf("got levelStart[%d] %d, want %d", i, p.levelOffset[i], tc.expectedLevelOffset[i])
-				}
-			}
-		})
-	}
-}
-
-func TestGenerate(t *testing.T) {
-	// The input data has size dataSize. It starts with the data in startWith,
-	// and all other bytes are zeroes.
-	testCases := []struct {
-		data         []byte
-		expectedRoot []byte
-	}{
 		{
-			data:         bytes.Repeat([]byte{0}, usermem.PageSize),
-			expectedRoot: []byte{173, 127, 172, 178, 88, 111, 198, 233, 102, 192, 4, 215, 209, 209, 107, 2, 79, 88, 5, 255, 124, 180, 124, 122, 133, 218, 189, 139, 72, 137, 44, 167},
-		},
-		{
-			data:         bytes.Repeat([]byte{0}, 128*usermem.PageSize+1),
-			expectedRoot: []byte{62, 93, 40, 92, 161, 241, 30, 223, 202, 99, 39, 2, 132, 113, 240, 139, 117, 99, 79, 243, 54, 18, 100, 184, 141, 121, 238, 46, 149, 202, 203, 132},
+			dataSize:              1000000,
+			dataAndTreeInSameFile: true,
+			expectedLevelOffset:   []int64{245 * usermem.PageSize, 247 * usermem.PageSize, 248 * usermem.PageSize},
 		},
 		{
-			data:         []byte{'a'},
-			expectedRoot: []byte{52, 75, 204, 142, 172, 129, 37, 14, 145, 137, 103, 203, 11, 162, 209, 205, 30, 169, 213, 72, 20, 28, 243, 24, 242, 2, 92, 43, 169, 59, 110, 210},
+			dataSize:              4096 * int64(usermem.PageSize),
+			dataAndTreeInSameFile: false,
+			expectedLevelOffset:   []int64{0, 32 * usermem.PageSize, 33 * usermem.PageSize},
 		},
 		{
-			data:         bytes.Repeat([]byte{'a'}, usermem.PageSize),
-			expectedRoot: []byte{201, 62, 238, 45, 13, 176, 47, 16, 172, 199, 70, 13, 149, 118, 225, 34, 220, 248, 205, 83, 196, 191, 141, 252, 174, 27, 62, 116, 235, 207, 255, 90},
+			dataSize:              4096 * int64(usermem.PageSize),
+			dataAndTreeInSameFile: true,
+			expectedLevelOffset:   []int64{4096 * usermem.PageSize, 4128 * usermem.PageSize, 4129 * usermem.PageSize},
 		},
 	}
 
 	for _, tc := range testCases {
-		t.Run(fmt.Sprintf("%d:%v", len(tc.data), tc.data[0]), func(t *testing.T) {
-			var tree bytes.Buffer
-
-			root, err := Generate(bytes.NewBuffer(tc.data), int64(len(tc.data)), &tree, &tree)
-			if err != nil {
-				t.Fatalf("Generate failed: %v", err)
+		t.Run(fmt.Sprintf("%d", tc.dataSize), func(t *testing.T) {
+			l := InitLayout(tc.dataSize, tc.dataAndTreeInSameFile)
+			if l.blockSize != int64(usermem.PageSize) {
+				t.Errorf("got blockSize %d, want %d", l.blockSize, usermem.PageSize)
 			}
-
-			if !bytes.Equal(root, tc.expectedRoot) {
-				t.Errorf("Unexpected root")
+			if l.digestSize != sha256DigestSize {
+				t.Errorf("got digestSize %d, want %d", l.digestSize, sha256DigestSize)
+			}
+			if l.numLevels() != len(tc.expectedLevelOffset) {
+				t.Errorf("got levels %d, want %d", l.numLevels(), len(tc.expectedLevelOffset))
+			}
+			for i := 0; i < l.numLevels() && i < len(tc.expectedLevelOffset); i++ {
+				if l.levelOffset[i] != tc.expectedLevelOffset[i] {
+					t.Errorf("got levelStart[%d] %d, want %d", i, l.levelOffset[i], tc.expectedLevelOffset[i])
+				}
 			}
 		})
 	}
@@ -151,6 +129,57 @@ func (brw *bytesReadWriter) Seek(offset int64, whence int) (int64, error) {
 	return off, nil
 }
 
+func TestGenerate(t *testing.T) {
+	// The input data has size dataSize. It starts with the data in startWith,
+	// and all other bytes are zeroes.
+	testCases := []struct {
+		data         []byte
+		expectedRoot []byte
+	}{
+		{
+			data:         bytes.Repeat([]byte{0}, usermem.PageSize),
+			expectedRoot: []byte{173, 127, 172, 178, 88, 111, 198, 233, 102, 192, 4, 215, 209, 209, 107, 2, 79, 88, 5, 255, 124, 180, 124, 122, 133, 218, 189, 139, 72, 137, 44, 167},
+		},
+		{
+			data:         bytes.Repeat([]byte{0}, 128*usermem.PageSize+1),
+			expectedRoot: []byte{62, 93, 40, 92, 161, 241, 30, 223, 202, 99, 39, 2, 132, 113, 240, 139, 117, 99, 79, 243, 54, 18, 100, 184, 141, 121, 238, 46, 149, 202, 203, 132},
+		},
+		{
+			data:         []byte{'a'},
+			expectedRoot: []byte{52, 75, 204, 142, 172, 129, 37, 14, 145, 137, 103, 203, 11, 162, 209, 205, 30, 169, 213, 72, 20, 28, 243, 24, 242, 2, 92, 43, 169, 59, 110, 210},
+		},
+		{
+			data:         bytes.Repeat([]byte{'a'}, usermem.PageSize),
+			expectedRoot: []byte{201, 62, 238, 45, 13, 176, 47, 16, 172, 199, 70, 13, 149, 118, 225, 34, 220, 248, 205, 83, 196, 191, 141, 252, 174, 27, 62, 116, 235, 207, 255, 90},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(fmt.Sprintf("%d:%v", len(tc.data), tc.data[0]), func(t *testing.T) {
+			for _, dataAndTreeInSameFile := range []bool{false, true} {
+				var tree bytesReadWriter
+				var root []byte
+				var err error
+				if dataAndTreeInSameFile {
+					tree.Write(tc.data)
+					root, err = Generate(&tree, int64(len(tc.data)), &tree, &tree, dataAndTreeInSameFile)
+				} else {
+					root, err = Generate(&bytesReadWriter{
+						bytes: tc.data,
+					}, int64(len(tc.data)), &tree, &tree, dataAndTreeInSameFile)
+				}
+				if err != nil {
+					t.Fatalf("got err: %v, want nil", err)
+				}
+
+				if !bytes.Equal(root, tc.expectedRoot) {
+					t.Errorf("got root: %v, want %v", root, tc.expectedRoot)
+				}
+			}
+		})
+	}
+}
+
 func TestVerify(t *testing.T) {
 	// The input data has size dataSize. The portion to be verified ranges from
 	// verifyStart with verifySize. A bit is flipped in outOfRangeByteIndex to
@@ -284,26 +313,37 @@ func TestVerify(t *testing.T) {
 			data := make([]byte, tc.dataSize)
 			// Generate random bytes in data.
 			rand.Read(data)
-			var tree bytesReadWriter
-
-			root, err := Generate(bytes.NewBuffer(data), int64(tc.dataSize), &tree, &tree)
-			if err != nil {
-				t.Fatalf("Generate failed: %v", err)
-			}
 
-			// Flip a bit in data and checks Verify results.
-			var buf bytes.Buffer
-			data[tc.modifyByte] ^= 1
-			if tc.shouldSucceed {
-				if err := Verify(&buf, bytes.NewReader(data), &tree, tc.dataSize, tc.verifyStart, tc.verifySize, root); err != nil && err != io.EOF {
-					t.Errorf("Verification failed when expected to succeed: %v", err)
+			for _, dataAndTreeInSameFile := range []bool{false, true} {
+				var tree bytesReadWriter
+				var root []byte
+				var err error
+				if dataAndTreeInSameFile {
+					tree.Write(data)
+					root, err = Generate(&tree, int64(len(data)), &tree, &tree, dataAndTreeInSameFile)
+				} else {
+					root, err = Generate(&bytesReadWriter{
+						bytes: data,
+					}, int64(tc.dataSize), &tree, &tree, false /* dataAndTreeInSameFile */)
 				}
-				if int64(buf.Len()) != tc.verifySize || !bytes.Equal(data[tc.verifyStart:tc.verifyStart+tc.verifySize], buf.Bytes()) {
-					t.Errorf("Incorrect output from Verify")
+				if err != nil {
+					t.Fatalf("Generate failed: %v", err)
 				}
-			} else {
-				if err := Verify(&buf, bytes.NewReader(data), &tree, tc.dataSize, tc.verifyStart, tc.verifySize, root); err == nil {
-					t.Errorf("Verification succeeded when expected to fail")
+
+				// Flip a bit in data and checks Verify results.
+				var buf bytes.Buffer
+				data[tc.modifyByte] ^= 1
+				if tc.shouldSucceed {
+					if err := Verify(&buf, bytes.NewReader(data), &tree, tc.dataSize, tc.verifyStart, tc.verifySize, root, dataAndTreeInSameFile); err != nil && err != io.EOF {
+						t.Errorf("Verification failed when expected to succeed: %v", err)
+					}
+					if int64(buf.Len()) != tc.verifySize || !bytes.Equal(data[tc.verifyStart:tc.verifyStart+tc.verifySize], buf.Bytes()) {
+						t.Errorf("Incorrect output from Verify")
+					}
+				} else {
+					if err := Verify(&buf, bytes.NewReader(data), &tree, tc.dataSize, tc.verifyStart, tc.verifySize, root, dataAndTreeInSameFile); err == nil {
+						t.Errorf("Verification succeeded when expected to fail")
+					}
 				}
 			}
 		})
@@ -318,36 +358,47 @@ func TestVerifyRandom(t *testing.T) {
 	data := make([]byte, dataSize)
 	// Generate random bytes in data.
 	rand.Read(data)
-	var tree bytesReadWriter
 
-	root, err := Generate(bytes.NewBuffer(data), int64(dataSize), &tree, &tree)
-	if err != nil {
-		t.Fatalf("Generate failed: %v", err)
-	}
+	for _, dataAndTreeInSameFile := range []bool{false, true} {
+		var tree bytesReadWriter
+		var root []byte
+		var err error
+		if dataAndTreeInSameFile {
+			tree.Write(data)
+			root, err = Generate(&tree, int64(len(data)), &tree, &tree, dataAndTreeInSameFile)
+		} else {
+			root, err = Generate(&bytesReadWriter{
+				bytes: data,
+			}, int64(dataSize), &tree, &tree, dataAndTreeInSameFile)
+		}
+		if err != nil {
+			t.Fatalf("Generate failed: %v", err)
+		}
 
-	// Pick a random portion of data.
-	start := rand.Int63n(dataSize - 1)
-	size := rand.Int63n(dataSize) + 1
+		// Pick a random portion of data.
+		start := rand.Int63n(dataSize - 1)
+		size := rand.Int63n(dataSize) + 1
 
-	var buf bytes.Buffer
-	// Checks that the random portion of data from the original data is
-	// verified successfully.
-	if err := Verify(&buf, bytes.NewReader(data), &tree, dataSize, start, size, root); err != nil && err != io.EOF {
-		t.Errorf("Verification failed for correct data: %v", err)
-	}
-	if size > dataSize-start {
-		size = dataSize - start
-	}
-	if int64(buf.Len()) != size || !bytes.Equal(data[start:start+size], buf.Bytes()) {
-		t.Errorf("Incorrect output from Verify")
-	}
+		var buf bytes.Buffer
+		// Checks that the random portion of data from the original data is
+		// verified successfully.
+		if err := Verify(&buf, bytes.NewReader(data), &tree, dataSize, start, size, root, dataAndTreeInSameFile); err != nil && err != io.EOF {
+			t.Errorf("Verification failed for correct data: %v", err)
+		}
+		if size > dataSize-start {
+			size = dataSize - start
+		}
+		if int64(buf.Len()) != size || !bytes.Equal(data[start:start+size], buf.Bytes()) {
+			t.Errorf("Incorrect output from Verify")
+		}
 
-	buf.Reset()
-	// Flip a random bit in randPortion, and check that verification fails.
-	randBytePos := rand.Int63n(size)
-	data[start+randBytePos] ^= 1
+		buf.Reset()
+		// Flip a random bit in randPortion, and check that verification fails.
+		randBytePos := rand.Int63n(size)
+		data[start+randBytePos] ^= 1
 
-	if err := Verify(&buf, bytes.NewReader(data), &tree, dataSize, start, size, root); err == nil {
-		t.Errorf("Verification succeeded for modified data")
+		if err := Verify(&buf, bytes.NewReader(data), &tree, dataSize, start, size, root, dataAndTreeInSameFile); err == nil {
+			t.Errorf("Verification succeeded for modified data")
+		}
 	}
 }

From 62af21c7f31fc3a4dca20df1d0cded197cf68ee8 Mon Sep 17 00:00:00 2001
From: Jamie Liu <jamieliu@google.com>
Date: Mon, 24 Aug 2020 20:04:12 -0700
Subject: [PATCH 066/211] Flush in fsimpl/gofer.regularFileFD.OnClose() if
 there are no dirty pages.

This more closely matches VFS1 behavior.

PiperOrigin-RevId: 328256068
---
 pkg/sentry/fsimpl/gofer/regular_file.go | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go
index 7e1cbf0652..3b5462682c 100644
--- a/pkg/sentry/fsimpl/gofer/regular_file.go
+++ b/pkg/sentry/fsimpl/gofer/regular_file.go
@@ -56,10 +56,16 @@ func (fd *regularFileFD) OnClose(ctx context.Context) error {
 	if !fd.vfsfd.IsWritable() {
 		return nil
 	}
-	// Skip flushing if writes may be buffered by the client, since (as with
-	// the VFS1 client) we don't flush buffered writes on close anyway.
+	// Skip flushing if there are client-buffered writes, since (as with the
+	// VFS1 client) we don't flush buffered writes on close anyway.
 	d := fd.dentry()
-	if d.fs.opts.interop == InteropModeExclusive {
+	if d.fs.opts.interop != InteropModeExclusive {
+		return nil
+	}
+	d.dataMu.RLock()
+	haveDirtyPages := !d.dirty.IsEmpty()
+	d.dataMu.RUnlock()
+	if haveDirtyPages {
 		return nil
 	}
 	d.handleMu.RLock()

From 886d8f64d962a9c34145414c8c41e3d19f886ce1 Mon Sep 17 00:00:00 2001
From: Ghanan Gowripalan <ghanan@google.com>
Date: Mon, 24 Aug 2020 20:39:26 -0700
Subject: [PATCH 067/211] Automated rollback of changelist 327325153

PiperOrigin-RevId: 328259353
---
 pkg/tcpip/transport/tcp/BUILD       | 17 +-----------
 pkg/tcpip/transport/tcp/connect.go  | 11 --------
 pkg/tcpip/transport/tcp/endpoint.go |  4 +--
 pkg/tcpip/transport/tcp/segment.go  | 23 +++++-----------
 pkg/tcpip/transport/tcp/snd.go      | 41 +++++++++++------------------
 5 files changed, 24 insertions(+), 72 deletions(-)

diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD
index bde071f2a6..234fb95ce2 100644
--- a/pkg/tcpip/transport/tcp/BUILD
+++ b/pkg/tcpip/transport/tcp/BUILD
@@ -11,8 +11,7 @@ go_template_instance(
     template = "//pkg/ilist:generic_list",
     types = {
         "Element": "*segment",
-        "ElementMapper": "segmentMapper",
-        "Linker": "*segmentEntry",
+        "Linker": "*segment",
     },
 )
 
@@ -28,19 +27,6 @@ go_template_instance(
     },
 )
 
-go_template_instance(
-    name = "tcp_rack_segment_list",
-    out = "tcp_rack_segment_list.go",
-    package = "tcp",
-    prefix = "rackSegment",
-    template = "//pkg/ilist:generic_list",
-    types = {
-        "Element": "*segment",
-        "ElementMapper": "rackSegmentMapper",
-        "Linker": "*rackSegmentEntry",
-    },
-)
-
 go_library(
     name = "tcp",
     srcs = [
@@ -69,7 +55,6 @@ go_library(
         "snd.go",
         "snd_state.go",
         "tcp_endpoint_list.go",
-        "tcp_rack_segment_list.go",
         "tcp_segment_list.go",
         "timer.go",
     ],
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go
index 87980c0a19..290172ac98 100644
--- a/pkg/tcpip/transport/tcp/connect.go
+++ b/pkg/tcpip/transport/tcp/connect.go
@@ -924,18 +924,7 @@ func (e *endpoint) handleWrite() *tcpip.Error {
 
 	first := e.sndQueue.Front()
 	if first != nil {
-		lastSeg := e.snd.writeList.Back()
 		e.snd.writeList.PushBackList(&e.sndQueue)
-		if lastSeg == nil {
-			lastSeg = e.snd.writeList.Front()
-		} else {
-			lastSeg = lastSeg.segEntry.Next()
-		}
-		// Add new segments to rcList, as rcList and writeList should
-		// be consistent.
-		for seg := lastSeg; seg != nil; seg = seg.segEntry.Next() {
-			e.snd.rcList.PushBack(seg)
-		}
 		e.sndBufInQueue = 0
 	}
 
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 9df22ac842..4ba0ea1c0a 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -1428,7 +1428,7 @@ func (e *endpoint) Peek(vec [][]byte) (int64, tcpip.ControlMessages, *tcpip.Erro
 	vec = append([][]byte(nil), vec...)
 
 	var num int64
-	for s := e.rcvList.Front(); s != nil; s = s.segEntry.Next() {
+	for s := e.rcvList.Front(); s != nil; s = s.Next() {
 		views := s.data.Views()
 
 		for i := s.viewToDeliver; i < len(views); i++ {
@@ -2249,7 +2249,7 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc
 	if !handshake {
 		e.segmentQueue.mu.Lock()
 		for _, l := range []segmentList{e.segmentQueue.list, e.sndQueue, e.snd.writeList} {
-			for s := l.Front(); s != nil; s = s.segEntry.Next() {
+			for s := l.Front(); s != nil; s = s.Next() {
 				s.id = e.ID
 				s.route = r.Clone()
 				e.sndWaker.Assert()
diff --git a/pkg/tcpip/transport/tcp/segment.go b/pkg/tcpip/transport/tcp/segment.go
index a20755f785..94307d31a8 100644
--- a/pkg/tcpip/transport/tcp/segment.go
+++ b/pkg/tcpip/transport/tcp/segment.go
@@ -30,13 +30,12 @@ import (
 //
 // +stateify savable
 type segment struct {
-	segEntry     segmentEntry
-	rackSegEntry rackSegmentEntry
-	refCnt       int32
-	id           stack.TransportEndpointID `state:"manual"`
-	route        stack.Route               `state:"manual"`
-	data         buffer.VectorisedView     `state:".(buffer.VectorisedView)"`
-	hdr          header.TCP
+	segmentEntry
+	refCnt int32
+	id     stack.TransportEndpointID `state:"manual"`
+	route  stack.Route               `state:"manual"`
+	data   buffer.VectorisedView     `state:".(buffer.VectorisedView)"`
+	hdr    header.TCP
 	// views is used as buffer for data when its length is large
 	// enough to store a VectorisedView.
 	views [8]buffer.View `state:"nosave"`
@@ -62,16 +61,6 @@ type segment struct {
 	xmitCount uint32
 }
 
-// segmentMapper is the ElementMapper for the writeList.
-type segmentMapper struct{}
-
-func (segmentMapper) linkerFor(seg *segment) *segmentEntry { return &seg.segEntry }
-
-// rackSegmentMapper is the ElementMapper for the rcList.
-type rackSegmentMapper struct{}
-
-func (rackSegmentMapper) linkerFor(seg *segment) *rackSegmentEntry { return &seg.rackSegEntry }
-
 func newSegment(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) *segment {
 	s := &segment{
 		refCnt: 1,
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index 31151f23d4..c55589c459 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -154,7 +154,6 @@ type sender struct {
 	closed      bool
 	writeNext   *segment
 	writeList   segmentList
-	rcList      rackSegmentList
 	resendTimer timer       `state:"nosave"`
 	resendWaker sleep.Waker `state:"nosave"`
 
@@ -368,7 +367,7 @@ func (s *sender) updateMaxPayloadSize(mtu, count int) {
 
 	// Rewind writeNext to the first segment exceeding the MTU. Do nothing
 	// if it is already before such a packet.
-	for seg := s.writeList.Front(); seg != nil; seg = seg.segEntry.Next() {
+	for seg := s.writeList.Front(); seg != nil; seg = seg.Next() {
 		if seg == s.writeNext {
 			// We got to writeNext before we could find a segment
 			// exceeding the MTU.
@@ -623,7 +622,6 @@ func (s *sender) splitSeg(seg *segment, size int) {
 	nSeg.data.TrimFront(size)
 	nSeg.sequenceNumber.UpdateForward(seqnum.Size(size))
 	s.writeList.InsertAfter(seg, nSeg)
-	s.rcList.InsertAfter(seg, nSeg)
 
 	// The segment being split does not carry PUSH flag because it is
 	// followed by the newly split segment.
@@ -655,7 +653,7 @@ func (s *sender) NextSeg(nextSegHint *segment) (nextSeg, hint *segment, rescueRt
 	var s3 *segment
 	var s4 *segment
 	// Step 1.
-	for seg := nextSegHint; seg != nil; seg = seg.segEntry.Next() {
+	for seg := nextSegHint; seg != nil; seg = seg.Next() {
 		// Stop iteration if we hit a segment that has never been
 		// transmitted (i.e. either it has no assigned sequence number
 		// or if it does have one, it's >= the next sequence number
@@ -685,7 +683,7 @@ func (s *sender) NextSeg(nextSegHint *segment) (nextSeg, hint *segment, rescueRt
 				// NextSeg():
 				//     (1.c) IsLost(S2) returns true.
 				if s.ep.scoreboard.IsLost(segSeq) {
-					return seg, seg.segEntry.Next(), false
+					return seg, seg.Next(), false
 				}
 
 				// NextSeg():
@@ -699,7 +697,7 @@ func (s *sender) NextSeg(nextSegHint *segment) (nextSeg, hint *segment, rescueRt
 				// SHOULD be returned.
 				if s3 == nil {
 					s3 = seg
-					hint = seg.segEntry.Next()
+					hint = seg.Next()
 				}
 			}
 			// NextSeg():
@@ -733,7 +731,7 @@ func (s *sender) NextSeg(nextSegHint *segment) (nextSeg, hint *segment, rescueRt
 	// range of one segment of up to SMSS octets of
 	// previously unsent data starting with sequence number
 	// HighData+1 MUST be returned."
-	for seg := s.writeNext; seg != nil; seg = seg.segEntry.Next() {
+	for seg := s.writeNext; seg != nil; seg = seg.Next() {
 		if s.isAssignedSequenceNumber(seg) && seg.sequenceNumber.LessThan(s.sndNxt) {
 			continue
 		}
@@ -775,16 +773,15 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se
 			// triggering bugs in poorly written DNS
 			// implementations.
 			var nextTooBig bool
-			for seg.segEntry.Next() != nil && seg.segEntry.Next().data.Size() != 0 {
-				if seg.data.Size()+seg.segEntry.Next().data.Size() > available {
+			for seg.Next() != nil && seg.Next().data.Size() != 0 {
+				if seg.data.Size()+seg.Next().data.Size() > available {
 					nextTooBig = true
 					break
 				}
-				seg.data.Append(seg.segEntry.Next().data)
+				seg.data.Append(seg.Next().data)
 
 				// Consume the segment that we just merged in.
-				s.writeList.Remove(seg.segEntry.Next())
-				s.rcList.Remove(seg.rackSegEntry.Next())
+				s.writeList.Remove(seg.Next())
 			}
 			if !nextTooBig && seg.data.Size() < available {
 				// Segment is not full.
@@ -951,7 +948,7 @@ func (s *sender) handleSACKRecovery(limit int, end seqnum.Value) (dataSent bool)
 			}
 			dataSent = true
 			s.outstanding++
-			s.writeNext = nextSeg.segEntry.Next()
+			s.writeNext = nextSeg.Next()
 			continue
 		}
 
@@ -964,7 +961,6 @@ func (s *sender) handleSACKRecovery(limit int, end seqnum.Value) (dataSent bool)
 		// transmitted in (C.1)."
 		s.outstanding++
 		dataSent = true
-
 		s.sendSegment(nextSeg)
 
 		segEnd := nextSeg.sequenceNumber.Add(nextSeg.logicalLen())
@@ -1039,7 +1035,7 @@ func (s *sender) sendData() {
 	if s.fr.active && s.ep.sackPermitted {
 		dataSent = s.handleSACKRecovery(s.maxPayloadSize, end)
 	} else {
-		for seg := s.writeNext; seg != nil && s.outstanding < s.sndCwnd; seg = seg.segEntry.Next() {
+		for seg := s.writeNext; seg != nil && s.outstanding < s.sndCwnd; seg = seg.Next() {
 			cwndLimit := (s.sndCwnd - s.outstanding) * s.maxPayloadSize
 			if cwndLimit < limit {
 				limit = cwndLimit
@@ -1047,7 +1043,7 @@ func (s *sender) sendData() {
 			if s.isAssignedSequenceNumber(seg) && s.ep.sackPermitted && s.ep.scoreboard.IsSACKED(seg.sackBlock()) {
 				// Move writeNext along so that we don't try and scan data that
 				// has already been SACKED.
-				s.writeNext = seg.segEntry.Next()
+				s.writeNext = seg.Next()
 				continue
 			}
 			if sent := s.maybeSendSegment(seg, limit, end); !sent {
@@ -1055,7 +1051,7 @@ func (s *sender) sendData() {
 			}
 			dataSent = true
 			s.outstanding += s.pCount(seg)
-			s.writeNext = seg.segEntry.Next()
+			s.writeNext = seg.Next()
 		}
 	}
 
@@ -1186,7 +1182,7 @@ func (s *sender) SetPipe() {
 	}
 	pipe := 0
 	smss := seqnum.Size(s.ep.scoreboard.SMSS())
-	for s1 := s.writeList.Front(); s1 != nil && s1.data.Size() != 0 && s.isAssignedSequenceNumber(s1); s1 = s1.segEntry.Next() {
+	for s1 := s.writeList.Front(); s1 != nil && s1.data.Size() != 0 && s.isAssignedSequenceNumber(s1); s1 = s1.Next() {
 		// With GSO each segment can be much larger than SMSS. So check the segment
 		// in SMSS sized ranges.
 		segEnd := s1.sequenceNumber.Add(seqnum.Size(s1.data.Size()))
@@ -1388,7 +1384,7 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
 			}
 
 			if s.writeNext == seg {
-				s.writeNext = seg.segEntry.Next()
+				s.writeNext = seg.Next()
 			}
 
 			// Update the RACK fields if SACK is enabled.
@@ -1397,7 +1393,6 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
 			}
 
 			s.writeList.Remove(seg)
-			s.rcList.Remove(seg)
 
 			// if SACK is enabled then Only reduce outstanding if
 			// the segment was not previously SACKED as these have
@@ -1465,12 +1460,6 @@ func (s *sender) sendSegment(seg *segment) *tcpip.Error {
 		if s.sndCwnd < s.sndSsthresh {
 			s.ep.stack.Stats().TCP.SlowStartRetransmits.Increment()
 		}
-
-		// Move the segment which has to be retransmitted to the end of the list, as
-		// RACK requires the segments in the order of their transmission times.
-		// See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-09#section-6.2
-		// Step 5
-		s.rcList.PushBack(seg)
 	}
 	seg.xmitTime = time.Now()
 	seg.xmitCount++

From 2ddd883a9459ab0f5ef22b81b8efbd1733fda035 Mon Sep 17 00:00:00 2001
From: Dean Deng <deandeng@google.com>
Date: Tue, 25 Aug 2020 00:24:16 -0700
Subject: [PATCH 068/211] Fix deadlock in gofer direct IO.

Fixes several java runtime tests:
java/nio/channels/FileChannel/directio/ReadDirect.java
java/nio/channels/FileChannel/directio/PreadDirect.java

Updates #3576.

PiperOrigin-RevId: 328281849
---
 pkg/sentry/fsimpl/gofer/regular_file.go | 31 ++++++++++++++++---------
 pkg/sentry/fsimpl/gofer/time.go         | 14 +++++++++++
 2 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go
index 3b5462682c..a2e9342d59 100644
--- a/pkg/sentry/fsimpl/gofer/regular_file.go
+++ b/pkg/sentry/fsimpl/gofer/regular_file.go
@@ -123,6 +123,10 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
 		return 0, io.EOF
 	}
 
+	var (
+		n       int64
+		readErr error
+	)
 	if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 {
 		// Lock d.metadataMu for the rest of the read to prevent d.size from
 		// changing.
@@ -133,20 +137,25 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
 		if err := d.writeback(ctx, offset, dst.NumBytes()); err != nil {
 			return 0, err
 		}
-	}
-
-	rw := getDentryReadWriter(ctx, d, offset)
-	if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 {
+		rw := getDentryReadWriter(ctx, d, offset)
 		// Require the read to go to the remote file.
 		rw.direct = true
+		n, readErr = dst.CopyOutFrom(ctx, rw)
+		putDentryReadWriter(rw)
+		if d.fs.opts.interop != InteropModeShared {
+			// Compare Linux's mm/filemap.c:do_generic_file_read() => file_accessed().
+			d.touchAtimeLocked(fd.vfsfd.Mount())
+		}
+	} else {
+		rw := getDentryReadWriter(ctx, d, offset)
+		n, readErr = dst.CopyOutFrom(ctx, rw)
+		putDentryReadWriter(rw)
+		if d.fs.opts.interop != InteropModeShared {
+			// Compare Linux's mm/filemap.c:do_generic_file_read() => file_accessed().
+			d.touchAtime(fd.vfsfd.Mount())
+		}
 	}
-	n, err := dst.CopyOutFrom(ctx, rw)
-	putDentryReadWriter(rw)
-	if d.fs.opts.interop != InteropModeShared {
-		// Compare Linux's mm/filemap.c:do_generic_file_read() => file_accessed().
-		d.touchAtime(fd.vfsfd.Mount())
-	}
-	return n, err
+	return n, readErr
 }
 
 // Read implements vfs.FileDescriptionImpl.Read.
diff --git a/pkg/sentry/fsimpl/gofer/time.go b/pkg/sentry/fsimpl/gofer/time.go
index 98733253d9..7e825caaed 100644
--- a/pkg/sentry/fsimpl/gofer/time.go
+++ b/pkg/sentry/fsimpl/gofer/time.go
@@ -52,6 +52,20 @@ func (d *dentry) touchAtime(mnt *vfs.Mount) {
 	mnt.EndWrite()
 }
 
+// Preconditions: d.metadataMu is locked. d.cachedMetadataAuthoritative() == true.
+func (d *dentry) touchAtimeLocked(mnt *vfs.Mount) {
+	if mnt.Flags.NoATime || mnt.ReadOnly() {
+		return
+	}
+	if err := mnt.CheckBeginWrite(); err != nil {
+		return
+	}
+	now := d.fs.clock.Now().Nanoseconds()
+	atomic.StoreInt64(&d.atime, now)
+	atomic.StoreUint32(&d.atimeDirty, 1)
+	mnt.EndWrite()
+}
+
 // Preconditions:
 // * d.cachedMetadataAuthoritative() == true.
 // * The caller has successfully called vfs.Mount.CheckBeginWrite().

From 086f085660b73e8ead7ca0bfef5835a6aaad8866 Mon Sep 17 00:00:00 2001
From: Bhasker Hariharan <bhaskerh@google.com>
Date: Tue, 25 Aug 2020 07:15:50 -0700
Subject: [PATCH 069/211] Fix TCP_LINGER2 behavior to match linux.

We still deviate a bit from Linux in how long we will actually wait in
FIN-WAIT-2. Linux seems to cap it at TIME_WAIT_LEN, and it's not completely
obvious why it's done that way. For now I think we can ignore that and
fix it if it really is an issue.

PiperOrigin-RevId: 328324922
---
 pkg/sentry/socket/netstack/netstack.go       | 10 +++++--
 pkg/tcpip/transport/tcp/endpoint.go          | 23 ++++++++++-----
 pkg/tcpip/transport/tcp/tcp_test.go          |  7 +++--
 test/syscalls/linux/socket_ip_tcp_generic.cc | 31 ++++++++++++++++----
 4 files changed, 52 insertions(+), 19 deletions(-)

diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 4d0e336961..921464f5da 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -1409,8 +1409,12 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (marshal
 		if err := ep.GetSockOpt(&v); err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-
-		lingerTimeout := primitive.Int32(time.Duration(v) / time.Second)
+		var lingerTimeout primitive.Int32
+		if v >= 0 {
+			lingerTimeout = primitive.Int32(time.Duration(v) / time.Second)
+		} else {
+			lingerTimeout = -1
+		}
 		return &lingerTimeout, nil
 
 	case linux.TCP_DEFER_ACCEPT:
@@ -1967,7 +1971,7 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *
 			return syserr.ErrInvalidArgument
 		}
 
-		v := usermem.ByteOrder.Uint32(optVal)
+		v := int32(usermem.ByteOrder.Uint32(optVal))
 		return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.TCPLingerTimeoutOption(time.Second * time.Duration(v))))
 
 	case linux.TCP_DEFER_ACCEPT:
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 4ba0ea1c0a..9c0f4c9f44 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -1775,15 +1775,24 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
 
 	case tcpip.TCPLingerTimeoutOption:
 		e.LockUser()
-		if v < 0 {
+
+		switch {
+		case v < 0:
 			// Same as effectively disabling TCPLinger timeout.
-			v = 0
-		}
-		// Cap it to MaxTCPLingerTimeout.
-		stkTCPLingerTimeout := tcpip.TCPLingerTimeoutOption(MaxTCPLingerTimeout)
-		if v > stkTCPLingerTimeout {
-			v = stkTCPLingerTimeout
+			v = -1
+		case v == 0:
+			// Same as the stack default.
+			var stackLingerTimeout tcpip.TCPLingerTimeoutOption
+			if err := e.stack.TransportProtocolOption(ProtocolNumber, &stackLingerTimeout); err != nil {
+				panic(fmt.Sprintf("e.stack.TransportProtocolOption(%d, %+v) = %v", ProtocolNumber, &stackLingerTimeout, err))
+			}
+			v = stackLingerTimeout
+		case v > tcpip.TCPLingerTimeoutOption(MaxTCPLingerTimeout):
+			// Cap it to Stack's default TCP_LINGER2 timeout.
+			v = tcpip.TCPLingerTimeoutOption(MaxTCPLingerTimeout)
+		default:
 		}
+
 		e.tcpLingerTimeout = time.Duration(v)
 		e.UnlockUser()
 
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index 55ae09a2fc..9650bb06c0 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -6206,12 +6206,13 @@ func TestTCPLingerTimeout(t *testing.T) {
 		tcpLingerTimeout time.Duration
 		want             time.Duration
 	}{
-		{"NegativeLingerTimeout", -123123, 0},
-		{"ZeroLingerTimeout", 0, 0},
+		{"NegativeLingerTimeout", -123123, -1},
+		// Zero is treated same as the stack's default TCP_LINGER2 timeout.
+		{"ZeroLingerTimeout", 0, tcp.DefaultTCPLingerTimeout},
 		{"InRangeLingerTimeout", 10 * time.Second, 10 * time.Second},
 		// Values > stack's TCPLingerTimeout are capped to the stack's
 		// value. Defaults to tcp.DefaultTCPLingerTimeout(60 seconds)
-		{"AboveMaxLingerTimeout", 125 * time.Second, 120 * time.Second},
+		{"AboveMaxLingerTimeout", tcp.MaxTCPLingerTimeout + 5*time.Second, tcp.MaxTCPLingerTimeout},
 	}
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
diff --git a/test/syscalls/linux/socket_ip_tcp_generic.cc b/test/syscalls/linux/socket_ip_tcp_generic.cc
index 53c0767870..04356b780a 100644
--- a/test/syscalls/linux/socket_ip_tcp_generic.cc
+++ b/test/syscalls/linux/socket_ip_tcp_generic.cc
@@ -819,18 +819,37 @@ TEST_P(TCPSocketPairTest, TCPLingerTimeoutDefault) {
   EXPECT_EQ(get, kDefaultTCPLingerTimeout);
 }
 
-TEST_P(TCPSocketPairTest, SetTCPLingerTimeoutZeroOrLess) {
+TEST_P(TCPSocketPairTest, SetTCPLingerTimeoutLessThanZero) {
   auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
 
-  constexpr int kZero = 0;
-  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &kZero,
-                         sizeof(kZero)),
-              SyscallSucceedsWithValue(0));
-
   constexpr int kNegative = -1234;
   EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2,
                          &kNegative, sizeof(kNegative)),
               SyscallSucceedsWithValue(0));
+  int get = INT_MAX;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(
+      getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, -1);
+}
+
+TEST_P(TCPSocketPairTest, SetTCPLingerTimeoutZero) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  constexpr int kZero = 0;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &kZero,
+                         sizeof(kZero)),
+              SyscallSucceedsWithValue(0));
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(
+      getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_THAT(get,
+              AnyOf(Eq(kMaxTCPLingerTimeout), Eq(kOldMaxTCPLingerTimeout)));
 }
 
 TEST_P(TCPSocketPairTest, SetTCPLingerTimeoutAboveMax) {

From d04e4579ec37cb66bc756749bfb7235501225c2f Mon Sep 17 00:00:00 2001
From: Ayush Ranjan <ayushranjan@google.com>
Date: Tue, 25 Aug 2020 09:21:59 -0700
Subject: [PATCH 070/211] [go-marshal] Support marshalling for structs with
 names starting with W.

Due to how the marshallable interface implementation was generated, we could
not marshal a struct whose name started with W because there was a naming
collision between the parameter (w io.Writer) and the receiver (w *StructName).

Used "writer" as parameter name to avoid collision.

PiperOrigin-RevId: 328343930
---
 tools/go_marshal/gomarshal/generator_interfaces_struct.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/go_marshal/gomarshal/generator_interfaces_struct.go b/tools/go_marshal/gomarshal/generator_interfaces_struct.go
index 4b9cea08a5..44fbb425c7 100644
--- a/tools/go_marshal/gomarshal/generator_interfaces_struct.go
+++ b/tools/go_marshal/gomarshal/generator_interfaces_struct.go
@@ -400,13 +400,13 @@ func (g *interfaceGenerator) emitMarshallableForStruct(st *ast.StructType) {
 
 	g.emit("// WriteTo implements io.WriterTo.WriteTo.\n")
 	g.recordUsedImport("io")
-	g.emit("func (%s *%s) WriteTo(w io.Writer) (int64, error) {\n", g.r, g.typeName())
+	g.emit("func (%s *%s) WriteTo(writer io.Writer) (int64, error) {\n", g.r, g.typeName())
 	g.inIndent(func() {
 		fallback := func() {
 			g.emit("// Type %s doesn't have a packed layout in memory, fall back to MarshalBytes.\n", g.typeName())
 			g.emit("buf := make([]byte, %s.SizeBytes())\n", g.r)
 			g.emit("%s.MarshalBytes(buf)\n", g.r)
-			g.emit("length, err := w.Write(buf)\n")
+			g.emit("length, err := writer.Write(buf)\n")
 			g.emit("return int64(length), err\n")
 		}
 		if thisPacked {
@@ -421,7 +421,7 @@ func (g *interfaceGenerator) emitMarshallableForStruct(st *ast.StructType) {
 			// Fast serialization.
 			g.emitCastToByteSlice(g.r, "buf", fmt.Sprintf("%s.SizeBytes()", g.r))
 
-			g.emit("length, err := w.Write(buf)\n")
+			g.emit("length, err := writer.Write(buf)\n")
 			g.emitKeepAlive(g.r)
 			g.emit("return int64(length), err\n")
 		} else {

From 4480329d091cf2e8fc16cbe0ed155783e53db20f Mon Sep 17 00:00:00 2001
From: Jamie Liu <jamieliu@google.com>
Date: Tue, 25 Aug 2020 09:56:07 -0700
Subject: [PATCH 071/211] Disable PHP disk space tests.

These tests print disk_free_space()/disk_total_space() and expect the printed
result to be an integer (despite the fact that both the documented and returned
types are float). After cl/297213789, free/total disk space on tmpfs is
sufficiently large that PHP prints the result in scientific notation instead:

        ========DIFF========
        012+ float(9.2233720368548E+18)
        013+ float(9.2233720368548E+18)
        012- float(%d)
        013- float(%d)
        ========DONE========
        FAIL disk_total_space() and disk_free_space() tests [ext/standard/tests/file/disk.phpt]

PiperOrigin-RevId: 328349906
---
 test/runtimes/exclude_php7.3.6.csv | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/test/runtimes/exclude_php7.3.6.csv b/test/runtimes/exclude_php7.3.6.csv
index 2ce979dc85..d252383a00 100644
--- a/test/runtimes/exclude_php7.3.6.csv
+++ b/test/runtimes/exclude_php7.3.6.csv
@@ -13,6 +13,13 @@ ext/session/tests/session_set_save_handler_class_018.phpt,,
 ext/session/tests/session_set_save_handler_iface_003.phpt,,
 ext/session/tests/session_set_save_handler_sid_001.phpt,,
 ext/session/tests/session_set_save_handler_variation4.phpt,,
+ext/standard/tests/file/disk.phpt,,Test bug
+ext/standard/tests/file/disk_free_space_basic.phpt,,Test bug
+ext/standard/tests/file/disk_free_space_error.phpt,,Test bug
+ext/standard/tests/file/disk_free_space_variation.phpt,,Test bug
+ext/standard/tests/file/disk_total_space_basic.phpt,,Test bug
+ext/standard/tests/file/disk_total_space_error.phpt,,Test bug
+ext/standard/tests/file/disk_total_space_variation.phpt,,Test bug
 ext/standard/tests/file/fopen_variation19.phpt,b/162894964,
 ext/standard/tests/file/lstat_stat_variation14.phpt,,Flaky
 ext/standard/tests/file/php_fd_wrapper_01.phpt,,

From 98e652f6f1d8f3d0bbc4600b1ef2ce471d8e6406 Mon Sep 17 00:00:00 2001
From: Nayana Bidari <nybidari@google.com>
Date: Tue, 25 Aug 2020 09:59:42 -0700
Subject: [PATCH 072/211] Support SO_LINGER socket option.

When the SO_LINGER option is enabled, close will not return until all queued
messages for the socket have been sent and acknowledged or the linger timeout
is reached. If the option is not set, close returns immediately. This option
is mainly supported for connection-oriented protocols such as TCP.

PiperOrigin-RevId: 328350576
---
 pkg/sentry/socket/netstack/netstack.go       |  45 +++-
 pkg/sentry/socket/unix/transport/unix.go     |   2 +-
 pkg/tcpip/tcpip.go                           |   9 +
 pkg/tcpip/transport/tcp/endpoint.go          |  33 +++
 test/packetimpact/dut/posix_server.cc        |   7 +
 test/packetimpact/proto/posix_server.proto   |  11 +
 test/packetimpact/testbench/dut.go           |  42 +++
 test/packetimpact/tests/BUILD                |  10 +
 test/packetimpact/tests/tcp_linger_test.go   | 253 +++++++++++++++++++
 test/syscalls/linux/socket_ip_tcp_generic.cc | 119 +++++++++
 test/syscalls/linux/socket_ip_udp_generic.cc |  30 +++
 11 files changed, 557 insertions(+), 4 deletions(-)
 create mode 100644 test/packetimpact/tests/tcp_linger_test.go

diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 921464f5da..626195be24 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -479,8 +479,35 @@ func (s *socketOpsCommon) fetchReadView() *syserr.Error {
 }
 
 // Release implements fs.FileOperations.Release.
-func (s *socketOpsCommon) Release(context.Context) {
+func (s *socketOpsCommon) Release(ctx context.Context) {
+	e, ch := waiter.NewChannelEntry(nil)
+	s.EventRegister(&e, waiter.EventHUp|waiter.EventErr)
+	defer s.EventUnregister(&e)
+
 	s.Endpoint.Close()
+
+	// SO_LINGER option is valid only for TCP. For other socket types
+	// return after endpoint close.
+	if family, skType, _ := s.Type(); skType != linux.SOCK_STREAM || (family != linux.AF_INET && family != linux.AF_INET6) {
+		return
+	}
+
+	var v tcpip.LingerOption
+	if err := s.Endpoint.GetSockOpt(&v); err != nil {
+		return
+	}
+
+	// The case for zero timeout is handled in tcp endpoint close function.
+	// Close is blocked until either:
+	// 1. The endpoint state is not in any of the states: FIN-WAIT1,
+	// CLOSING and LAST_ACK.
+	// 2. Timeout is reached.
+	if v.Enabled && v.Timeout != 0 {
+		t := kernel.TaskFromContext(ctx)
+		start := t.Kernel().MonotonicClock().Now()
+		deadline := start.Add(v.Timeout)
+		t.BlockWithDeadline(ch, true, deadline)
+	}
 }
 
 // Read implements fs.FileOperations.Read.
@@ -1195,7 +1222,16 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam
 			return nil, syserr.ErrInvalidArgument
 		}
 
-		linger := linux.Linger{}
+		var v tcpip.LingerOption
+		var linger linux.Linger
+		if err := ep.GetSockOpt(&v); err != nil {
+			return &linger, nil
+		}
+
+		if v.Enabled {
+			linger.OnOff = 1
+		}
+		linger.Linger = int32(v.Timeout.Seconds())
 		return &linger, nil
 
 	case linux.SO_SNDTIMEO:
@@ -1865,7 +1901,10 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
 			socket.SetSockOptEmitUnimplementedEvent(t, name)
 		}
 
-		return nil
+		return syserr.TranslateNetstackError(
+			ep.SetSockOpt(tcpip.LingerOption{
+				Enabled: v.OnOff != 0,
+				Timeout: time.Second * time.Duration(v.Linger)}))
 
 	case linux.SO_DETACH_FILTER:
 		// optval is ignored.
diff --git a/pkg/sentry/socket/unix/transport/unix.go b/pkg/sentry/socket/unix/transport/unix.go
index 475d7177e5..ab7bab5cd3 100644
--- a/pkg/sentry/socket/unix/transport/unix.go
+++ b/pkg/sentry/socket/unix/transport/unix.go
@@ -942,7 +942,7 @@ func (e *baseEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
 // GetSockOpt implements tcpip.Endpoint.GetSockOpt.
 func (e *baseEndpoint) GetSockOpt(opt interface{}) *tcpip.Error {
 	switch opt.(type) {
-	case tcpip.ErrorOption:
+	case tcpip.ErrorOption, *tcpip.LingerOption:
 		return nil
 
 	default:
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index 07c85ce595..290c4e138b 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -978,6 +978,15 @@ const (
 	TCPTimeWaitReuseLoopbackOnly
 )
 
+// LingerOption is used by SetSockOpt/GetSockOpt to set/get the
+// duration for which a socket lingers before returning from Close.
+//
+// +stateify savable
+type LingerOption struct {
+	Enabled bool
+	Timeout time.Duration
+}
+
 // IPPacketInfo is the message structure for IP_PKTINFO.
 //
 // +stateify savable
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 9c0f4c9f44..ff9b8804db 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -654,6 +654,9 @@ type endpoint struct {
 
 	// owner is used to get uid and gid of the packet.
 	owner tcpip.PacketOwner
+
+	// linger is used for SO_LINGER socket option.
+	linger tcpip.LingerOption
 }
 
 // UniqueID implements stack.TransportEndpoint.UniqueID.
@@ -1007,6 +1010,26 @@ func (e *endpoint) Close() {
 		return
 	}
 
+	if e.linger.Enabled && e.linger.Timeout == 0 {
+		s := e.EndpointState()
+		isResetState := s == StateEstablished || s == StateCloseWait || s == StateFinWait1 || s == StateFinWait2 || s == StateSynRecv
+		if isResetState {
+			// Close the endpoint without doing full shutdown and
+			// send a RST.
+			e.resetConnectionLocked(tcpip.ErrConnectionAborted)
+			e.closeNoShutdownLocked()
+
+			// Wake up worker to close the endpoint.
+			switch s {
+			case StateSynRecv:
+				e.notifyProtocolGoroutine(notifyClose)
+			default:
+				e.notifyProtocolGoroutine(notifyTickleWorker)
+			}
+			return
+		}
+	}
+
 	// Issue a shutdown so that the peer knows we won't send any more data
 	// if we're connected, or stop accepting if we're listening.
 	e.shutdownLocked(tcpip.ShutdownWrite | tcpip.ShutdownRead)
@@ -1807,6 +1830,11 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
 	case tcpip.SocketDetachFilterOption:
 		return nil
 
+	case tcpip.LingerOption:
+		e.LockUser()
+		e.linger = v
+		e.UnlockUser()
+
 	default:
 		return nil
 	}
@@ -2032,6 +2060,11 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
 			Port: port,
 		}
 
+	case *tcpip.LingerOption:
+		e.LockUser()
+		*o = e.linger
+		e.UnlockUser()
+
 	default:
 		return tcpip.ErrUnknownProtocolOption
 	}
diff --git a/test/packetimpact/dut/posix_server.cc b/test/packetimpact/dut/posix_server.cc
index 76ba701dad..0f8e279f8a 100644
--- a/test/packetimpact/dut/posix_server.cc
+++ b/test/packetimpact/dut/posix_server.cc
@@ -312,6 +312,13 @@ class PosixImpl final : public posix_server::Posix::Service {
     return ::grpc::Status::OK;
   }
 
+  ::grpc::Status Shutdown(grpc_impl::ServerContext *context,
+                          const ::posix_server::ShutdownRequest *request,
+                          ::posix_server::ShutdownResponse *response) override {
+    response->set_errno_(shutdown(request->fd(), request->how()));
+    return ::grpc::Status::OK;
+  }
+
   ::grpc::Status Recv(::grpc::ServerContext *context,
                       const ::posix_server::RecvRequest *request,
                       ::posix_server::RecvResponse *response) override {
diff --git a/test/packetimpact/proto/posix_server.proto b/test/packetimpact/proto/posix_server.proto
index ccd20b10d4..f32ed54eff 100644
--- a/test/packetimpact/proto/posix_server.proto
+++ b/test/packetimpact/proto/posix_server.proto
@@ -188,6 +188,15 @@ message SocketResponse {
   int32 errno_ = 2;  // "errno" may fail to compile in c++.
 }
 
+message ShutdownRequest {
+  int32 fd = 1;
+  int32 how = 2;
+}
+
+message ShutdownResponse {
+  int32 errno_ = 1;  // "errno" may fail to compile in c++.
+}
+
 message RecvRequest {
   int32 sockfd = 1;
   int32 len = 2;
@@ -225,6 +234,8 @@ service Posix {
   rpc SetSockOpt(SetSockOptRequest) returns (SetSockOptResponse);
   // Call socket() on the DUT.
   rpc Socket(SocketRequest) returns (SocketResponse);
+  // Call shutdown() on the DUT.
+  rpc Shutdown(ShutdownRequest) returns (ShutdownResponse);
   // Call recv() on the DUT.
   rpc Recv(RecvRequest) returns (RecvResponse);
 }
diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go
index 73c532e75e..6165ab2937 100644
--- a/test/packetimpact/testbench/dut.go
+++ b/test/packetimpact/testbench/dut.go
@@ -16,11 +16,13 @@ package testbench
 
 import (
 	"context"
+	"encoding/binary"
 	"flag"
 	"net"
 	"strconv"
 	"syscall"
 	"testing"
+	"time"
 
 	pb "gvisor.dev/gvisor/test/packetimpact/proto/posix_server_go_proto"
 
@@ -700,3 +702,43 @@ func (dut *DUT) RecvWithErrno(ctx context.Context, t *testing.T, sockfd, len, fl
 	}
 	return resp.GetRet(), resp.GetBuf(), syscall.Errno(resp.GetErrno_())
 }
+
+// SetSockLingerOption sets SO_LINGER socket option on the DUT.
+func (dut *DUT) SetSockLingerOption(t *testing.T, sockfd int32, timeout time.Duration, enable bool) {
+	var linger unix.Linger
+	if enable {
+		linger.Onoff = 1
+	}
+	linger.Linger = int32(timeout / time.Second)
+
+	buf := make([]byte, 8)
+	binary.LittleEndian.PutUint32(buf, uint32(linger.Onoff))
+	binary.LittleEndian.PutUint32(buf[4:], uint32(linger.Linger))
+	dut.SetSockOpt(t, sockfd, unix.SOL_SOCKET, unix.SO_LINGER, buf)
+}
+
+// Shutdown calls shutdown on the DUT with the default RPC timeout and returns
+// the reported errno; only a failed RPC is fatal to the test. If more control
+// over the timeout is needed, use ShutdownWithErrno.
+func (dut *DUT) Shutdown(t *testing.T, fd, how int32) error {
+	t.Helper()
+
+	ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout)
+	defer cancel()
+	return dut.ShutdownWithErrno(ctx, t, fd, how)
+}
+
+// ShutdownWithErrno calls shutdown on the DUT.
+func (dut *DUT) ShutdownWithErrno(ctx context.Context, t *testing.T, fd, how int32) error {
+	t.Helper()
+
+	req := pb.ShutdownRequest{
+		Fd:  fd,
+		How: how,
+	}
+	resp, err := dut.posixServer.Shutdown(ctx, &req)
+	if err != nil {
+		t.Fatalf("failed to call Shutdown: %s", err)
+	}
+	return syscall.Errno(resp.GetErrno_())
+}
diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD
index 74658fea03..7a7152fa5b 100644
--- a/test/packetimpact/tests/BUILD
+++ b/test/packetimpact/tests/BUILD
@@ -308,3 +308,13 @@ packetimpact_go_test(
         "@org_golang_x_sys//unix:go_default_library",
     ],
 )
+
+packetimpact_go_test(
+    name = "tcp_linger",
+    srcs = ["tcp_linger_test.go"],
+    deps = [
+        "//pkg/tcpip/header",
+        "//test/packetimpact/testbench",
+        "@org_golang_x_sys//unix:go_default_library",
+    ],
+)
diff --git a/test/packetimpact/tests/tcp_linger_test.go b/test/packetimpact/tests/tcp_linger_test.go
new file mode 100644
index 0000000000..913e49e063
--- /dev/null
+++ b/test/packetimpact/tests/tcp_linger_test.go
@@ -0,0 +1,253 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tcp_linger_test
+
+import (
+	"context"
+	"flag"
+	"syscall"
+	"testing"
+	"time"
+
+	"golang.org/x/sys/unix"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
+	"gvisor.dev/gvisor/test/packetimpact/testbench"
+)
+
+func init() {
+	testbench.RegisterFlags(flag.CommandLine)
+}
+
+func createSocket(t *testing.T, dut testbench.DUT) (int32, int32, testbench.TCPIPv4) {
+	listenFD, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+	conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
+	conn.Connect(t)
+	acceptFD, _ := dut.Accept(t, listenFD)
+	return acceptFD, listenFD, conn
+}
+
+func closeAll(t *testing.T, dut testbench.DUT, listenFD int32, conn testbench.TCPIPv4) {
+	conn.Close(t)
+	dut.Close(t, listenFD)
+	dut.TearDown()
+}
+
+// lingerDuration is the timeout value used with SO_LINGER socket option.
+const lingerDuration = 3 * time.Second
+
+// TestTCPLingerZeroTimeout tests when SO_LINGER is set with zero timeout. DUT
+// should send RST-ACK when socket is closed.
+func TestTCPLingerZeroTimeout(t *testing.T) {
+	// Create a socket, listen, TCP connect, and accept.
+	dut := testbench.NewDUT(t)
+	acceptFD, listenFD, conn := createSocket(t, dut)
+	defer closeAll(t, dut, listenFD, conn)
+
+	dut.SetSockLingerOption(t, acceptFD, 0, true)
+	dut.Close(t, acceptFD)
+
+	// If the linger timeout is set to zero, the DUT should send a RST.
+	if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst | header.TCPFlagAck)}, time.Second); err != nil {
+		t.Errorf("expected RST-ACK packet within a second but got none: %s", err)
+	}
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+}
+
+// TestTCPLingerOff tests when SO_LINGER is not set. DUT should send FIN-ACK
+// when socket is closed.
+func TestTCPLingerOff(t *testing.T) {
+	// Create a socket, listen, TCP connect, and accept.
+	dut := testbench.NewDUT(t)
+	acceptFD, listenFD, conn := createSocket(t, dut)
+	defer closeAll(t, dut, listenFD, conn)
+
+	dut.Close(t, acceptFD)
+
+	// If SO_LINGER is not set, DUT should send a FIN-ACK.
+	if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil {
+		t.Errorf("expected FIN-ACK packet within a second but got none: %s", err)
+	}
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+}
+
+// TestTCPLingerNonZeroTimeout tests when SO_LINGER is set with non-zero timeout.
+// DUT should close the socket after timeout.
+func TestTCPLingerNonZeroTimeout(t *testing.T) {
+	for _, tt := range []struct {
+		description string
+		lingerOn    bool
+	}{
+		{"WithNonZeroLinger", true},
+		{"WithoutLinger", false},
+	} {
+		t.Run(tt.description, func(t *testing.T) {
+			// Create a socket, listen, TCP connect, and accept.
+			dut := testbench.NewDUT(t)
+			acceptFD, listenFD, conn := createSocket(t, dut)
+			defer closeAll(t, dut, listenFD, conn)
+
+			dut.SetSockLingerOption(t, acceptFD, lingerDuration, tt.lingerOn)
+
+			// Increase timeout as Close will take longer time to
+			// return when SO_LINGER is set with non-zero timeout.
+			timeout := lingerDuration + 1*time.Second
+			ctx, cancel := context.WithTimeout(context.Background(), timeout)
+			defer cancel()
+			start := time.Now()
+			dut.CloseWithErrno(ctx, t, acceptFD)
+			end := time.Now()
+			diff := end.Sub(start)
+
+			if tt.lingerOn && diff < lingerDuration {
+				t.Errorf("expected close to return after %v seconds, but returned sooner", lingerDuration)
+			} else if !tt.lingerOn && diff > 1*time.Second {
+				t.Errorf("expected close to return within a second, but returned later")
+			}
+
+			if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil {
+				t.Errorf("expected FIN-ACK packet within a second but got none: %s", err)
+			}
+			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+		})
+	}
+}
+
+// TestTCPLingerSendNonZeroTimeout tests when SO_LINGER is set with non-zero
+// timeout and send a packet. DUT should close the socket after timeout.
+func TestTCPLingerSendNonZeroTimeout(t *testing.T) {
+	for _, tt := range []struct {
+		description string
+		lingerOn    bool
+	}{
+		{"WithSendNonZeroLinger", true},
+		{"WithoutLinger", false},
+	} {
+		t.Run(tt.description, func(t *testing.T) {
+			// Create a socket, listen, TCP connect, and accept.
+			dut := testbench.NewDUT(t)
+			acceptFD, listenFD, conn := createSocket(t, dut)
+			defer closeAll(t, dut, listenFD, conn)
+
+			dut.SetSockLingerOption(t, acceptFD, lingerDuration, tt.lingerOn)
+
+			// Send data.
+			sampleData := []byte("Sample Data")
+			dut.Send(t, acceptFD, sampleData, 0)
+
+			// Increase timeout as Close will take longer time to
+			// return when SO_LINGER is set with non-zero timeout.
+			timeout := lingerDuration + 1*time.Second
+			ctx, cancel := context.WithTimeout(context.Background(), timeout)
+			defer cancel()
+			start := time.Now()
+			dut.CloseWithErrno(ctx, t, acceptFD)
+			end := time.Now()
+			diff := end.Sub(start)
+
+			if tt.lingerOn && diff < lingerDuration {
+				t.Errorf("expected close to return after %v seconds, but returned sooner", lingerDuration)
+			} else if !tt.lingerOn && diff > 1*time.Second {
+				t.Errorf("expected close to return within a second, but returned later")
+			}
+
+			samplePayload := &testbench.Payload{Bytes: sampleData}
+			if _, err := conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil {
+				t.Fatalf("expected a packet with payload %v: %s", samplePayload, err)
+			}
+
+			if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil {
+				t.Errorf("expected FIN-ACK packet within a second but got none: %s", err)
+			}
+			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+		})
+	}
+}
+
+// TestTCPLingerShutdownZeroTimeout tests SO_LINGER with shutdown() and zero
+// timeout. DUT should send RST-ACK when socket is closed.
+func TestTCPLingerShutdownZeroTimeout(t *testing.T) {
+	// Create a socket, listen, TCP connect, and accept.
+	dut := testbench.NewDUT(t)
+	acceptFD, listenFD, conn := createSocket(t, dut)
+	defer closeAll(t, dut, listenFD, conn)
+
+	dut.SetSockLingerOption(t, acceptFD, 0, true)
+	dut.Shutdown(t, acceptFD, syscall.SHUT_RDWR)
+	dut.Close(t, acceptFD)
+
+	// Shutdown will send FIN-ACK with read/write option.
+	if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil {
+		t.Errorf("expected FIN-ACK packet within a second but got none: %s", err)
+	}
+
+	// If the linger timeout is set to zero, the DUT should send a RST.
+	if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst | header.TCPFlagAck)}, time.Second); err != nil {
+		t.Errorf("expected RST-ACK packet within a second but got none: %s", err)
+	}
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+}
+
+// TestTCPLingerShutdownSendNonZeroTimeout tests SO_LINGER with shutdown() and
+// non-zero timeout. DUT should close the socket after timeout.
+func TestTCPLingerShutdownSendNonZeroTimeout(t *testing.T) {
+	for _, tt := range []struct {
+		description string
+		lingerOn    bool
+	}{
+		{"shutdownRDWR", true},
+		{"shutdownRDWR", false},
+	} {
+		t.Run(tt.description, func(t *testing.T) {
+			// Create a socket, listen, TCP connect, and accept.
+			dut := testbench.NewDUT(t)
+			acceptFD, listenFD, conn := createSocket(t, dut)
+			defer closeAll(t, dut, listenFD, conn)
+
+			dut.SetSockLingerOption(t, acceptFD, lingerDuration, tt.lingerOn)
+
+			// Send data.
+			sampleData := []byte("Sample Data")
+			dut.Send(t, acceptFD, sampleData, 0)
+
+			dut.Shutdown(t, acceptFD, syscall.SHUT_RDWR)
+
+			// Increase timeout as Close will take longer time to
+			// return when SO_LINGER is set with non-zero timeout.
+			timeout := lingerDuration + 1*time.Second
+			ctx, cancel := context.WithTimeout(context.Background(), timeout)
+			defer cancel()
+			start := time.Now()
+			dut.CloseWithErrno(ctx, t, acceptFD)
+			end := time.Now()
+			diff := end.Sub(start)
+
+			if tt.lingerOn && diff < lingerDuration {
+				t.Errorf("expected close to return after %v seconds, but returned sooner", lingerDuration)
+			} else if !tt.lingerOn && diff > 1*time.Second {
+				t.Errorf("expected close to return within a second, but returned later")
+			}
+
+			samplePayload := &testbench.Payload{Bytes: sampleData}
+			if _, err := conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil {
+				t.Fatalf("expected a packet with payload %v: %s", samplePayload, err)
+			}
+
+			if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil {
+				t.Errorf("expected FIN-ACK packet within a second but got none: %s", err)
+			}
+			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+		})
+	}
+}
diff --git a/test/syscalls/linux/socket_ip_tcp_generic.cc b/test/syscalls/linux/socket_ip_tcp_generic.cc
index 04356b780a..f4b69c46c3 100644
--- a/test/syscalls/linux/socket_ip_tcp_generic.cc
+++ b/test/syscalls/linux/socket_ip_tcp_generic.cc
@@ -1080,5 +1080,124 @@ TEST_P(TCPSocketPairTest, TCPResetDuringClose_NoRandomSave) {
   }
 }
 
+// Test setsockopt and getsockopt for a socket with SO_LINGER option.
+TEST_P(TCPSocketPairTest, SetAndGetLingerOption) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  // Check getsockopt before SO_LINGER option is set.
+  struct linger got_linger = {-1, -1};
+  socklen_t got_len = sizeof(got_linger);
+
+  ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER,
+                         &got_linger, &got_len),
+              SyscallSucceeds());
+  ASSERT_THAT(got_len, sizeof(got_linger));
+  struct linger want_linger = {};
+  EXPECT_EQ(0, memcmp(&want_linger, &got_linger, got_len));
+
+  // Set and get SO_LINGER with negative values.
+  struct linger sl;
+  sl.l_onoff = 1;
+  sl.l_linger = -3;
+  ASSERT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)),
+      SyscallSucceeds());
+  ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER,
+                         &got_linger, &got_len),
+              SyscallSucceeds());
+  ASSERT_EQ(got_len, sizeof(got_linger));
+  EXPECT_EQ(sl.l_onoff, got_linger.l_onoff);
+  // Linux returns a different value as it uses HZ to convert the seconds to
+  // jiffies which overflows for negative values. We want to be compatible with
+  // linux for getsockopt return value.
+  if (IsRunningOnGvisor()) {
+    EXPECT_EQ(sl.l_linger, got_linger.l_linger);
+  }
+
+  // Set and get SO_LINGER option with positive values.
+  sl.l_onoff = 1;
+  sl.l_linger = 5;
+  ASSERT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)),
+      SyscallSucceeds());
+  ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER,
+                         &got_linger, &got_len),
+              SyscallSucceeds());
+  ASSERT_EQ(got_len, sizeof(got_linger));
+  EXPECT_EQ(0, memcmp(&sl, &got_linger, got_len));
+}
+
+// Test socket to disable SO_LINGER option.
+TEST_P(TCPSocketPairTest, SetOffLingerOption) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  // Set the SO_LINGER option.
+  struct linger sl;
+  sl.l_onoff = 1;
+  sl.l_linger = 5;
+  ASSERT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)),
+      SyscallSucceeds());
+
+  // Check getsockopt after SO_LINGER option is set.
+  struct linger got_linger = {-1, -1};
+  socklen_t got_len = sizeof(got_linger);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER,
+                         &got_linger, &got_len),
+              SyscallSucceeds());
+  ASSERT_EQ(got_len, sizeof(got_linger));
+  EXPECT_EQ(0, memcmp(&sl, &got_linger, got_len));
+
+  sl.l_onoff = 0;
+  sl.l_linger = 5;
+  ASSERT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)),
+      SyscallSucceeds());
+
+  // Check getsockopt after SO_LINGER option is set to zero.
+  ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER,
+                         &got_linger, &got_len),
+              SyscallSucceeds());
+  ASSERT_EQ(got_len, sizeof(got_linger));
+  EXPECT_EQ(0, memcmp(&sl, &got_linger, got_len));
+}
+
+// Test close on dup'd socket with SO_LINGER option set.
+TEST_P(TCPSocketPairTest, CloseWithLingerOption) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  // Set the SO_LINGER option.
+  struct linger sl;
+  sl.l_onoff = 1;
+  sl.l_linger = 5;
+  ASSERT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)),
+      SyscallSucceeds());
+
+  // Check getsockopt after SO_LINGER option is set.
+  struct linger got_linger = {-1, -1};
+  socklen_t got_len = sizeof(got_linger);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER,
+                         &got_linger, &got_len),
+              SyscallSucceeds());
+  ASSERT_EQ(got_len, sizeof(got_linger));
+  EXPECT_EQ(0, memcmp(&sl, &got_linger, got_len));
+
+  FileDescriptor dupFd = FileDescriptor(dup(sockets->first_fd()));
+  ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+  char buf[10] = {};
+  // Write on dupFd should succeed as socket will not be closed until
+  // all references are removed.
+  ASSERT_THAT(RetryEINTR(write)(dupFd.get(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+  ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)),
+              SyscallFailsWithErrno(EBADF));
+
+  // Close the socket.
+  dupFd.reset();
+  // Write on dupFd should fail as all references for socket are removed.
+  ASSERT_THAT(RetryEINTR(write)(dupFd.get(), buf, sizeof(buf)),
+              SyscallFailsWithErrno(EBADF));
+}
 }  // namespace testing
 }  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_udp_generic.cc b/test/syscalls/linux/socket_ip_udp_generic.cc
index edb86aded4..5cad6f0174 100644
--- a/test/syscalls/linux/socket_ip_udp_generic.cc
+++ b/test/syscalls/linux/socket_ip_udp_generic.cc
@@ -448,5 +448,35 @@ TEST_P(UDPSocketPairTest, TClassRecvMismatch) {
               SyscallFailsWithErrno(EOPNOTSUPP));
 }
 
+// Test the SO_LINGER option can be set/get on udp socket.
+TEST_P(UDPSocketPairTest, SoLingerFail) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  int level = SOL_SOCKET;
+  int type = SO_LINGER;
+
+  struct linger sl;
+  sl.l_onoff = 1;
+  sl.l_linger = 5;
+  ASSERT_THAT(setsockopt(sockets->first_fd(), level, type, &sl, sizeof(sl)),
+              SyscallSucceedsWithValue(0));
+
+  struct linger got_linger = {};
+  socklen_t length = sizeof(sl);
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), level, type, &got_linger, &length),
+      SyscallSucceedsWithValue(0));
+
+  ASSERT_EQ(length, sizeof(got_linger));
+  // Linux returns the values which are set in the SetSockOpt for SO_LINGER.
+  // In gVisor, we do not store the linger values for UDP as SO_LINGER for UDP
+  // is a no-op.
+  if (IsRunningOnGvisor()) {
+    struct linger want_linger = {};
+    EXPECT_EQ(0, memcmp(&want_linger, &got_linger, length));
+  } else {
+    EXPECT_EQ(0, memcmp(&sl, &got_linger, length));
+  }
+}
+
 }  // namespace testing
 }  // namespace gvisor

From 232587304de02d5d0634fe8b6118529cfd04bcad Mon Sep 17 00:00:00 2001
From: Sam Balana <sbalana@google.com>
Date: Tue, 25 Aug 2020 11:07:32 -0700
Subject: [PATCH 073/211] Add option to replace linkAddrCache with
 neighborCache

This change adds an option to replace the current implementation of ARP through
linkAddrCache, with an implementation of NUD through neighborCache. Switching
to using NUD for both ARP and NDP is beneficial for the reasons described by
RFC 4861 Section 3.1:

  "[Using NUD] significantly improves the robustness of packet delivery in the
  presence of failing routers, partially failing or partitioned links, or nodes
  that change their link-layer addresses. For instance, mobile nodes can move
  off-link without losing any connectivity due to stale ARP caches."

  "Unlike ARP, Neighbor Unreachability Detection detects half-link failures and
  avoids sending traffic to neighbors with which two-way connectivity is
  absent."

This change also exposes the API for querying and operating on the neighbor
cache. Operations include:
  - Create a static entry
  - List all entries
  - Delete all entries
  - Remove an entry by address

This also exposes the API to change the NUD protocol constants on a per-NIC
basis to allow Neighbor Discovery to operate over links with widely varying
performance characteristics. See [RFC 4861 Section 10][1] for the list of
constants.

Finally, an API for subscribing to NUD state changes is exposed through
NUDDispatcher. See [RFC 4861 Appendix C][2] for the list of edges.

Tests:
 pkg/tcpip/network/arp:arp_test
 + TestDirectRequest

 pkg/tcpip/network/ipv6:ipv6_test
 + TestLinkResolution
 + TestNDPValidation
 + TestNeighorAdvertisementWithTargetLinkLayerOption
 + TestNeighorSolicitationResponse
 + TestNeighorSolicitationWithSourceLinkLayerOption
 + TestRouterAdvertValidation

 pkg/tcpip/stack:stack_test
 + TestCacheWaker
 + TestForwardingWithFakeResolver
 + TestForwardingWithFakeResolverManyPackets
 + TestForwardingWithFakeResolverManyResolutions
 + TestForwardingWithFakeResolverPartialTimeout
 + TestForwardingWithFakeResolverTwoPackets
 + TestIPv6SourceAddressSelectionScopeAndSameAddress

[1]: https://tools.ietf.org/html/rfc4861#section-10
[2]: https://tools.ietf.org/html/rfc4861#appendix-C

Fixes #1889
Fixes #1894
Fixes #1895
Fixes #1947
Fixes #1948
Fixes #1949
Fixes #1950

PiperOrigin-RevId: 328365034
---
 pkg/tcpip/network/arp/BUILD           |   1 +
 pkg/tcpip/network/arp/arp.go          |  47 +-
 pkg/tcpip/network/arp/arp_test.go     | 331 +++++++++-
 pkg/tcpip/network/ip_test.go          |  14 +-
 pkg/tcpip/network/ipv4/ipv4.go        |   2 +-
 pkg/tcpip/network/ipv6/icmp.go        | 278 +++++---
 pkg/tcpip/network/ipv6/icmp_test.go   | 447 +++++++++----
 pkg/tcpip/network/ipv6/ipv6.go        |   4 +-
 pkg/tcpip/network/ipv6/ndp_test.go    | 875 ++++++++++++++++++--------
 pkg/tcpip/stack/forwarder_test.go     | 655 ++++++++++++-------
 pkg/tcpip/stack/linkaddrcache_test.go |  68 ++
 pkg/tcpip/stack/ndp_test.go           | 792 ++++++++++++-----------
 pkg/tcpip/stack/nic.go                |  94 ++-
 pkg/tcpip/stack/nic_test.go           |   2 +-
 pkg/tcpip/stack/nud_test.go           |  16 +-
 pkg/tcpip/stack/registration.go       |   4 +-
 pkg/tcpip/stack/route.go              |  19 +
 pkg/tcpip/stack/stack.go              | 100 ++-
 pkg/tcpip/stack/stack_test.go         |   2 +-
 pkg/tcpip/tcpip.go                    |   4 +
 pkg/tcpip/transport/udp/udp_test.go   |   4 +-
 21 files changed, 2611 insertions(+), 1148 deletions(-)

diff --git a/pkg/tcpip/network/arp/BUILD b/pkg/tcpip/network/arp/BUILD
index eddf7b7252..82c073e32e 100644
--- a/pkg/tcpip/network/arp/BUILD
+++ b/pkg/tcpip/network/arp/BUILD
@@ -28,5 +28,6 @@ go_test(
         "//pkg/tcpip/network/ipv4",
         "//pkg/tcpip/stack",
         "//pkg/tcpip/transport/icmp",
+        "@com_github_google_go_cmp//cmp:go_default_library",
     ],
 )
diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go
index 920872c3f2..cbbe5b77f2 100644
--- a/pkg/tcpip/network/arp/arp.go
+++ b/pkg/tcpip/network/arp/arp.go
@@ -46,6 +46,7 @@ type endpoint struct {
 	nicID         tcpip.NICID
 	linkEP        stack.LinkEndpoint
 	linkAddrCache stack.LinkAddressCache
+	nud           stack.NUDHandler
 }
 
 // DefaultTTL is unused for ARP. It implements stack.NetworkEndpoint.
@@ -78,7 +79,7 @@ func (e *endpoint) WritePacket(*stack.Route, *stack.GSO, stack.NetworkHeaderPara
 
 // NetworkProtocolNumber implements stack.NetworkEndpoint.NetworkProtocolNumber.
 func (e *endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber {
-	return e.protocol.Number()
+	return ProtocolNumber
 }
 
 // WritePackets implements stack.NetworkEndpoint.WritePackets.
@@ -99,9 +100,25 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 	switch h.Op() {
 	case header.ARPRequest:
 		localAddr := tcpip.Address(h.ProtocolAddressTarget())
-		if e.linkAddrCache.CheckLocalAddress(e.nicID, header.IPv4ProtocolNumber, localAddr) == 0 {
-			return // we have no useful answer, ignore the request
+
+		if e.nud == nil {
+			if e.linkAddrCache.CheckLocalAddress(e.nicID, header.IPv4ProtocolNumber, localAddr) == 0 {
+				return // we have no useful answer, ignore the request
+			}
+
+			addr := tcpip.Address(h.ProtocolAddressSender())
+			linkAddr := tcpip.LinkAddress(h.HardwareAddressSender())
+			e.linkAddrCache.AddLinkAddress(e.nicID, addr, linkAddr)
+		} else {
+			if r.Stack().CheckLocalAddress(e.nicID, header.IPv4ProtocolNumber, localAddr) == 0 {
+				return // we have no useful answer, ignore the request
+			}
+
+			remoteAddr := tcpip.Address(h.ProtocolAddressSender())
+			remoteLinkAddr := tcpip.LinkAddress(h.HardwareAddressSender())
+			e.nud.HandleProbe(remoteAddr, localAddr, ProtocolNumber, remoteLinkAddr, e.protocol)
 		}
+
 		pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
 			ReserveHeaderBytes: int(e.linkEP.MaxHeaderLength()) + header.ARPSize,
 		})
@@ -113,11 +130,28 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 		copy(packet.HardwareAddressTarget(), h.HardwareAddressSender())
 		copy(packet.ProtocolAddressTarget(), h.ProtocolAddressSender())
 		_ = e.linkEP.WritePacket(r, nil /* gso */, ProtocolNumber, pkt)
-		fallthrough // also fill the cache from requests
+
 	case header.ARPReply:
 		addr := tcpip.Address(h.ProtocolAddressSender())
 		linkAddr := tcpip.LinkAddress(h.HardwareAddressSender())
-		e.linkAddrCache.AddLinkAddress(e.nicID, addr, linkAddr)
+
+		if e.nud == nil {
+			e.linkAddrCache.AddLinkAddress(e.nicID, addr, linkAddr)
+			return
+		}
+
+		// The solicited, override, and isRouter flags are not available for ARP;
+		// they are only available for IPv6 Neighbor Advertisements.
+		e.nud.HandleConfirmation(addr, linkAddr, stack.ReachabilityConfirmationFlags{
+			// Solicited and unsolicited (also referred to as gratuitous) ARP Replies
+			// are handled equivalently to a solicited Neighbor Advertisement.
+			Solicited: true,
+			// If a different link address is received than the one cached, the entry
+			// should always go to Stale.
+			Override: false,
+			// ARP does not distinguish between router and non-router hosts.
+			IsRouter: false,
+		})
 	}
 }
 
@@ -134,12 +168,13 @@ func (*protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) {
 	return tcpip.Address(h.ProtocolAddressSender()), ProtocolAddress
 }
 
-func (p *protocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache stack.LinkAddressCache, dispatcher stack.TransportDispatcher, sender stack.LinkEndpoint, st *stack.Stack) stack.NetworkEndpoint {
+func (p *protocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache stack.LinkAddressCache, nud stack.NUDHandler, dispatcher stack.TransportDispatcher, sender stack.LinkEndpoint, st *stack.Stack) stack.NetworkEndpoint {
 	return &endpoint{
 		protocol:      p,
 		nicID:         nicID,
 		linkEP:        sender,
 		linkAddrCache: linkAddrCache,
+		nud:           nud,
 	}
 }
 
diff --git a/pkg/tcpip/network/arp/arp_test.go b/pkg/tcpip/network/arp/arp_test.go
index c2c3e6891e..9c9a859e38 100644
--- a/pkg/tcpip/network/arp/arp_test.go
+++ b/pkg/tcpip/network/arp/arp_test.go
@@ -16,10 +16,12 @@ package arp_test
 
 import (
 	"context"
+	"fmt"
 	"strconv"
 	"testing"
 	"time"
 
+	"github.com/google/go-cmp/cmp"
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -32,57 +34,192 @@ import (
 )
 
 const (
-	stackLinkAddr1 = tcpip.LinkAddress("\x0a\x0a\x0b\x0b\x0c\x0c")
-	stackLinkAddr2 = tcpip.LinkAddress("\x0b\x0b\x0c\x0c\x0d\x0d")
-	stackAddr1     = tcpip.Address("\x0a\x00\x00\x01")
-	stackAddr2     = tcpip.Address("\x0a\x00\x00\x02")
-	stackAddrBad   = tcpip.Address("\x0a\x00\x00\x03")
+	nicID = 1
+
+	stackAddr     = tcpip.Address("\x0a\x00\x00\x01")
+	stackLinkAddr = tcpip.LinkAddress("\x0a\x0a\x0b\x0b\x0c\x0c")
+
+	remoteAddr     = tcpip.Address("\x0a\x00\x00\x02")
+	remoteLinkAddr = tcpip.LinkAddress("\x01\x02\x03\x04\x05\x06")
+
+	unknownAddr = tcpip.Address("\x0a\x00\x00\x03")
 
 	defaultChannelSize = 1
 	defaultMTU         = 65536
+
+	// eventChanSize defines the size of event channels used by the neighbor
+	// cache's event dispatcher. The size chosen here needs to be sufficient to
+	// queue all the events received during tests before consumption.
+	// If eventChanSize is too small, the tests may deadlock.
+	eventChanSize = 32
+)
+
+type eventType uint8
+
+const (
+	entryAdded eventType = iota
+	entryChanged
+	entryRemoved
 )
 
+func (t eventType) String() string {
+	switch t {
+	case entryAdded:
+		return "add"
+	case entryChanged:
+		return "change"
+	case entryRemoved:
+		return "remove"
+	default:
+		return fmt.Sprintf("unknown (%d)", t)
+	}
+}
+
+type eventInfo struct {
+	eventType eventType
+	nicID     tcpip.NICID
+	addr      tcpip.Address
+	linkAddr  tcpip.LinkAddress
+	state     stack.NeighborState
+}
+
+func (e eventInfo) String() string {
+	return fmt.Sprintf("%s event for NIC #%d, addr=%q, linkAddr=%q, state=%q", e.eventType, e.nicID, e.addr, e.linkAddr, e.state)
+}
+
+// arpDispatcher implements NUDDispatcher to validate the dispatching of
+// events upon certain NUD state machine events.
+type arpDispatcher struct {
+	// C is where events are queued
+	C chan eventInfo
+}
+
+var _ stack.NUDDispatcher = (*arpDispatcher)(nil)
+
+func (d *arpDispatcher) OnNeighborAdded(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress, state stack.NeighborState, updatedAt time.Time) {
+	e := eventInfo{
+		eventType: entryAdded,
+		nicID:     nicID,
+		addr:      addr,
+		linkAddr:  linkAddr,
+		state:     state,
+	}
+	d.C <- e
+}
+
+func (d *arpDispatcher) OnNeighborChanged(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress, state stack.NeighborState, updatedAt time.Time) {
+	e := eventInfo{
+		eventType: entryChanged,
+		nicID:     nicID,
+		addr:      addr,
+		linkAddr:  linkAddr,
+		state:     state,
+	}
+	d.C <- e
+}
+
+func (d *arpDispatcher) OnNeighborRemoved(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress, state stack.NeighborState, updatedAt time.Time) {
+	e := eventInfo{
+		eventType: entryRemoved,
+		nicID:     nicID,
+		addr:      addr,
+		linkAddr:  linkAddr,
+		state:     state,
+	}
+	d.C <- e
+}
+
+func (d *arpDispatcher) waitForEvent(ctx context.Context, want eventInfo) error {
+	select {
+	case got := <-d.C:
+		if diff := cmp.Diff(got, want, cmp.AllowUnexported(got)); diff != "" {
+			return fmt.Errorf("got invalid event (-got +want):\n%s", diff)
+		}
+	case <-ctx.Done():
+		return fmt.Errorf("%s for %s", ctx.Err(), want)
+	}
+	return nil
+}
+
+func (d *arpDispatcher) waitForEventWithTimeout(want eventInfo, timeout time.Duration) error {
+	ctx, cancel := context.WithTimeout(context.Background(), timeout)
+	defer cancel()
+	return d.waitForEvent(ctx, want)
+}
+
+func (d *arpDispatcher) nextEvent() (eventInfo, bool) {
+	select {
+	case event := <-d.C:
+		return event, true
+	default:
+		return eventInfo{}, false
+	}
+}
+
 type testContext struct {
-	t      *testing.T
-	linkEP *channel.Endpoint
-	s      *stack.Stack
+	s       *stack.Stack
+	linkEP  *channel.Endpoint
+	nudDisp *arpDispatcher
 }
 
-func newTestContext(t *testing.T) *testContext {
+func newTestContext(t *testing.T, useNeighborCache bool) *testContext {
+	c := stack.DefaultNUDConfigurations()
+	// Transition from Reachable to Stale almost immediately to test if receiving
+	// probes refreshes positive reachability.
+	c.BaseReachableTime = time.Microsecond
+
+	d := arpDispatcher{
+		// Create an event channel large enough so the neighbor cache doesn't block
+		// while dispatching events. Blocking could interfere with the timing of
+		// NUD transitions.
+		C: make(chan eventInfo, eventChanSize),
+	}
+
 	s := stack.New(stack.Options{
 		NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol(), arp.NewProtocol()},
 		TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol4()},
+		NUDConfigs:         c,
+		NUDDisp:            &d,
+		UseNeighborCache:   useNeighborCache,
 	})
 
-	ep := channel.New(defaultChannelSize, defaultMTU, stackLinkAddr1)
+	ep := channel.New(defaultChannelSize, defaultMTU, stackLinkAddr)
+	ep.LinkEPCapabilities |= stack.CapabilityResolutionRequired
+
 	wep := stack.LinkEndpoint(ep)
 
 	if testing.Verbose() {
 		wep = sniffer.New(ep)
 	}
-	if err := s.CreateNIC(1, wep); err != nil {
+	if err := s.CreateNIC(nicID, wep); err != nil {
 		t.Fatalf("CreateNIC failed: %v", err)
 	}
 
-	if err := s.AddAddress(1, ipv4.ProtocolNumber, stackAddr1); err != nil {
+	if err := s.AddAddress(nicID, ipv4.ProtocolNumber, stackAddr); err != nil {
 		t.Fatalf("AddAddress for ipv4 failed: %v", err)
 	}
-	if err := s.AddAddress(1, ipv4.ProtocolNumber, stackAddr2); err != nil {
-		t.Fatalf("AddAddress for ipv4 failed: %v", err)
+	if !useNeighborCache {
+		// The remote address needs to be assigned to the NIC so we can receive and
+		// verify outgoing ARP packets. The neighbor cache isn't concerned with
+		// this; the tests that use linkAddrCache expect the ARP responses to be
+		// received by the same NIC.
+		if err := s.AddAddress(nicID, ipv4.ProtocolNumber, remoteAddr); err != nil {
+			t.Fatalf("AddAddress for ipv4 failed: %v", err)
+		}
 	}
-	if err := s.AddAddress(1, arp.ProtocolNumber, arp.ProtocolAddress); err != nil {
+	if err := s.AddAddress(nicID, arp.ProtocolNumber, arp.ProtocolAddress); err != nil {
 		t.Fatalf("AddAddress for arp failed: %v", err)
 	}
 
 	s.SetRouteTable([]tcpip.Route{{
 		Destination: header.IPv4EmptySubnet,
-		NIC:         1,
+		NIC:         nicID,
 	}})
 
 	return &testContext{
-		t:      t,
-		s:      s,
-		linkEP: ep,
+		s:       s,
+		linkEP:  ep,
+		nudDisp: &d,
 	}
 }
 
@@ -91,7 +228,7 @@ func (c *testContext) cleanup() {
 }
 
 func TestDirectRequest(t *testing.T) {
-	c := newTestContext(t)
+	c := newTestContext(t, false /* useNeighborCache */)
 	defer c.cleanup()
 
 	const senderMAC = "\x01\x02\x03\x04\x05\x06"
@@ -111,7 +248,7 @@ func TestDirectRequest(t *testing.T) {
 		}))
 	}
 
-	for i, address := range []tcpip.Address{stackAddr1, stackAddr2} {
+	for i, address := range []tcpip.Address{stackAddr, remoteAddr} {
 		t.Run(strconv.Itoa(i), func(t *testing.T) {
 			inject(address)
 			pi, _ := c.linkEP.ReadContext(context.Background())
@@ -122,7 +259,7 @@ func TestDirectRequest(t *testing.T) {
 			if !rep.IsValid() {
 				t.Fatalf("invalid ARP response: len = %d; response = %x", len(rep), rep)
 			}
-			if got, want := tcpip.LinkAddress(rep.HardwareAddressSender()), stackLinkAddr1; got != want {
+			if got, want := tcpip.LinkAddress(rep.HardwareAddressSender()), stackLinkAddr; got != want {
 				t.Errorf("got HardwareAddressSender = %s, want = %s", got, want)
 			}
 			if got, want := tcpip.Address(rep.ProtocolAddressSender()), tcpip.Address(h.ProtocolAddressTarget()); got != want {
@@ -137,7 +274,7 @@ func TestDirectRequest(t *testing.T) {
 		})
 	}
 
-	inject(stackAddrBad)
+	inject(unknownAddr)
 	// Sleep tests are gross, but this will only potentially flake
 	// if there's a bug. If there is no bug this will reliably
 	// succeed.
@@ -148,6 +285,144 @@ func TestDirectRequest(t *testing.T) {
 	}
 }
 
+func TestDirectRequestWithNeighborCache(t *testing.T) {
+	c := newTestContext(t, true /* useNeighborCache */)
+	defer c.cleanup()
+
+	tests := []struct {
+		name           string
+		senderAddr     tcpip.Address
+		senderLinkAddr tcpip.LinkAddress
+		targetAddr     tcpip.Address
+		isValid        bool
+	}{
+		{
+			name:           "Loopback",
+			senderAddr:     stackAddr,
+			senderLinkAddr: stackLinkAddr,
+			targetAddr:     stackAddr,
+			isValid:        true,
+		},
+		{
+			name:           "Remote",
+			senderAddr:     remoteAddr,
+			senderLinkAddr: remoteLinkAddr,
+			targetAddr:     stackAddr,
+			isValid:        true,
+		},
+		{
+			name:           "RemoteInvalidTarget",
+			senderAddr:     remoteAddr,
+			senderLinkAddr: remoteLinkAddr,
+			targetAddr:     unknownAddr,
+			isValid:        false,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			// Inject an incoming ARP request.
+			v := make(buffer.View, header.ARPSize)
+			h := header.ARP(v)
+			h.SetIPv4OverEthernet()
+			h.SetOp(header.ARPRequest)
+			copy(h.HardwareAddressSender(), test.senderLinkAddr)
+			copy(h.ProtocolAddressSender(), test.senderAddr)
+			copy(h.ProtocolAddressTarget(), test.targetAddr)
+			c.linkEP.InjectInbound(arp.ProtocolNumber, &stack.PacketBuffer{
+				Data: v.ToVectorisedView(),
+			})
+
+			if !test.isValid {
+				// No packets should be sent after receiving an invalid ARP request.
+				// There is no need to perform a blocking read here, since packets are
+				// sent in the same function that handles ARP requests.
+				if pkt, ok := c.linkEP.Read(); ok {
+					t.Errorf("unexpected packet sent with network protocol number %d", pkt.Proto)
+				}
+				return
+			}
+
+			// Verify an ARP response was sent.
+			pi, ok := c.linkEP.Read()
+			if !ok {
+				t.Fatal("expected ARP response to be sent, got none")
+			}
+
+			if pi.Proto != arp.ProtocolNumber {
+				t.Fatalf("expected ARP response, got network protocol number %d", pi.Proto)
+			}
+			rep := header.ARP(pi.Pkt.NetworkHeader().View())
+			if !rep.IsValid() {
+				t.Fatalf("invalid ARP response: len = %d; response = %x", len(rep), rep)
+			}
+			if got, want := tcpip.LinkAddress(rep.HardwareAddressSender()), stackLinkAddr; got != want {
+				t.Errorf("got HardwareAddressSender() = %s, want = %s", got, want)
+			}
+			if got, want := tcpip.Address(rep.ProtocolAddressSender()), tcpip.Address(h.ProtocolAddressTarget()); got != want {
+				t.Errorf("got ProtocolAddressSender() = %s, want = %s", got, want)
+			}
+			if got, want := tcpip.LinkAddress(rep.HardwareAddressTarget()), tcpip.LinkAddress(h.HardwareAddressSender()); got != want {
+				t.Errorf("got HardwareAddressTarget() = %s, want = %s", got, want)
+			}
+			if got, want := tcpip.Address(rep.ProtocolAddressTarget()), tcpip.Address(h.ProtocolAddressSender()); got != want {
+				t.Errorf("got ProtocolAddressTarget() = %s, want = %s", got, want)
+			}
+
+			// Verify the sender was saved in the neighbor cache.
+			wantEvent := eventInfo{
+				eventType: entryAdded,
+				nicID:     nicID,
+				addr:      test.senderAddr,
+				linkAddr:  tcpip.LinkAddress(test.senderLinkAddr),
+				state:     stack.Stale,
+			}
+			if err := c.nudDisp.waitForEventWithTimeout(wantEvent, time.Second); err != nil {
+				t.Fatal(err)
+			}
+
+			neighbors, err := c.s.Neighbors(nicID)
+			if err != nil {
+				t.Fatalf("c.s.Neighbors(%d): %s", nicID, err)
+			}
+
+			neighborByAddr := make(map[tcpip.Address]stack.NeighborEntry)
+			for _, n := range neighbors {
+				if existing, ok := neighborByAddr[n.Addr]; ok {
+					if diff := cmp.Diff(existing, n); diff != "" {
+						t.Fatalf("duplicate neighbor entry found (-existing +got):\n%s", diff)
+					}
+					t.Fatalf("exact neighbor entry duplicate found for addr=%s", n.Addr)
+				}
+				neighborByAddr[n.Addr] = n
+			}
+
+			neigh, ok := neighborByAddr[test.senderAddr]
+			if !ok {
+				t.Fatalf("expected neighbor entry with Addr = %s", test.senderAddr)
+			}
+			if got, want := neigh.LinkAddr, test.senderLinkAddr; got != want {
+				t.Errorf("got neighbor LinkAddr = %s, want = %s", got, want)
+			}
+			if got, want := neigh.LocalAddr, stackAddr; got != want {
+				t.Errorf("got neighbor LocalAddr = %s, want = %s", got, want)
+			}
+			if got, want := neigh.State, stack.Stale; got != want {
+				t.Errorf("got neighbor State = %s, want = %s", got, want)
+			}
+
+			// No more events should be dispatched
+			for {
+				event, ok := c.nudDisp.nextEvent()
+				if !ok {
+					break
+				}
+				t.Errorf("unexpected %s", event)
+			}
+		})
+	}
+}
+
 func TestLinkAddressRequest(t *testing.T) {
 	tests := []struct {
 		name           string
@@ -156,8 +431,8 @@ func TestLinkAddressRequest(t *testing.T) {
 	}{
 		{
 			name:           "Unicast",
-			remoteLinkAddr: stackLinkAddr2,
-			expectLinkAddr: stackLinkAddr2,
+			remoteLinkAddr: remoteLinkAddr,
+			expectLinkAddr: remoteLinkAddr,
 		},
 		{
 			name:           "Multicast",
@@ -173,9 +448,9 @@ func TestLinkAddressRequest(t *testing.T) {
 			t.Fatal("expected ARP protocol to implement stack.LinkAddressResolver")
 		}
 
-		linkEP := channel.New(defaultChannelSize, defaultMTU, stackLinkAddr1)
-		if err := linkRes.LinkAddressRequest(stackAddr1, stackAddr2, test.remoteLinkAddr, linkEP); err != nil {
-			t.Errorf("got p.LinkAddressRequest(%s, %s, %s, _) = %s", stackAddr1, stackAddr2, test.remoteLinkAddr, err)
+		linkEP := channel.New(defaultChannelSize, defaultMTU, stackLinkAddr)
+		if err := linkRes.LinkAddressRequest(stackAddr, remoteAddr, test.remoteLinkAddr, linkEP); err != nil {
+			t.Errorf("got p.LinkAddressRequest(%s, %s, %s, _) = %s", stackAddr, remoteAddr, test.remoteLinkAddr, err)
 		}
 
 		pkt, ok := linkEP.Read()
diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go
index 9007346fe0..e45dd17f89 100644
--- a/pkg/tcpip/network/ip_test.go
+++ b/pkg/tcpip/network/ip_test.go
@@ -250,7 +250,7 @@ func buildDummyStack(t *testing.T) *stack.Stack {
 func TestIPv4Send(t *testing.T) {
 	o := testObject{t: t, v4: true}
 	proto := ipv4.NewProtocol()
-	ep := proto.NewEndpoint(nicID, nil, nil, &o, buildDummyStack(t))
+	ep := proto.NewEndpoint(nicID, nil, nil, nil, &o, buildDummyStack(t))
 	defer ep.Close()
 
 	// Allocate and initialize the payload view.
@@ -287,7 +287,7 @@ func TestIPv4Send(t *testing.T) {
 func TestIPv4Receive(t *testing.T) {
 	o := testObject{t: t, v4: true}
 	proto := ipv4.NewProtocol()
-	ep := proto.NewEndpoint(nicID, nil, &o, nil, buildDummyStack(t))
+	ep := proto.NewEndpoint(nicID, nil, nil, &o, nil, buildDummyStack(t))
 	defer ep.Close()
 
 	totalLen := header.IPv4MinimumSize + 30
@@ -357,7 +357,7 @@ func TestIPv4ReceiveControl(t *testing.T) {
 		t.Run(c.name, func(t *testing.T) {
 			o := testObject{t: t}
 			proto := ipv4.NewProtocol()
-			ep := proto.NewEndpoint(nicID, nil, &o, nil, buildDummyStack(t))
+			ep := proto.NewEndpoint(nicID, nil, nil, &o, nil, buildDummyStack(t))
 			defer ep.Close()
 
 			const dataOffset = header.IPv4MinimumSize*2 + header.ICMPv4MinimumSize
@@ -418,7 +418,7 @@ func TestIPv4ReceiveControl(t *testing.T) {
 func TestIPv4FragmentationReceive(t *testing.T) {
 	o := testObject{t: t, v4: true}
 	proto := ipv4.NewProtocol()
-	ep := proto.NewEndpoint(nicID, nil, &o, nil, buildDummyStack(t))
+	ep := proto.NewEndpoint(nicID, nil, nil, &o, nil, buildDummyStack(t))
 	defer ep.Close()
 
 	totalLen := header.IPv4MinimumSize + 24
@@ -495,7 +495,7 @@ func TestIPv4FragmentationReceive(t *testing.T) {
 func TestIPv6Send(t *testing.T) {
 	o := testObject{t: t}
 	proto := ipv6.NewProtocol()
-	ep := proto.NewEndpoint(nicID, nil, &o, channel.New(0, 1280, ""), buildDummyStack(t))
+	ep := proto.NewEndpoint(nicID, nil, nil, &o, channel.New(0, 1280, ""), buildDummyStack(t))
 	defer ep.Close()
 
 	// Allocate and initialize the payload view.
@@ -532,7 +532,7 @@ func TestIPv6Send(t *testing.T) {
 func TestIPv6Receive(t *testing.T) {
 	o := testObject{t: t}
 	proto := ipv6.NewProtocol()
-	ep := proto.NewEndpoint(nicID, nil, &o, nil, buildDummyStack(t))
+	ep := proto.NewEndpoint(nicID, nil, nil, &o, nil, buildDummyStack(t))
 	defer ep.Close()
 
 	totalLen := header.IPv6MinimumSize + 30
@@ -611,7 +611,7 @@ func TestIPv6ReceiveControl(t *testing.T) {
 		t.Run(c.name, func(t *testing.T) {
 			o := testObject{t: t}
 			proto := ipv6.NewProtocol()
-			ep := proto.NewEndpoint(nicID, nil, &o, nil, buildDummyStack(t))
+			ep := proto.NewEndpoint(nicID, nil, nil, &o, nil, buildDummyStack(t))
 			defer ep.Close()
 
 			dataOffset := header.IPv6MinimumSize*2 + header.ICMPv6MinimumSize
diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go
index 63ffb36608..55ca94268c 100644
--- a/pkg/tcpip/network/ipv4/ipv4.go
+++ b/pkg/tcpip/network/ipv4/ipv4.go
@@ -59,7 +59,7 @@ type endpoint struct {
 }
 
 // NewEndpoint creates a new ipv4 endpoint.
-func (p *protocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache stack.LinkAddressCache, dispatcher stack.TransportDispatcher, linkEP stack.LinkEndpoint, st *stack.Stack) stack.NetworkEndpoint {
+func (p *protocol) NewEndpoint(nicID tcpip.NICID, _ stack.LinkAddressCache, _ stack.NUDHandler, dispatcher stack.TransportDispatcher, linkEP stack.LinkEndpoint, st *stack.Stack) stack.NetworkEndpoint {
 	return &endpoint{
 		nicID:      nicID,
 		linkEP:     linkEP,
diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go
index 66d3a953a1..2b83c421e0 100644
--- a/pkg/tcpip/network/ipv6/icmp.go
+++ b/pkg/tcpip/network/ipv6/icmp.go
@@ -15,8 +15,6 @@
 package ipv6
 
 import (
-	"fmt"
-
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -71,6 +69,59 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack
 	e.dispatcher.DeliverTransportControlPacket(src, hdr.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt)
 }
 
+// getLinkAddrOption searches NDP options for a given link address option using
+// the provided getAddr function as a filter. Returns the link address if
+// found; otherwise, returns the zero link address value. Also returns true if
+// the options are valid as per the wire format, false otherwise.
+func getLinkAddrOption(it header.NDPOptionIterator, getAddr func(header.NDPOption) tcpip.LinkAddress) (tcpip.LinkAddress, bool) {
+	var linkAddr tcpip.LinkAddress
+	for {
+		opt, done, err := it.Next()
+		if err != nil {
+			return "", false
+		}
+		if done {
+			break
+		}
+		if addr := getAddr(opt); len(addr) != 0 {
+			// No RFCs define what to do when an NDP message has multiple Link-Layer
+			// Address options. Since no interface can have multiple link-layer
+			// addresses, we consider such messages invalid.
+			if len(linkAddr) != 0 {
+				return "", false
+			}
+			linkAddr = addr
+		}
+	}
+	return linkAddr, true
+}
+
+// getSourceLinkAddr searches NDP options for the source link address option.
+// Returns the link address if found; otherwise, returns the zero link address
+// value. Also returns true if the options are valid as per the wire format,
+// false otherwise.
+func getSourceLinkAddr(it header.NDPOptionIterator) (tcpip.LinkAddress, bool) {
+	return getLinkAddrOption(it, func(opt header.NDPOption) tcpip.LinkAddress {
+		if src, ok := opt.(header.NDPSourceLinkLayerAddressOption); ok {
+			return src.EthernetAddress()
+		}
+		return ""
+	})
+}
+
+// getTargetLinkAddr searches NDP options for the target link address option.
+// Returns the link address if found; otherwise, returns the zero link address
+// value. Also returns true if the options are valid as per the wire format,
+// false otherwise.
+func getTargetLinkAddr(it header.NDPOptionIterator) (tcpip.LinkAddress, bool) {
+	return getLinkAddrOption(it, func(opt header.NDPOption) tcpip.LinkAddress {
+		if dst, ok := opt.(header.NDPTargetLinkLayerAddressOption); ok {
+			return dst.EthernetAddress()
+		}
+		return ""
+	})
+}
+
 func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragmentHeader bool) {
 	stats := r.Stats().ICMP
 	sent := stats.V6PacketsSent
@@ -137,7 +188,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 
 	case header.ICMPv6NeighborSolicit:
 		received.NeighborSolicit.Increment()
-		if pkt.Data.Size() < header.ICMPv6NeighborSolicitMinimumSize || !isNDPValid() {
+		if !isNDPValid() || pkt.Data.Size() < header.ICMPv6NeighborSolicitMinimumSize {
 			received.Invalid.Increment()
 			return
 		}
@@ -147,14 +198,15 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 		// NDP messages cannot be fragmented. Also note that in the common case NDP
 		// datagrams are very small and ToView() will not incur allocations.
 		ns := header.NDPNeighborSolicit(payload.ToView())
-		it, err := ns.Options().Iter(true)
-		if err != nil {
-			// If we have a malformed NDP NS option, drop the packet.
+		targetAddr := ns.TargetAddress()
+
+		// As per RFC 4861 section 4.3, the Target Address MUST NOT be a multicast
+		// address.
+		if header.IsV6MulticastAddress(targetAddr) {
 			received.Invalid.Increment()
 			return
 		}
 
-		targetAddr := ns.TargetAddress()
 		s := r.Stack()
 		if isTentative, err := s.IsAddrTentative(e.nicID, targetAddr); err != nil {
 			// We will only get an error if the NIC is unrecognized, which should not
@@ -187,39 +239,22 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 		// so the packet is processed as defined in RFC 4861, as per RFC 4862
 		// section 5.4.3.
 
-		// Is the NS targetting us?
-		if e.linkAddrCache.CheckLocalAddress(e.nicID, ProtocolNumber, targetAddr) == 0 {
+		// Is the NS targeting us?
+		if s.CheckLocalAddress(e.nicID, ProtocolNumber, targetAddr) == 0 {
 			return
 		}
 
-		// If the NS message contains the Source Link-Layer Address option, update
-		// the link address cache with the value of the option.
-		//
-		// TODO(b/148429853): Properly process the NS message and do Neighbor
-		// Unreachability Detection.
-		var sourceLinkAddr tcpip.LinkAddress
-		for {
-			opt, done, err := it.Next()
-			if err != nil {
-				// This should never happen as Iter(true) above did not return an error.
-				panic(fmt.Sprintf("unexpected error when iterating over NDP options: %s", err))
-			}
-			if done {
-				break
-			}
+		it, err := ns.Options().Iter(false /* check */)
+		if err != nil {
+			// Options are not valid as per the wire format, silently drop the packet.
+			received.Invalid.Increment()
+			return
+		}
 
-			switch opt := opt.(type) {
-			case header.NDPSourceLinkLayerAddressOption:
-				// No RFCs define what to do when an NS message has multiple Source
-				// Link-Layer Address options. Since no interface can have multiple
-				// link-layer addresses, we consider such messages invalid.
-				if len(sourceLinkAddr) != 0 {
-					received.Invalid.Increment()
-					return
-				}
-
-				sourceLinkAddr = opt.EthernetAddress()
-			}
+		sourceLinkAddr, ok := getSourceLinkAddr(it)
+		if !ok {
+			received.Invalid.Increment()
+			return
 		}
 
 		unspecifiedSource := r.RemoteAddress == header.IPv6Any
@@ -237,6 +272,8 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 		} else if unspecifiedSource {
 			received.Invalid.Increment()
 			return
+		} else if e.nud != nil {
+			e.nud.HandleProbe(r.RemoteAddress, r.LocalAddress, header.IPv6ProtocolNumber, sourceLinkAddr, e.protocol)
 		} else {
 			e.linkAddrCache.AddLinkAddress(e.nicID, r.RemoteAddress, sourceLinkAddr)
 		}
@@ -304,7 +341,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 
 	case header.ICMPv6NeighborAdvert:
 		received.NeighborAdvert.Increment()
-		if pkt.Data.Size() < header.ICMPv6NeighborAdvertSize || !isNDPValid() {
+		if !isNDPValid() || pkt.Data.Size() < header.ICMPv6NeighborAdvertSize {
 			received.Invalid.Increment()
 			return
 		}
@@ -314,17 +351,10 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 		// 5, NDP messages cannot be fragmented. Also note that in the common case
 		// NDP datagrams are very small and ToView() will not incur allocations.
 		na := header.NDPNeighborAdvert(payload.ToView())
-		it, err := na.Options().Iter(true)
-		if err != nil {
-			// If we have a malformed NDP NA option, drop the packet.
-			received.Invalid.Increment()
-			return
-		}
-
 		targetAddr := na.TargetAddress()
-		stack := r.Stack()
+		s := r.Stack()
 
-		if isTentative, err := stack.IsAddrTentative(e.nicID, targetAddr); err != nil {
+		if isTentative, err := s.IsAddrTentative(e.nicID, targetAddr); err != nil {
 			// We will only get an error if the NIC is unrecognized, which should not
 			// happen. For now short-circuit this packet.
 			//
@@ -335,7 +365,14 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 			// DAD on, implying the address is not unique. In this case we let the
 			// stack know so it can handle such a scenario and do nothing furthur with
 			// the NDP NA.
-			stack.DupTentativeAddrDetected(e.nicID, targetAddr)
+			s.DupTentativeAddrDetected(e.nicID, targetAddr)
+			return
+		}
+
+		it, err := na.Options().Iter(false /* check */)
+		if err != nil {
+			// If we have a malformed NDP NA option, drop the packet.
+			received.Invalid.Increment()
 			return
 		}
 
@@ -348,39 +385,25 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 		// TODO(b/143147598): Handle the scenario described above. Also inform the
 		// netstack integration that a duplicate address was detected outside of
 		// DAD.
+		targetLinkAddr, ok := getTargetLinkAddr(it)
+		if !ok {
+			received.Invalid.Increment()
+			return
+		}
 
 		// If the NA message has the target link layer option, update the link
 		// address cache with the link address for the target of the message.
-		//
-		// TODO(b/148429853): Properly process the NA message and do Neighbor
-		// Unreachability Detection.
-		var targetLinkAddr tcpip.LinkAddress
-		for {
-			opt, done, err := it.Next()
-			if err != nil {
-				// This should never happen as Iter(true) above did not return an error.
-				panic(fmt.Sprintf("unexpected error when iterating over NDP options: %s", err))
-			}
-			if done {
-				break
+		if len(targetLinkAddr) != 0 {
+			if e.nud == nil {
+				e.linkAddrCache.AddLinkAddress(e.nicID, targetAddr, targetLinkAddr)
+				return
 			}
 
-			switch opt := opt.(type) {
-			case header.NDPTargetLinkLayerAddressOption:
-				// No RFCs define what to do when an NA message has multiple Target
-				// Link-Layer Address options. Since no interface can have multiple
-				// link-layer addresses, we consider such messages invalid.
-				if len(targetLinkAddr) != 0 {
-					received.Invalid.Increment()
-					return
-				}
-
-				targetLinkAddr = opt.EthernetAddress()
-			}
-		}
-
-		if len(targetLinkAddr) != 0 {
-			e.linkAddrCache.AddLinkAddress(e.nicID, targetAddr, targetLinkAddr)
+			e.nud.HandleConfirmation(targetAddr, targetLinkAddr, stack.ReachabilityConfirmationFlags{
+				Solicited: na.SolicitedFlag(),
+				Override:  na.OverrideFlag(),
+				IsRouter:  na.RouterFlag(),
+			})
 		}
 
 	case header.ICMPv6EchoRequest:
@@ -440,27 +463,75 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 
 	case header.ICMPv6RouterSolicit:
 		received.RouterSolicit.Increment()
-		if !isNDPValid() {
+
+		//
+		// Validate the RS as per RFC 4861 section 6.1.1.
+		//
+
+		// Is the NDP payload of sufficient size to hold a Router Solicitation?
+		if !isNDPValid() || pkt.Data.Size()-header.ICMPv6HeaderSize < header.NDPRSMinimumSize {
 			received.Invalid.Increment()
 			return
 		}
 
-	case header.ICMPv6RouterAdvert:
-		received.RouterAdvert.Increment()
+		stack := r.Stack()
 
-		// Is the NDP payload of sufficient size to hold a Router
-		// Advertisement?
-		if pkt.Data.Size()-header.ICMPv6HeaderSize < header.NDPRAMinimumSize || !isNDPValid() {
+		// Is the networking stack operating as a router?
+		if !stack.Forwarding() {
+			// ... No, silently drop the packet.
+			received.RouterOnlyPacketsDroppedByHost.Increment()
+			return
+		}
+
+		// Note that in the common case NDP datagrams are very small and ToView()
+		// will not incur allocations.
+		rs := header.NDPRouterSolicit(payload.ToView())
+		it, err := rs.Options().Iter(false /* check */)
+		if err != nil {
+			// Options are not valid as per the wire format, silently drop the packet.
 			received.Invalid.Increment()
 			return
 		}
 
-		routerAddr := iph.SourceAddress()
+		sourceLinkAddr, ok := getSourceLinkAddr(it)
+		if !ok {
+			received.Invalid.Increment()
+			return
+		}
+
+		// If the RS message has the source link layer option, update the link
+		// address cache with the link address for the source of the message.
+		if len(sourceLinkAddr) != 0 {
+			// As per RFC 4861 section 4.1, the Source Link-Layer Address Option MUST
+			// NOT be included when the source IP address is the unspecified address.
+			// Otherwise, it SHOULD be included on link layers that have addresses.
+			if r.RemoteAddress == header.IPv6Any {
+				received.Invalid.Increment()
+				return
+			}
+
+			if e.nud != nil {
+				// A RS with a specified source IP address modifies the NUD state
+				// machine in the same way a reachability probe would.
+				e.nud.HandleProbe(r.RemoteAddress, r.LocalAddress, header.IPv6ProtocolNumber, sourceLinkAddr, e.protocol)
+			}
+		}
+
+	case header.ICMPv6RouterAdvert:
+		received.RouterAdvert.Increment()
 
 		//
 		// Validate the RA as per RFC 4861 section 6.1.2.
 		//
 
+		// Is the NDP payload of sufficient size to hold a Router Advertisement?
+		if !isNDPValid() || pkt.Data.Size()-header.ICMPv6HeaderSize < header.NDPRAMinimumSize {
+			received.Invalid.Increment()
+			return
+		}
+
+		routerAddr := iph.SourceAddress()
+
 		// Is the IP Source Address a link-local address?
 		if !header.IsV6LinkLocalAddress(routerAddr) {
 			// ...No, silently drop the packet.
@@ -468,16 +539,18 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 			return
 		}
 
-		// The remainder of payload must be only the router advertisement, so
-		// payload.ToView() always returns the advertisement. Per RFC 6980 section
-		// 5, NDP messages cannot be fragmented. Also note that in the common case
-		// NDP datagrams are very small and ToView() will not incur allocations.
+		// Note that in the common case NDP datagrams are very small and ToView()
+		// will not incur allocations.
 		ra := header.NDPRouterAdvert(payload.ToView())
-		opts := ra.Options()
+		it, err := ra.Options().Iter(false /* check */)
+		if err != nil {
+			// Options are not valid as per the wire format, silently drop the packet.
+			received.Invalid.Increment()
+			return
+		}
 
-		// Are options valid as per the wire format?
-		if _, err := opts.Iter(true); err != nil {
-			// ...No, silently drop the packet.
+		sourceLinkAddr, ok := getSourceLinkAddr(it)
+		if !ok {
 			received.Invalid.Increment()
 			return
 		}
@@ -487,12 +560,33 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 		// as RFC 4861 section 6.1.2 is concerned.
 		//
 
+		// If the RA has the source link layer option, update the link address
+		// cache with the link address for the advertised router.
+		if len(sourceLinkAddr) != 0 && e.nud != nil {
+			e.nud.HandleProbe(routerAddr, r.LocalAddress, header.IPv6ProtocolNumber, sourceLinkAddr, e.protocol)
+		}
+
 		// Tell the NIC to handle the RA.
 		stack := r.Stack()
-		rxNICID := r.NICID()
-		stack.HandleNDPRA(rxNICID, routerAddr, ra)
+		stack.HandleNDPRA(e.nicID, routerAddr, ra)
 
 	case header.ICMPv6RedirectMsg:
+		// TODO(gvisor.dev/issue/2285): Call `e.nud.HandleProbe` after validating
+		// this redirect message, as per RFC 4861 section 7.3.3:
+		//
+		//    "A Neighbor Cache entry enters the STALE state when created as a
+		//    result of receiving packets other than solicited Neighbor
+		//    Advertisements (i.e., Router Solicitations, Router Advertisements,
+		//    Redirects, and Neighbor Solicitations).  These packets contain the
+		//    link-layer address of either the sender or, in the case of Redirect,
+		//    the redirection target.  However, receipt of these link-layer
+		//    addresses does not confirm reachability of the forward-direction path
+		//    to that node.  Placing a newly created Neighbor Cache entry for which
+		//    the link-layer address is known in the STALE state provides assurance
+		//    that path failures are detected quickly. In addition, should a cached
+		//    link-layer address be modified due to receiving one of the above
+		//    messages, the state SHOULD also be set to STALE to provide prompt
+		//    verification that the path to the new link-layer address is working."
 		received.RedirectMsg.Increment()
 		if !isNDPValid() {
 			received.Invalid.Increment()
diff --git a/pkg/tcpip/network/ipv6/icmp_test.go b/pkg/tcpip/network/ipv6/icmp_test.go
index 9e4eeea77f..8112ed0518 100644
--- a/pkg/tcpip/network/ipv6/icmp_test.go
+++ b/pkg/tcpip/network/ipv6/icmp_test.go
@@ -31,6 +31,8 @@ import (
 )
 
 const (
+	nicID = 1
+
 	linkAddr0 = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x06")
 	linkAddr1 = tcpip.LinkAddress("\x0a\x0b\x0c\x0d\x0e\x0e")
 	linkAddr2 = tcpip.LinkAddress("\x0a\x0b\x0c\x0d\x0e\x0f")
@@ -49,7 +51,10 @@ type stubLinkEndpoint struct {
 }
 
 func (*stubLinkEndpoint) Capabilities() stack.LinkEndpointCapabilities {
-	return 0
+	// Indicate that resolution for link layer addresses is required to send
+	// packets over this link. This is needed so the NIC knows to allocate a
+	// neighbor table.
+	return stack.CapabilityResolutionRequired
 }
 
 func (*stubLinkEndpoint) MaxHeaderLength() uint16 {
@@ -84,16 +89,184 @@ func (*stubLinkAddressCache) CheckLocalAddress(tcpip.NICID, tcpip.NetworkProtoco
 func (*stubLinkAddressCache) AddLinkAddress(tcpip.NICID, tcpip.Address, tcpip.LinkAddress) {
 }
 
+type stubNUDHandler struct{}
+
+var _ stack.NUDHandler = (*stubNUDHandler)(nil)
+
+func (*stubNUDHandler) HandleProbe(remoteAddr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, remoteLinkAddr tcpip.LinkAddress, linkRes stack.LinkAddressResolver) {
+}
+
+func (*stubNUDHandler) HandleConfirmation(addr tcpip.Address, linkAddr tcpip.LinkAddress, flags stack.ReachabilityConfirmationFlags) {
+}
+
+func (*stubNUDHandler) HandleUpperLevelConfirmation(addr tcpip.Address) {
+}
+
 func TestICMPCounts(t *testing.T) {
+	tests := []struct {
+		name             string
+		useNeighborCache bool
+	}{
+		{
+			name:             "linkAddrCache",
+			useNeighborCache: false,
+		},
+		{
+			name:             "neighborCache",
+			useNeighborCache: true,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			s := stack.New(stack.Options{
+				NetworkProtocols:   []stack.NetworkProtocol{NewProtocol()},
+				TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol6()},
+				UseNeighborCache:   test.useNeighborCache,
+			})
+			{
+				if err := s.CreateNIC(nicID, &stubLinkEndpoint{}); err != nil {
+					t.Fatalf("CreateNIC(_, _) = %s", err)
+				}
+				if err := s.AddAddress(nicID, ProtocolNumber, lladdr0); err != nil {
+					t.Fatalf("AddAddress(_, %d, %s) = %s", ProtocolNumber, lladdr0, err)
+				}
+			}
+			{
+				subnet, err := tcpip.NewSubnet(lladdr1, tcpip.AddressMask(strings.Repeat("\xff", len(lladdr1))))
+				if err != nil {
+					t.Fatal(err)
+				}
+				s.SetRouteTable(
+					[]tcpip.Route{{
+						Destination: subnet,
+						NIC:         nicID,
+					}},
+				)
+			}
+
+			netProto := s.NetworkProtocolInstance(ProtocolNumber)
+			if netProto == nil {
+				t.Fatalf("cannot find protocol instance for network protocol %d", ProtocolNumber)
+			}
+			ep := netProto.NewEndpoint(0, &stubLinkAddressCache{}, &stubNUDHandler{}, &stubDispatcher{}, nil, s)
+			defer ep.Close()
+
+			r, err := s.FindRoute(nicID, lladdr0, lladdr1, ProtocolNumber, false /* multicastLoop */)
+			if err != nil {
+				t.Fatalf("FindRoute(%d, %s, %s, _, false) = (_, %s), want = (_, nil)", nicID, lladdr0, lladdr1, err)
+			}
+			defer r.Release()
+
+			var tllData [header.NDPLinkLayerAddressSize]byte
+			header.NDPOptions(tllData[:]).Serialize(header.NDPOptionsSerializer{
+				header.NDPTargetLinkLayerAddressOption(linkAddr1),
+			})
+
+			types := []struct {
+				typ       header.ICMPv6Type
+				size      int
+				extraData []byte
+			}{
+				{
+					typ:  header.ICMPv6DstUnreachable,
+					size: header.ICMPv6DstUnreachableMinimumSize,
+				},
+				{
+					typ:  header.ICMPv6PacketTooBig,
+					size: header.ICMPv6PacketTooBigMinimumSize,
+				},
+				{
+					typ:  header.ICMPv6TimeExceeded,
+					size: header.ICMPv6MinimumSize,
+				},
+				{
+					typ:  header.ICMPv6ParamProblem,
+					size: header.ICMPv6MinimumSize,
+				},
+				{
+					typ:  header.ICMPv6EchoRequest,
+					size: header.ICMPv6EchoMinimumSize,
+				},
+				{
+					typ:  header.ICMPv6EchoReply,
+					size: header.ICMPv6EchoMinimumSize,
+				},
+				{
+					typ:  header.ICMPv6RouterSolicit,
+					size: header.ICMPv6MinimumSize,
+				},
+				{
+					typ:  header.ICMPv6RouterAdvert,
+					size: header.ICMPv6HeaderSize + header.NDPRAMinimumSize,
+				},
+				{
+					typ:  header.ICMPv6NeighborSolicit,
+					size: header.ICMPv6NeighborSolicitMinimumSize,
+				},
+				{
+					typ:       header.ICMPv6NeighborAdvert,
+					size:      header.ICMPv6NeighborAdvertMinimumSize,
+					extraData: tllData[:],
+				},
+				{
+					typ:  header.ICMPv6RedirectMsg,
+					size: header.ICMPv6MinimumSize,
+				},
+			}
+
+			handleIPv6Payload := func(icmp header.ICMPv6) {
+				pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+					ReserveHeaderBytes: header.IPv6MinimumSize,
+					Data:               buffer.View(icmp).ToVectorisedView(),
+				})
+				ip := header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize))
+				ip.Encode(&header.IPv6Fields{
+					PayloadLength: uint16(len(icmp)),
+					NextHeader:    uint8(header.ICMPv6ProtocolNumber),
+					HopLimit:      header.NDPHopLimit,
+					SrcAddr:       r.LocalAddress,
+					DstAddr:       r.RemoteAddress,
+				})
+				ep.HandlePacket(&r, pkt)
+			}
+
+			for _, typ := range types {
+				icmp := header.ICMPv6(buffer.NewView(typ.size + len(typ.extraData)))
+				copy(icmp[typ.size:], typ.extraData)
+				icmp.SetType(typ.typ)
+				icmp.SetChecksum(header.ICMPv6Checksum(icmp[:typ.size], r.LocalAddress, r.RemoteAddress, buffer.View(typ.extraData).ToVectorisedView()))
+				handleIPv6Payload(icmp)
+			}
+
+			// Construct an empty ICMP packet so that
+			// Stats().ICMP.ICMPv6ReceivedPacketStats.Invalid is incremented.
+			handleIPv6Payload(header.ICMPv6(buffer.NewView(header.IPv6MinimumSize)))
+
+			icmpv6Stats := s.Stats().ICMP.V6PacketsReceived
+			visitStats(reflect.ValueOf(&icmpv6Stats).Elem(), func(name string, s *tcpip.StatCounter) {
+				if got, want := s.Value(), uint64(1); got != want {
+					t.Errorf("got %s = %d, want = %d", name, got, want)
+				}
+			})
+			if t.Failed() {
+				t.Logf("stats:\n%+v", s.Stats())
+			}
+		})
+	}
+}
+
+func TestICMPCountsWithNeighborCache(t *testing.T) {
 	s := stack.New(stack.Options{
 		NetworkProtocols:   []stack.NetworkProtocol{NewProtocol()},
 		TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol6()},
+		UseNeighborCache:   true,
 	})
 	{
-		if err := s.CreateNIC(1, &stubLinkEndpoint{}); err != nil {
-			t.Fatalf("CreateNIC(_) = %s", err)
+		if err := s.CreateNIC(nicID, &stubLinkEndpoint{}); err != nil {
+			t.Fatalf("CreateNIC(_, _) = %s", err)
 		}
-		if err := s.AddAddress(1, ProtocolNumber, lladdr0); err != nil {
+		if err := s.AddAddress(nicID, ProtocolNumber, lladdr0); err != nil {
 			t.Fatalf("AddAddress(_, %d, %s) = %s", ProtocolNumber, lladdr0, err)
 		}
 	}
@@ -105,7 +278,7 @@ func TestICMPCounts(t *testing.T) {
 		s.SetRouteTable(
 			[]tcpip.Route{{
 				Destination: subnet,
-				NIC:         1,
+				NIC:         nicID,
 			}},
 		)
 	}
@@ -114,12 +287,12 @@ func TestICMPCounts(t *testing.T) {
 	if netProto == nil {
 		t.Fatalf("cannot find protocol instance for network protocol %d", ProtocolNumber)
 	}
-	ep := netProto.NewEndpoint(0, &stubLinkAddressCache{}, &stubDispatcher{}, nil, s)
+	ep := netProto.NewEndpoint(0, nil, &stubNUDHandler{}, &stubDispatcher{}, nil, s)
 	defer ep.Close()
 
-	r, err := s.FindRoute(1, lladdr0, lladdr1, ProtocolNumber, false /* multicastLoop */)
+	r, err := s.FindRoute(nicID, lladdr0, lladdr1, ProtocolNumber, false /* multicastLoop */)
 	if err != nil {
-		t.Fatalf("FindRoute(_) = _, %s, want = _, nil", err)
+		t.Fatalf("FindRoute(%d, %s, %s, _, false) = (_, %s), want = (_, nil)", nicID, lladdr0, lladdr1, err)
 	}
 	defer r.Release()
 
@@ -265,19 +438,19 @@ func newTestContext(t *testing.T) *testContext {
 	if testing.Verbose() {
 		wrappedEP0 = sniffer.New(wrappedEP0)
 	}
-	if err := c.s0.CreateNIC(1, wrappedEP0); err != nil {
+	if err := c.s0.CreateNIC(nicID, wrappedEP0); err != nil {
 		t.Fatalf("CreateNIC s0: %v", err)
 	}
-	if err := c.s0.AddAddress(1, ProtocolNumber, lladdr0); err != nil {
+	if err := c.s0.AddAddress(nicID, ProtocolNumber, lladdr0); err != nil {
 		t.Fatalf("AddAddress lladdr0: %v", err)
 	}
 
 	c.linkEP1 = channel.New(defaultChannelSize, defaultMTU, linkAddr1)
 	wrappedEP1 := stack.LinkEndpoint(endpointWithResolutionCapability{LinkEndpoint: c.linkEP1})
-	if err := c.s1.CreateNIC(1, wrappedEP1); err != nil {
+	if err := c.s1.CreateNIC(nicID, wrappedEP1); err != nil {
 		t.Fatalf("CreateNIC failed: %v", err)
 	}
-	if err := c.s1.AddAddress(1, ProtocolNumber, lladdr1); err != nil {
+	if err := c.s1.AddAddress(nicID, ProtocolNumber, lladdr1); err != nil {
 		t.Fatalf("AddAddress lladdr1: %v", err)
 	}
 
@@ -288,7 +461,7 @@ func newTestContext(t *testing.T) *testContext {
 	c.s0.SetRouteTable(
 		[]tcpip.Route{{
 			Destination: subnet0,
-			NIC:         1,
+			NIC:         nicID,
 		}},
 	)
 	subnet1, err := tcpip.NewSubnet(lladdr0, tcpip.AddressMask(strings.Repeat("\xff", len(lladdr0))))
@@ -298,7 +471,7 @@ func newTestContext(t *testing.T) *testContext {
 	c.s1.SetRouteTable(
 		[]tcpip.Route{{
 			Destination: subnet1,
-			NIC:         1,
+			NIC:         nicID,
 		}},
 	)
 
@@ -359,9 +532,9 @@ func TestLinkResolution(t *testing.T) {
 	c := newTestContext(t)
 	defer c.cleanup()
 
-	r, err := c.s0.FindRoute(1, lladdr0, lladdr1, ProtocolNumber, false /* multicastLoop */)
+	r, err := c.s0.FindRoute(nicID, lladdr0, lladdr1, ProtocolNumber, false /* multicastLoop */)
 	if err != nil {
-		t.Fatalf("FindRoute(_) = _, %s, want = _, nil", err)
+		t.Fatalf("FindRoute(%d, %s, %s, _, false) = (_, %s), want = (_, nil)", nicID, lladdr0, lladdr1, err)
 	}
 	defer r.Release()
 
@@ -376,14 +549,14 @@ func TestLinkResolution(t *testing.T) {
 	var wq waiter.Queue
 	ep, err := c.s0.NewEndpoint(header.ICMPv6ProtocolNumber, ProtocolNumber, &wq)
 	if err != nil {
-		t.Fatalf("NewEndpoint(_) = _, %s, want = _, nil", err)
+		t.Fatalf("NewEndpoint(_) = (_, %s), want = (_, nil)", err)
 	}
 
 	for {
-		_, resCh, err := ep.Write(payload, tcpip.WriteOptions{To: &tcpip.FullAddress{NIC: 1, Addr: lladdr1}})
+		_, resCh, err := ep.Write(payload, tcpip.WriteOptions{To: &tcpip.FullAddress{NIC: nicID, Addr: lladdr1}})
 		if resCh != nil {
 			if err != tcpip.ErrNoLinkAddress {
-				t.Fatalf("ep.Write(_) = _, <non-nil>, %s, want = _, <non-nil>, tcpip.ErrNoLinkAddress", err)
+				t.Fatalf("ep.Write(_) = (_, <non-nil>, %s), want = (_, <non-nil>, tcpip.ErrNoLinkAddress)", err)
 			}
 			for _, args := range []routeArgs{
 				{src: c.linkEP0, dst: c.linkEP1, typ: header.ICMPv6NeighborSolicit, remoteLinkAddr: header.EthernetAddressFromMulticastIPv6Address(header.SolicitedNodeAddr(lladdr1))},
@@ -399,7 +572,7 @@ func TestLinkResolution(t *testing.T) {
 			continue
 		}
 		if err != nil {
-			t.Fatalf("ep.Write(_) = _, _, %s", err)
+			t.Fatalf("ep.Write(_) = (_, _, %s)", err)
 		}
 		break
 	}
@@ -424,6 +597,7 @@ func TestICMPChecksumValidationSimple(t *testing.T) {
 		size        int
 		extraData   []byte
 		statCounter func(tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter
+		routerOnly  bool
 	}{
 		{
 			name: "DstUnreachable",
@@ -480,6 +654,8 @@ func TestICMPChecksumValidationSimple(t *testing.T) {
 			statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter {
 				return stats.RouterSolicit
 			},
+			// Hosts MUST silently discard any received Router Solicitation messages.
+			routerOnly: true,
 		},
 		{
 			name: "RouterAdvert",
@@ -516,84 +692,133 @@ func TestICMPChecksumValidationSimple(t *testing.T) {
 		},
 	}
 
-	for _, typ := range types {
-		t.Run(typ.name, func(t *testing.T) {
-			e := channel.New(10, 1280, linkAddr0)
-			s := stack.New(stack.Options{
-				NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
-			})
-			if err := s.CreateNIC(1, e); err != nil {
-				t.Fatalf("CreateNIC(_) = %s", err)
-			}
-
-			if err := s.AddAddress(1, ProtocolNumber, lladdr0); err != nil {
-				t.Fatalf("AddAddress(_, %d, %s) = %s", ProtocolNumber, lladdr0, err)
-			}
-			{
-				subnet, err := tcpip.NewSubnet(lladdr1, tcpip.AddressMask(strings.Repeat("\xff", len(lladdr1))))
-				if err != nil {
-					t.Fatal(err)
-				}
-				s.SetRouteTable(
-					[]tcpip.Route{{
-						Destination: subnet,
-						NIC:         1,
-					}},
-				)
-			}
+	tests := []struct {
+		name             string
+		useNeighborCache bool
+	}{
+		{
+			name:             "linkAddrCache",
+			useNeighborCache: false,
+		},
+		{
+			name:             "neighborCache",
+			useNeighborCache: true,
+		},
+	}
 
-			handleIPv6Payload := func(checksum bool) {
-				icmp := header.ICMPv6(buffer.NewView(typ.size + len(typ.extraData)))
-				copy(icmp[typ.size:], typ.extraData)
-				icmp.SetType(typ.typ)
-				if checksum {
-					icmp.SetChecksum(header.ICMPv6Checksum(icmp, lladdr1, lladdr0, buffer.View{}.ToVectorisedView()))
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			for _, typ := range types {
+				for _, isRouter := range []bool{false, true} {
+					name := typ.name
+					if isRouter {
+						name += " (Router)"
+					}
+					t.Run(name, func(t *testing.T) {
+						e := channel.New(0, 1280, linkAddr0)
+
+						// Indicate that resolution for link layer addresses is required to
+						// send packets over this link. This is needed so the NIC knows to
+						// allocate a neighbor table.
+						e.LinkEPCapabilities |= stack.CapabilityResolutionRequired
+
+						s := stack.New(stack.Options{
+							NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
+							UseNeighborCache: test.useNeighborCache,
+						})
+						if isRouter {
+							// Enabling forwarding makes the stack act as a router.
+							s.SetForwarding(true)
+						}
+						if err := s.CreateNIC(nicID, e); err != nil {
+							t.Fatalf("CreateNIC(_, _) = %s", err)
+						}
+
+						if err := s.AddAddress(nicID, ProtocolNumber, lladdr0); err != nil {
+							t.Fatalf("AddAddress(_, %d, %s) = %s", ProtocolNumber, lladdr0, err)
+						}
+						{
+							subnet, err := tcpip.NewSubnet(lladdr1, tcpip.AddressMask(strings.Repeat("\xff", len(lladdr1))))
+							if err != nil {
+								t.Fatal(err)
+							}
+							s.SetRouteTable(
+								[]tcpip.Route{{
+									Destination: subnet,
+									NIC:         nicID,
+								}},
+							)
+						}
+
+						handleIPv6Payload := func(checksum bool) {
+							icmp := header.ICMPv6(buffer.NewView(typ.size + len(typ.extraData)))
+							copy(icmp[typ.size:], typ.extraData)
+							icmp.SetType(typ.typ)
+							if checksum {
+								icmp.SetChecksum(header.ICMPv6Checksum(icmp, lladdr1, lladdr0, buffer.View{}.ToVectorisedView()))
+							}
+							ip := header.IPv6(buffer.NewView(header.IPv6MinimumSize))
+							ip.Encode(&header.IPv6Fields{
+								PayloadLength: uint16(len(icmp)),
+								NextHeader:    uint8(header.ICMPv6ProtocolNumber),
+								HopLimit:      header.NDPHopLimit,
+								SrcAddr:       lladdr1,
+								DstAddr:       lladdr0,
+							})
+							pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+								Data: buffer.NewVectorisedView(len(ip)+len(icmp), []buffer.View{buffer.View(ip), buffer.View(icmp)}),
+							})
+							e.InjectInbound(ProtocolNumber, pkt)
+						}
+
+						stats := s.Stats().ICMP.V6PacketsReceived
+						invalid := stats.Invalid
+						routerOnly := stats.RouterOnlyPacketsDroppedByHost
+						typStat := typ.statCounter(stats)
+
+						// Initial stat counts should be 0.
+						if got := invalid.Value(); got != 0 {
+							t.Fatalf("got invalid = %d, want = 0", got)
+						}
+						if got := routerOnly.Value(); got != 0 {
+							t.Fatalf("got RouterOnlyPacketsDroppedByHost = %d, want = 0", got)
+						}
+						if got := typStat.Value(); got != 0 {
+							t.Fatalf("got %s = %d, want = 0", typ.name, got)
+						}
+
+						// Without setting checksum, the incoming packet should
+						// be invalid.
+						handleIPv6Payload(false)
+						if got := invalid.Value(); got != 1 {
+							t.Fatalf("got invalid = %d, want = 1", got)
+						}
+						// Router only count should not have increased.
+						if got := routerOnly.Value(); got != 0 {
+							t.Fatalf("got RouterOnlyPacketsDroppedByHost = %d, want = 0", got)
+						}
+						// Rx count of type typ.typ should not have increased.
+						if got := typStat.Value(); got != 0 {
+							t.Fatalf("got %s = %d, want = 0", typ.name, got)
+						}
+
+						// When checksum is set, it should be received.
+						handleIPv6Payload(true)
+						if got := typStat.Value(); got != 1 {
+							t.Fatalf("got %s = %d, want = 1", typ.name, got)
+						}
+						// Invalid count should not have increased again.
+						if got := invalid.Value(); got != 1 {
+							t.Fatalf("got invalid = %d, want = 1", got)
+						}
+						if !isRouter && typ.routerOnly && test.useNeighborCache {
+							// Router only count should have increased.
+							if got := routerOnly.Value(); got != 1 {
+								t.Fatalf("got RouterOnlyPacketsDroppedByHost = %d, want = 1", got)
+							}
+						}
+					})
 				}
-				ip := header.IPv6(buffer.NewView(header.IPv6MinimumSize))
-				ip.Encode(&header.IPv6Fields{
-					PayloadLength: uint16(len(icmp)),
-					NextHeader:    uint8(header.ICMPv6ProtocolNumber),
-					HopLimit:      header.NDPHopLimit,
-					SrcAddr:       lladdr1,
-					DstAddr:       lladdr0,
-				})
-				pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
-					Data: buffer.NewVectorisedView(len(ip)+len(icmp), []buffer.View{buffer.View(ip), buffer.View(icmp)}),
-				})
-				e.InjectInbound(ProtocolNumber, pkt)
-			}
-
-			stats := s.Stats().ICMP.V6PacketsReceived
-			invalid := stats.Invalid
-			typStat := typ.statCounter(stats)
-
-			// Initial stat counts should be 0.
-			if got := invalid.Value(); got != 0 {
-				t.Fatalf("got invalid = %d, want = 0", got)
-			}
-			if got := typStat.Value(); got != 0 {
-				t.Fatalf("got %s = %d, want = 0", typ.name, got)
-			}
-
-			// Without setting checksum, the incoming packet should
-			// be invalid.
-			handleIPv6Payload(false)
-			if got := invalid.Value(); got != 1 {
-				t.Fatalf("got invalid = %d, want = 1", got)
-			}
-			// Rx count of type typ.typ should not have increased.
-			if got := typStat.Value(); got != 0 {
-				t.Fatalf("got %s = %d, want = 0", typ.name, got)
-			}
-
-			// When checksum is set, it should be received.
-			handleIPv6Payload(true)
-			if got := typStat.Value(); got != 1 {
-				t.Fatalf("got %s = %d, want = 1", typ.name, got)
-			}
-			// Invalid count should not have increased again.
-			if got := invalid.Value(); got != 1 {
-				t.Fatalf("got invalid = %d, want = 1", got)
 			}
 		})
 	}
@@ -696,11 +921,11 @@ func TestICMPChecksumValidationWithPayload(t *testing.T) {
 			s := stack.New(stack.Options{
 				NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
 			})
-			if err := s.CreateNIC(1, e); err != nil {
-				t.Fatalf("CreateNIC(_) = %s", err)
+			if err := s.CreateNIC(nicID, e); err != nil {
+				t.Fatalf("CreateNIC(_, _) = %s", err)
 			}
 
-			if err := s.AddAddress(1, ProtocolNumber, lladdr0); err != nil {
+			if err := s.AddAddress(nicID, ProtocolNumber, lladdr0); err != nil {
 				t.Fatalf("AddAddress(_, %d, %s) = %s", ProtocolNumber, lladdr0, err)
 			}
 			{
@@ -711,7 +936,7 @@ func TestICMPChecksumValidationWithPayload(t *testing.T) {
 				s.SetRouteTable(
 					[]tcpip.Route{{
 						Destination: subnet,
-						NIC:         1,
+						NIC:         nicID,
 					}},
 				)
 			}
@@ -750,7 +975,7 @@ func TestICMPChecksumValidationWithPayload(t *testing.T) {
 				t.Fatalf("got invalid = %d, want = 0", got)
 			}
 			if got := typStat.Value(); got != 0 {
-				t.Fatalf("got %s = %d, want = 0", typ.name, got)
+				t.Fatalf("got = %d, want = 0", got)
 			}
 
 			// Without setting checksum, the incoming packet should
@@ -761,13 +986,13 @@ func TestICMPChecksumValidationWithPayload(t *testing.T) {
 			}
 			// Rx count of type typ.typ should not have increased.
 			if got := typStat.Value(); got != 0 {
-				t.Fatalf("got %s = %d, want = 0", typ.name, got)
+				t.Fatalf("got = %d, want = 0", got)
 			}
 
 			// When checksum is set, it should be received.
 			handleIPv6Payload(typ.typ, typ.size, typ.payloadSize, typ.payload, true)
 			if got := typStat.Value(); got != 1 {
-				t.Fatalf("got %s = %d, want = 1", typ.name, got)
+				t.Fatalf("got = %d, want = 1", got)
 			}
 			// Invalid count should not have increased again.
 			if got := invalid.Value(); got != 1 {
@@ -874,12 +1099,12 @@ func TestICMPChecksumValidationWithPayloadMultipleViews(t *testing.T) {
 			s := stack.New(stack.Options{
 				NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
 			})
-			if err := s.CreateNIC(1, e); err != nil {
-				t.Fatalf("CreateNIC(_) = %s", err)
+			if err := s.CreateNIC(nicID, e); err != nil {
+				t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
 			}
 
-			if err := s.AddAddress(1, ProtocolNumber, lladdr0); err != nil {
-				t.Fatalf("AddAddress(_, %d, %s) = %s", ProtocolNumber, lladdr0, err)
+			if err := s.AddAddress(nicID, ProtocolNumber, lladdr0); err != nil {
+				t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, ProtocolNumber, lladdr0, err)
 			}
 			{
 				subnet, err := tcpip.NewSubnet(lladdr1, tcpip.AddressMask(strings.Repeat("\xff", len(lladdr1))))
@@ -889,7 +1114,7 @@ func TestICMPChecksumValidationWithPayloadMultipleViews(t *testing.T) {
 				s.SetRouteTable(
 					[]tcpip.Route{{
 						Destination: subnet,
-						NIC:         1,
+						NIC:         nicID,
 					}},
 				)
 			}
@@ -929,7 +1154,7 @@ func TestICMPChecksumValidationWithPayloadMultipleViews(t *testing.T) {
 				t.Fatalf("got invalid = %d, want = 0", got)
 			}
 			if got := typStat.Value(); got != 0 {
-				t.Fatalf("got %s = %d, want = 0", typ.name, got)
+				t.Fatalf("got = %d, want = 0", got)
 			}
 
 			// Without setting checksum, the incoming packet should
@@ -940,13 +1165,13 @@ func TestICMPChecksumValidationWithPayloadMultipleViews(t *testing.T) {
 			}
 			// Rx count of type typ.typ should not have increased.
 			if got := typStat.Value(); got != 0 {
-				t.Fatalf("got %s = %d, want = 0", typ.name, got)
+				t.Fatalf("got = %d, want = 0", got)
 			}
 
 			// When checksum is set, it should be received.
 			handleIPv6Payload(typ.typ, typ.size, typ.payloadSize, typ.payload, true)
 			if got := typStat.Value(); got != 1 {
-				t.Fatalf("got %s = %d, want = 1", typ.name, got)
+				t.Fatalf("got = %d, want = 1", got)
 			}
 			// Invalid count should not have increased again.
 			if got := invalid.Value(); got != 1 {
diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index 267d2cce8e..36fbbebf09 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -48,6 +48,7 @@ type endpoint struct {
 	nicID         tcpip.NICID
 	linkEP        stack.LinkEndpoint
 	linkAddrCache stack.LinkAddressCache
+	nud           stack.NUDHandler
 	dispatcher    stack.TransportDispatcher
 	protocol      *protocol
 	stack         *stack.Stack
@@ -455,11 +456,12 @@ func (*protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) {
 }
 
 // NewEndpoint creates a new ipv6 endpoint.
-func (p *protocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache stack.LinkAddressCache, dispatcher stack.TransportDispatcher, linkEP stack.LinkEndpoint, st *stack.Stack) stack.NetworkEndpoint {
+func (p *protocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache stack.LinkAddressCache, nud stack.NUDHandler, dispatcher stack.TransportDispatcher, linkEP stack.LinkEndpoint, st *stack.Stack) stack.NetworkEndpoint {
 	return &endpoint{
 		nicID:         nicID,
 		linkEP:        linkEP,
 		linkAddrCache: linkAddrCache,
+		nud:           nud,
 		dispatcher:    dispatcher,
 		protocol:      p,
 		stack:         st,
diff --git a/pkg/tcpip/network/ipv6/ndp_test.go b/pkg/tcpip/network/ipv6/ndp_test.go
index af71a7d6bf..480c495fa0 100644
--- a/pkg/tcpip/network/ipv6/ndp_test.go
+++ b/pkg/tcpip/network/ipv6/ndp_test.go
@@ -18,6 +18,7 @@ import (
 	"strings"
 	"testing"
 
+	"github.com/google/go-cmp/cmp"
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/checker"
@@ -30,12 +31,13 @@ import (
 // setupStackAndEndpoint creates a stack with a single NIC with a link-local
 // address llladdr and an IPv6 endpoint to a remote with link-local address
 // rlladdr
-func setupStackAndEndpoint(t *testing.T, llladdr, rlladdr tcpip.Address) (*stack.Stack, stack.NetworkEndpoint) {
+func setupStackAndEndpoint(t *testing.T, llladdr, rlladdr tcpip.Address, useNeighborCache bool) (*stack.Stack, stack.NetworkEndpoint) {
 	t.Helper()
 
 	s := stack.New(stack.Options{
 		NetworkProtocols:   []stack.NetworkProtocol{NewProtocol()},
 		TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol6()},
+		UseNeighborCache:   useNeighborCache,
 	})
 
 	if err := s.CreateNIC(1, &stubLinkEndpoint{}); err != nil {
@@ -63,8 +65,7 @@ func setupStackAndEndpoint(t *testing.T, llladdr, rlladdr tcpip.Address) (*stack
 		t.Fatalf("cannot find protocol instance for network protocol %d", ProtocolNumber)
 	}
 
-	ep := netProto.NewEndpoint(0, &stubLinkAddressCache{}, &stubDispatcher{}, nil, s)
-
+	ep := netProto.NewEndpoint(0, &stubLinkAddressCache{}, &stubNUDHandler{}, &stubDispatcher{}, nil, s)
 	return s, ep
 }
 
@@ -171,6 +172,123 @@ func TestNeighorSolicitationWithSourceLinkLayerOption(t *testing.T) {
 	}
 }
 
+// TestNeighorSolicitationWithSourceLinkLayerOptionUsingNeighborCache tests
+// that receiving a valid NDP NS message with the Source Link Layer Address
+// option results in a new entry in the neighbor cache for the sender of
+// the message.
+func TestNeighorSolicitationWithSourceLinkLayerOptionUsingNeighborCache(t *testing.T) {
+	const nicID = 1
+
+	tests := []struct {
+		name             string
+		optsBuf          []byte
+		expectedLinkAddr tcpip.LinkAddress
+	}{
+		{
+			name:             "Valid",
+			optsBuf:          []byte{1, 1, 2, 3, 4, 5, 6, 7},
+			expectedLinkAddr: "\x02\x03\x04\x05\x06\x07",
+		},
+		{
+			name:    "Too Small",
+			optsBuf: []byte{1, 1, 2, 3, 4, 5, 6},
+		},
+		{
+			name:    "Invalid Length",
+			optsBuf: []byte{1, 2, 2, 3, 4, 5, 6, 7},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			s := stack.New(stack.Options{
+				NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
+				UseNeighborCache: true,
+			})
+			e := channel.New(0, 1280, linkAddr0)
+			e.LinkEPCapabilities |= stack.CapabilityResolutionRequired
+			if err := s.CreateNIC(nicID, e); err != nil {
+				t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
+			}
+			if err := s.AddAddress(nicID, ProtocolNumber, lladdr0); err != nil {
+				t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, ProtocolNumber, lladdr0, err)
+			}
+
+			ndpNSSize := header.ICMPv6NeighborSolicitMinimumSize + len(test.optsBuf)
+			hdr := buffer.NewPrependable(header.IPv6MinimumSize + ndpNSSize)
+			pkt := header.ICMPv6(hdr.Prepend(ndpNSSize))
+			pkt.SetType(header.ICMPv6NeighborSolicit)
+			ns := header.NDPNeighborSolicit(pkt.NDPPayload())
+			ns.SetTargetAddress(lladdr0)
+			opts := ns.Options()
+			copy(opts, test.optsBuf)
+			pkt.SetChecksum(header.ICMPv6Checksum(pkt, lladdr1, lladdr0, buffer.VectorisedView{}))
+			payloadLength := hdr.UsedLength()
+			ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
+			ip.Encode(&header.IPv6Fields{
+				PayloadLength: uint16(payloadLength),
+				NextHeader:    uint8(header.ICMPv6ProtocolNumber),
+				HopLimit:      255,
+				SrcAddr:       lladdr1,
+				DstAddr:       lladdr0,
+			})
+
+			invalid := s.Stats().ICMP.V6PacketsReceived.Invalid
+
+			// Invalid count should initially be 0.
+			if got := invalid.Value(); got != 0 {
+				t.Fatalf("got invalid = %d, want = 0", got)
+			}
+
+			e.InjectInbound(ProtocolNumber, &stack.PacketBuffer{
+				Data: hdr.View().ToVectorisedView(),
+			})
+
+			neighbors, err := s.Neighbors(nicID)
+			if err != nil {
+				t.Fatalf("s.Neighbors(%d): %s", nicID, err)
+			}
+
+			neighborByAddr := make(map[tcpip.Address]stack.NeighborEntry)
+			for _, n := range neighbors {
+				if existing, ok := neighborByAddr[n.Addr]; ok {
+					if diff := cmp.Diff(existing, n); diff != "" {
+						t.Fatalf("s.Neighbors(%d) returned unexpected duplicate neighbor entry (-existing +got):\n%s", nicID, diff)
+					}
+					t.Fatalf("s.Neighbors(%d) returned unexpected duplicate neighbor entry: %s", nicID, existing)
+				}
+				neighborByAddr[n.Addr] = n
+			}
+
+			if neigh, ok := neighborByAddr[lladdr1]; len(test.expectedLinkAddr) != 0 {
+				// Invalid count should not have increased.
+				if got := invalid.Value(); got != 0 {
+					t.Errorf("got invalid = %d, want = 0", got)
+				}
+
+				if !ok {
+					t.Fatalf("expected a neighbor entry for %q", lladdr1)
+				}
+				if neigh.LinkAddr != test.expectedLinkAddr {
+					t.Errorf("got link address = %s, want = %s", neigh.LinkAddr, test.expectedLinkAddr)
+				}
+				if neigh.State != stack.Stale {
+					t.Errorf("got NUD state = %s, want = %s", neigh.State, stack.Stale)
+				}
+			} else {
+				// Invalid count should have increased.
+				if got := invalid.Value(); got != 1 {
+					t.Errorf("got invalid = %d, want = 1", got)
+				}
+
+				if ok {
+					t.Fatalf("unexpectedly got neighbor entry: %s", neigh)
+				}
+			}
+		})
+	}
+}
+
 func TestNeighorSolicitationResponse(t *testing.T) {
 	const nicID = 1
 	nicAddr := lladdr0
@@ -180,6 +298,20 @@ func TestNeighorSolicitationResponse(t *testing.T) {
 	remoteLinkAddr0 := linkAddr1
 	remoteLinkAddr1 := linkAddr2
 
+	stacks := []struct {
+		name             string
+		useNeighborCache bool
+	}{
+		{
+			name:             "linkAddrCache",
+			useNeighborCache: false,
+		},
+		{
+			name:             "neighborCache",
+			useNeighborCache: true,
+		},
+	}
+
 	tests := []struct {
 		name          string
 		nsOpts        header.NDPOptionsSerializer
@@ -338,86 +470,92 @@ func TestNeighorSolicitationResponse(t *testing.T) {
 		},
 	}
 
-	for _, test := range tests {
-		t.Run(test.name, func(t *testing.T) {
-			s := stack.New(stack.Options{
-				NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
-			})
-			e := channel.New(1, 1280, nicLinkAddr)
-			if err := s.CreateNIC(nicID, e); err != nil {
-				t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
-			}
-			if err := s.AddAddress(nicID, ProtocolNumber, nicAddr); err != nil {
-				t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, ProtocolNumber, nicAddr, err)
-			}
+	for _, stackTyp := range stacks {
+		t.Run(stackTyp.name, func(t *testing.T) {
+			for _, test := range tests {
+				t.Run(test.name, func(t *testing.T) {
+					s := stack.New(stack.Options{
+						NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
+						UseNeighborCache: stackTyp.useNeighborCache,
+					})
+					e := channel.New(1, 1280, nicLinkAddr)
+					e.LinkEPCapabilities |= stack.CapabilityResolutionRequired
+					if err := s.CreateNIC(nicID, e); err != nil {
+						t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
+					}
+					if err := s.AddAddress(nicID, ProtocolNumber, nicAddr); err != nil {
+						t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, ProtocolNumber, nicAddr, err)
+					}
 
-			ndpNSSize := header.ICMPv6NeighborSolicitMinimumSize + test.nsOpts.Length()
-			hdr := buffer.NewPrependable(header.IPv6MinimumSize + ndpNSSize)
-			pkt := header.ICMPv6(hdr.Prepend(ndpNSSize))
-			pkt.SetType(header.ICMPv6NeighborSolicit)
-			ns := header.NDPNeighborSolicit(pkt.NDPPayload())
-			ns.SetTargetAddress(nicAddr)
-			opts := ns.Options()
-			opts.Serialize(test.nsOpts)
-			pkt.SetChecksum(header.ICMPv6Checksum(pkt, test.nsSrc, test.nsDst, buffer.VectorisedView{}))
-			payloadLength := hdr.UsedLength()
-			ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
-			ip.Encode(&header.IPv6Fields{
-				PayloadLength: uint16(payloadLength),
-				NextHeader:    uint8(header.ICMPv6ProtocolNumber),
-				HopLimit:      255,
-				SrcAddr:       test.nsSrc,
-				DstAddr:       test.nsDst,
-			})
+					ndpNSSize := header.ICMPv6NeighborSolicitMinimumSize + test.nsOpts.Length()
+					hdr := buffer.NewPrependable(header.IPv6MinimumSize + ndpNSSize)
+					pkt := header.ICMPv6(hdr.Prepend(ndpNSSize))
+					pkt.SetType(header.ICMPv6NeighborSolicit)
+					ns := header.NDPNeighborSolicit(pkt.NDPPayload())
+					ns.SetTargetAddress(nicAddr)
+					opts := ns.Options()
+					opts.Serialize(test.nsOpts)
+					pkt.SetChecksum(header.ICMPv6Checksum(pkt, test.nsSrc, test.nsDst, buffer.VectorisedView{}))
+					payloadLength := hdr.UsedLength()
+					ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
+					ip.Encode(&header.IPv6Fields{
+						PayloadLength: uint16(payloadLength),
+						NextHeader:    uint8(header.ICMPv6ProtocolNumber),
+						HopLimit:      255,
+						SrcAddr:       test.nsSrc,
+						DstAddr:       test.nsDst,
+					})
+
+					invalid := s.Stats().ICMP.V6PacketsReceived.Invalid
 
-			invalid := s.Stats().ICMP.V6PacketsReceived.Invalid
+					// Invalid count should initially be 0.
+					if got := invalid.Value(); got != 0 {
+						t.Fatalf("got invalid = %d, want = 0", got)
+					}
 
-			// Invalid count should initially be 0.
-			if got := invalid.Value(); got != 0 {
-				t.Fatalf("got invalid = %d, want = 0", got)
-			}
+					e.InjectLinkAddr(ProtocolNumber, test.nsSrcLinkAddr, stack.NewPacketBuffer(stack.PacketBufferOptions{
+						Data: hdr.View().ToVectorisedView(),
+					}))
 
-			e.InjectLinkAddr(ProtocolNumber, test.nsSrcLinkAddr, stack.NewPacketBuffer(stack.PacketBufferOptions{
-				Data: hdr.View().ToVectorisedView(),
-			}))
+					if test.nsInvalid {
+						if got := invalid.Value(); got != 1 {
+							t.Fatalf("got invalid = %d, want = 1", got)
+						}
 
-			if test.nsInvalid {
-				if got := invalid.Value(); got != 1 {
-					t.Fatalf("got invalid = %d, want = 1", got)
-				}
+						if p, got := e.Read(); got {
+							t.Fatalf("unexpected response to an invalid NS = %+v", p.Pkt)
+						}
 
-				if p, got := e.Read(); got {
-					t.Fatalf("unexpected response to an invalid NS = %+v", p.Pkt)
-				}
+						// If we expected the NS to be invalid, we have nothing else to check.
+						return
+					}
 
-				// If we expected the NS to be invalid, we have nothing else to check.
-				return
-			}
+					if got := invalid.Value(); got != 0 {
+						t.Fatalf("got invalid = %d, want = 0", got)
+					}
 
-			if got := invalid.Value(); got != 0 {
-				t.Fatalf("got invalid = %d, want = 0", got)
-			}
+					p, got := e.Read()
+					if !got {
+						t.Fatal("expected an NDP NA response")
+					}
 
-			p, got := e.Read()
-			if !got {
-				t.Fatal("expected an NDP NA response")
-			}
+					if p.Route.RemoteLinkAddress != test.naDstLinkAddr {
+						t.Errorf("got p.Route.RemoteLinkAddress = %s, want = %s", p.Route.RemoteLinkAddress, test.naDstLinkAddr)
+					}
 
-			if p.Route.RemoteLinkAddress != test.naDstLinkAddr {
-				t.Errorf("got p.Route.RemoteLinkAddress = %s, want = %s", p.Route.RemoteLinkAddress, test.naDstLinkAddr)
+					checker.IPv6(t, stack.PayloadSince(p.Pkt.NetworkHeader()),
+						checker.SrcAddr(test.naSrc),
+						checker.DstAddr(test.naDst),
+						checker.TTL(header.NDPHopLimit),
+						checker.NDPNA(
+							checker.NDPNASolicitedFlag(test.naSolicited),
+							checker.NDPNATargetAddress(nicAddr),
+							checker.NDPNAOptions([]header.NDPOption{
+								header.NDPTargetLinkLayerAddressOption(nicLinkAddr[:]),
+							}),
+						))
+				})
 			}
-
-			checker.IPv6(t, stack.PayloadSince(p.Pkt.NetworkHeader()),
-				checker.SrcAddr(test.naSrc),
-				checker.DstAddr(test.naDst),
-				checker.TTL(header.NDPHopLimit),
-				checker.NDPNA(
-					checker.NDPNASolicitedFlag(test.naSolicited),
-					checker.NDPNATargetAddress(nicAddr),
-					checker.NDPNAOptions([]header.NDPOption{
-						header.NDPTargetLinkLayerAddressOption(nicLinkAddr[:]),
-					}),
-				))
 		})
 	}
 }
@@ -532,197 +670,380 @@ func TestNeighorAdvertisementWithTargetLinkLayerOption(t *testing.T) {
 	}
 }
 
-func TestNDPValidation(t *testing.T) {
-	setup := func(t *testing.T) (*stack.Stack, stack.NetworkEndpoint, stack.Route) {
-		t.Helper()
-
-		// Create a stack with the assigned link-local address lladdr0
-		// and an endpoint to lladdr1.
-		s, ep := setupStackAndEndpoint(t, lladdr0, lladdr1)
-
-		r, err := s.FindRoute(1, lladdr0, lladdr1, ProtocolNumber, false /* multicastLoop */)
-		if err != nil {
-			t.Fatalf("FindRoute(_) = _, %s, want = _, nil", err)
-		}
-
-		return s, ep, r
-	}
-
-	handleIPv6Payload := func(payload buffer.View, hopLimit uint8, atomicFragment bool, ep stack.NetworkEndpoint, r *stack.Route) {
-		nextHdr := uint8(header.ICMPv6ProtocolNumber)
-		var extensions buffer.View
-		if atomicFragment {
-			extensions = buffer.NewView(header.IPv6FragmentExtHdrLength)
-			extensions[0] = nextHdr
-			nextHdr = uint8(header.IPv6FragmentExtHdrIdentifier)
-		}
-
-		pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
-			ReserveHeaderBytes: header.IPv6MinimumSize + len(extensions),
-			Data:               payload.ToVectorisedView(),
-		})
-		ip := header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize + len(extensions)))
-		ip.Encode(&header.IPv6Fields{
-			PayloadLength: uint16(len(payload) + len(extensions)),
-			NextHeader:    nextHdr,
-			HopLimit:      hopLimit,
-			SrcAddr:       r.LocalAddress,
-			DstAddr:       r.RemoteAddress,
-		})
-		if n := copy(ip[header.IPv6MinimumSize:], extensions); n != len(extensions) {
-			t.Fatalf("expected to write %d bytes of extensions, but wrote %d", len(extensions), n)
-		}
-		ep.HandlePacket(r, pkt)
-	}
-
-	var tllData [header.NDPLinkLayerAddressSize]byte
-	header.NDPOptions(tllData[:]).Serialize(header.NDPOptionsSerializer{
-		header.NDPTargetLinkLayerAddressOption(linkAddr1),
-	})
+// TestNeighorAdvertisementWithTargetLinkLayerOptionUsingNeighborCache tests
+// that receiving a valid NDP NA message with the Target Link Layer Address
+// option does not result in a new entry in the neighbor cache for the target
+// of the message.
+func TestNeighorAdvertisementWithTargetLinkLayerOptionUsingNeighborCache(t *testing.T) {
+	const nicID = 1
 
-	types := []struct {
-		name        string
-		typ         header.ICMPv6Type
-		size        int
-		extraData   []byte
-		statCounter func(tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter
+	tests := []struct {
+		name    string
+		optsBuf []byte
+		isValid bool
 	}{
 		{
-			name: "RouterSolicit",
-			typ:  header.ICMPv6RouterSolicit,
-			size: header.ICMPv6MinimumSize,
-			statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter {
-				return stats.RouterSolicit
-			},
-		},
-		{
-			name: "RouterAdvert",
-			typ:  header.ICMPv6RouterAdvert,
-			size: header.ICMPv6HeaderSize + header.NDPRAMinimumSize,
-			statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter {
-				return stats.RouterAdvert
-			},
+			name:    "Valid",
+			optsBuf: []byte{2, 1, 2, 3, 4, 5, 6, 7},
+			isValid: true,
 		},
 		{
-			name: "NeighborSolicit",
-			typ:  header.ICMPv6NeighborSolicit,
-			size: header.ICMPv6NeighborSolicitMinimumSize,
-			statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter {
-				return stats.NeighborSolicit
-			},
+			name:    "Too Small",
+			optsBuf: []byte{2, 1, 2, 3, 4, 5, 6},
 		},
 		{
-			name:      "NeighborAdvert",
-			typ:       header.ICMPv6NeighborAdvert,
-			size:      header.ICMPv6NeighborAdvertMinimumSize,
-			extraData: tllData[:],
-			statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter {
-				return stats.NeighborAdvert
-			},
+			name:    "Invalid Length",
+			optsBuf: []byte{2, 2, 2, 3, 4, 5, 6, 7},
 		},
 		{
-			name: "RedirectMsg",
-			typ:  header.ICMPv6RedirectMsg,
-			size: header.ICMPv6MinimumSize,
-			statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter {
-				return stats.RedirectMsg
+			name: "Multiple",
+			optsBuf: []byte{
+				2, 1, 2, 3, 4, 5, 6, 7,
+				2, 1, 2, 3, 4, 5, 6, 8,
 			},
 		},
 	}
 
-	subTests := []struct {
-		name           string
-		atomicFragment bool
-		hopLimit       uint8
-		code           header.ICMPv6Code
-		valid          bool
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			s := stack.New(stack.Options{
+				NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
+				UseNeighborCache: true,
+			})
+			e := channel.New(0, 1280, linkAddr0)
+			e.LinkEPCapabilities |= stack.CapabilityResolutionRequired
+			if err := s.CreateNIC(nicID, e); err != nil {
+				t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
+			}
+			if err := s.AddAddress(nicID, ProtocolNumber, lladdr0); err != nil {
+				t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, ProtocolNumber, lladdr0, err)
+			}
+
+			ndpNASize := header.ICMPv6NeighborAdvertMinimumSize + len(test.optsBuf)
+			hdr := buffer.NewPrependable(header.IPv6MinimumSize + ndpNASize)
+			pkt := header.ICMPv6(hdr.Prepend(ndpNASize))
+			pkt.SetType(header.ICMPv6NeighborAdvert)
+			ns := header.NDPNeighborAdvert(pkt.NDPPayload())
+			ns.SetTargetAddress(lladdr1)
+			opts := ns.Options()
+			copy(opts, test.optsBuf)
+			pkt.SetChecksum(header.ICMPv6Checksum(pkt, lladdr1, lladdr0, buffer.VectorisedView{}))
+			payloadLength := hdr.UsedLength()
+			ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
+			ip.Encode(&header.IPv6Fields{
+				PayloadLength: uint16(payloadLength),
+				NextHeader:    uint8(header.ICMPv6ProtocolNumber),
+				HopLimit:      255,
+				SrcAddr:       lladdr1,
+				DstAddr:       lladdr0,
+			})
+
+			invalid := s.Stats().ICMP.V6PacketsReceived.Invalid
+
+			// Invalid count should initially be 0.
+			if got := invalid.Value(); got != 0 {
+				t.Fatalf("got invalid = %d, want = 0", got)
+			}
+
+			e.InjectInbound(ProtocolNumber, &stack.PacketBuffer{
+				Data: hdr.View().ToVectorisedView(),
+			})
+
+			neighbors, err := s.Neighbors(nicID)
+			if err != nil {
+				t.Fatalf("s.Neighbors(%d): %s", nicID, err)
+			}
+
+			neighborByAddr := make(map[tcpip.Address]stack.NeighborEntry)
+			for _, n := range neighbors {
+				if existing, ok := neighborByAddr[n.Addr]; ok {
+					if diff := cmp.Diff(existing, n); diff != "" {
+						t.Fatalf("s.Neighbors(%d) returned unexpected duplicate neighbor entry (-existing +got):\n%s", nicID, diff)
+					}
+					t.Fatalf("s.Neighbors(%d) returned unexpected duplicate neighbor entry: %s", nicID, existing)
+				}
+				neighborByAddr[n.Addr] = n
+			}
+
+			if neigh, ok := neighborByAddr[lladdr1]; ok {
+				t.Fatalf("unexpectedly got neighbor entry: %s", neigh)
+			}
+
+			if test.isValid {
+				// Invalid count should not have increased.
+				if got := invalid.Value(); got != 0 {
+					t.Errorf("got invalid = %d, want = 0", got)
+				}
+			} else {
+				// Invalid count should have increased.
+				if got := invalid.Value(); got != 1 {
+					t.Errorf("got invalid = %d, want = 1", got)
+				}
+			}
+		})
+	}
+}
+
+func TestNDPValidation(t *testing.T) {
+	stacks := []struct {
+		name             string
+		useNeighborCache bool
 	}{
 		{
-			name:           "Valid",
-			atomicFragment: false,
-			hopLimit:       header.NDPHopLimit,
-			code:           0,
-			valid:          true,
-		},
-		{
-			name:           "Fragmented",
-			atomicFragment: true,
-			hopLimit:       header.NDPHopLimit,
-			code:           0,
-			valid:          false,
-		},
-		{
-			name:           "Invalid hop limit",
-			atomicFragment: false,
-			hopLimit:       header.NDPHopLimit - 1,
-			code:           0,
-			valid:          false,
+			name:             "linkAddrCache",
+			useNeighborCache: false,
 		},
 		{
-			name:           "Invalid ICMPv6 code",
-			atomicFragment: false,
-			hopLimit:       header.NDPHopLimit,
-			code:           1,
-			valid:          false,
+			name:             "neighborCache",
+			useNeighborCache: true,
 		},
 	}
 
-	for _, typ := range types {
-		t.Run(typ.name, func(t *testing.T) {
-			for _, test := range subTests {
-				t.Run(test.name, func(t *testing.T) {
-					s, ep, r := setup(t)
-					defer r.Release()
+	for _, stackTyp := range stacks {
+		t.Run(stackTyp.name, func(t *testing.T) {
+			setup := func(t *testing.T) (*stack.Stack, stack.NetworkEndpoint, stack.Route) {
+				t.Helper()
 
-					stats := s.Stats().ICMP.V6PacketsReceived
-					invalid := stats.Invalid
-					typStat := typ.statCounter(stats)
+				// Create a stack with the assigned link-local address lladdr0
+				// and an endpoint to lladdr1.
+				s, ep := setupStackAndEndpoint(t, lladdr0, lladdr1, stackTyp.useNeighborCache)
 
-					icmp := header.ICMPv6(buffer.NewView(typ.size + len(typ.extraData)))
-					copy(icmp[typ.size:], typ.extraData)
-					icmp.SetType(typ.typ)
-					icmp.SetCode(test.code)
-					icmp.SetChecksum(header.ICMPv6Checksum(icmp[:typ.size], r.LocalAddress, r.RemoteAddress, buffer.View(typ.extraData).ToVectorisedView()))
+				r, err := s.FindRoute(1, lladdr0, lladdr1, ProtocolNumber, false /* multicastLoop */)
+				if err != nil {
+					t.Fatalf("FindRoute(_) = _, %s, want = _, nil", err)
+				}
 
-					// Rx count of the NDP message should initially be 0.
-					if got := typStat.Value(); got != 0 {
-						t.Errorf("got %s = %d, want = 0", typ.name, got)
-					}
+				return s, ep, r
+			}
 
-					// Invalid count should initially be 0.
-					if got := invalid.Value(); got != 0 {
-						t.Errorf("got invalid = %d, want = 0", got)
-					}
+			handleIPv6Payload := func(payload buffer.View, hopLimit uint8, atomicFragment bool, ep stack.NetworkEndpoint, r *stack.Route) {
+				nextHdr := uint8(header.ICMPv6ProtocolNumber)
+				var extensions buffer.View
+				if atomicFragment {
+					extensions = buffer.NewView(header.IPv6FragmentExtHdrLength)
+					extensions[0] = nextHdr
+					nextHdr = uint8(header.IPv6FragmentExtHdrIdentifier)
+				}
 
-					if t.Failed() {
-						t.FailNow()
-					}
+				pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+					ReserveHeaderBytes: header.IPv6MinimumSize + len(extensions),
+					Data:               payload.ToVectorisedView(),
+				})
+				ip := header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize + len(extensions)))
+				ip.Encode(&header.IPv6Fields{
+					PayloadLength: uint16(len(payload) + len(extensions)),
+					NextHeader:    nextHdr,
+					HopLimit:      hopLimit,
+					SrcAddr:       r.LocalAddress,
+					DstAddr:       r.RemoteAddress,
+				})
+				if n := copy(ip[header.IPv6MinimumSize:], extensions); n != len(extensions) {
+					t.Fatalf("expected to write %d bytes of extensions, but wrote %d", len(extensions), n)
+				}
+				ep.HandlePacket(r, pkt)
+			}
 
-					handleIPv6Payload(buffer.View(icmp), test.hopLimit, test.atomicFragment, ep, &r)
+			var tllData [header.NDPLinkLayerAddressSize]byte
+			header.NDPOptions(tllData[:]).Serialize(header.NDPOptionsSerializer{
+				header.NDPTargetLinkLayerAddressOption(linkAddr1),
+			})
 
-					// Rx count of the NDP packet should have increased.
-					if got := typStat.Value(); got != 1 {
-						t.Errorf("got %s = %d, want = 1", typ.name, got)
-					}
+			var sllData [header.NDPLinkLayerAddressSize]byte
+			header.NDPOptions(sllData[:]).Serialize(header.NDPOptionsSerializer{
+				header.NDPSourceLinkLayerAddressOption(linkAddr1),
+			})
 
-					want := uint64(0)
-					if !test.valid {
-						// Invalid count should have increased.
-						want = 1
-					}
-					if got := invalid.Value(); got != want {
-						t.Errorf("got invalid = %d, want = %d", got, want)
+			types := []struct {
+				name        string
+				typ         header.ICMPv6Type
+				size        int
+				extraData   []byte
+				statCounter func(tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter
+				routerOnly  bool
+			}{
+				{
+					name: "RouterSolicit",
+					typ:  header.ICMPv6RouterSolicit,
+					size: header.ICMPv6MinimumSize,
+					statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter {
+						return stats.RouterSolicit
+					},
+					routerOnly: true,
+				},
+				{
+					name: "RouterAdvert",
+					typ:  header.ICMPv6RouterAdvert,
+					size: header.ICMPv6HeaderSize + header.NDPRAMinimumSize,
+					statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter {
+						return stats.RouterAdvert
+					},
+				},
+				{
+					name:      "NeighborSolicit",
+					typ:       header.ICMPv6NeighborSolicit,
+					size:      header.ICMPv6NeighborSolicitMinimumSize,
+					extraData: sllData[:],
+					statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter {
+						return stats.NeighborSolicit
+					},
+				},
+				{
+					name:      "NeighborAdvert",
+					typ:       header.ICMPv6NeighborAdvert,
+					size:      header.ICMPv6NeighborAdvertMinimumSize,
+					extraData: tllData[:],
+					statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter {
+						return stats.NeighborAdvert
+					},
+				},
+				{
+					name: "RedirectMsg",
+					typ:  header.ICMPv6RedirectMsg,
+					size: header.ICMPv6MinimumSize,
+					statCounter: func(stats tcpip.ICMPv6ReceivedPacketStats) *tcpip.StatCounter {
+						return stats.RedirectMsg
+					},
+				},
+			}
+
+			subTests := []struct {
+				name           string
+				atomicFragment bool
+				hopLimit       uint8
+				code           header.ICMPv6Code
+				valid          bool
+			}{
+				{
+					name:           "Valid",
+					atomicFragment: false,
+					hopLimit:       header.NDPHopLimit,
+					code:           0,
+					valid:          true,
+				},
+				{
+					name:           "Fragmented",
+					atomicFragment: true,
+					hopLimit:       header.NDPHopLimit,
+					code:           0,
+					valid:          false,
+				},
+				{
+					name:           "Invalid hop limit",
+					atomicFragment: false,
+					hopLimit:       header.NDPHopLimit - 1,
+					code:           0,
+					valid:          false,
+				},
+				{
+					name:           "Invalid ICMPv6 code",
+					atomicFragment: false,
+					hopLimit:       header.NDPHopLimit,
+					code:           1,
+					valid:          false,
+				},
+			}
+
+			for _, typ := range types {
+				for _, isRouter := range []bool{false, true} {
+					name := typ.name
+					if isRouter {
+						name += " (Router)"
 					}
-				})
+
+					t.Run(name, func(t *testing.T) {
+						for _, test := range subTests {
+							t.Run(test.name, func(t *testing.T) {
+								s, ep, r := setup(t)
+								defer r.Release()
+
+								if isRouter {
+									// Enabling forwarding makes the stack act as a router.
+									s.SetForwarding(true)
+								}
+
+								stats := s.Stats().ICMP.V6PacketsReceived
+								invalid := stats.Invalid
+								routerOnly := stats.RouterOnlyPacketsDroppedByHost
+								typStat := typ.statCounter(stats)
+
+								icmp := header.ICMPv6(buffer.NewView(typ.size + len(typ.extraData)))
+								copy(icmp[typ.size:], typ.extraData)
+								icmp.SetType(typ.typ)
+								icmp.SetCode(test.code)
+								icmp.SetChecksum(header.ICMPv6Checksum(icmp[:typ.size], r.LocalAddress, r.RemoteAddress, buffer.View(typ.extraData).ToVectorisedView()))
+
+								// Rx count of the NDP message should initially be 0.
+								if got := typStat.Value(); got != 0 {
+									t.Errorf("got %s = %d, want = 0", typ.name, got)
+								}
+
+								// Invalid count should initially be 0.
+								if got := invalid.Value(); got != 0 {
+									t.Errorf("got invalid = %d, want = 0", got)
+								}
+
+								// RouterOnlyPacketsDroppedByHost count should initially be 0.
+								if got := routerOnly.Value(); got != 0 {
+									t.Errorf("got RouterOnlyPacketsReceivedByHost = %d, want = 0", got)
+								}
+
+								if t.Failed() {
+									t.FailNow()
+								}
+
+								handleIPv6Payload(buffer.View(icmp), test.hopLimit, test.atomicFragment, ep, &r)
+
+								// Rx count of the NDP packet should have increased.
+								if got := typStat.Value(); got != 1 {
+									t.Errorf("got %s = %d, want = 1", typ.name, got)
+								}
+
+								want := uint64(0)
+								if !test.valid {
+									// Invalid count should have increased.
+									want = 1
+								}
+								if got := invalid.Value(); got != want {
+									t.Errorf("got invalid = %d, want = %d", got, want)
+								}
+
+								want = 0
+								if test.valid && !isRouter && typ.routerOnly {
+									// RouterOnlyPacketsDroppedByHost count should have increased.
+									want = 1
+								}
+								if got := routerOnly.Value(); got != want {
+									t.Errorf("got RouterOnlyPacketsReceivedByHost = %d, want = %d", got, want)
+								}
+
+							})
+						}
+					})
+				}
 			}
 		})
 	}
+
 }
 
 // TestRouterAdvertValidation tests that when the NIC is configured to handle
 // NDP Router Advertisement packets, it validates the Router Advertisement
 // properly before handling them.
 func TestRouterAdvertValidation(t *testing.T) {
+	stacks := []struct {
+		name             string
+		useNeighborCache bool
+	}{
+		{
+			name:             "linkAddrCache",
+			useNeighborCache: false,
+		},
+		{
+			name:             "neighborCache",
+			useNeighborCache: true,
+		},
+	}
+
 	tests := []struct {
 		name            string
 		src             tcpip.Address
@@ -844,61 +1165,67 @@ func TestRouterAdvertValidation(t *testing.T) {
 		},
 	}
 
-	for _, test := range tests {
-		t.Run(test.name, func(t *testing.T) {
-			e := channel.New(10, 1280, linkAddr1)
-			s := stack.New(stack.Options{
-				NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
-			})
-
-			if err := s.CreateNIC(1, e); err != nil {
-				t.Fatalf("CreateNIC(_) = %s", err)
-			}
+	for _, stackTyp := range stacks {
+		t.Run(stackTyp.name, func(t *testing.T) {
+			for _, test := range tests {
+				t.Run(test.name, func(t *testing.T) {
+					e := channel.New(10, 1280, linkAddr1)
+					e.LinkEPCapabilities |= stack.CapabilityResolutionRequired
+					s := stack.New(stack.Options{
+						NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
+						UseNeighborCache: stackTyp.useNeighborCache,
+					})
+
+					if err := s.CreateNIC(1, e); err != nil {
+						t.Fatalf("CreateNIC(_) = %s", err)
+					}
 
-			icmpSize := header.ICMPv6HeaderSize + len(test.ndpPayload)
-			hdr := buffer.NewPrependable(header.IPv6MinimumSize + icmpSize)
-			pkt := header.ICMPv6(hdr.Prepend(icmpSize))
-			pkt.SetType(header.ICMPv6RouterAdvert)
-			pkt.SetCode(test.code)
-			copy(pkt.NDPPayload(), test.ndpPayload)
-			payloadLength := hdr.UsedLength()
-			pkt.SetChecksum(header.ICMPv6Checksum(pkt, test.src, header.IPv6AllNodesMulticastAddress, buffer.VectorisedView{}))
-			ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
-			ip.Encode(&header.IPv6Fields{
-				PayloadLength: uint16(payloadLength),
-				NextHeader:    uint8(icmp.ProtocolNumber6),
-				HopLimit:      test.hopLimit,
-				SrcAddr:       test.src,
-				DstAddr:       header.IPv6AllNodesMulticastAddress,
-			})
+					icmpSize := header.ICMPv6HeaderSize + len(test.ndpPayload)
+					hdr := buffer.NewPrependable(header.IPv6MinimumSize + icmpSize)
+					pkt := header.ICMPv6(hdr.Prepend(icmpSize))
+					pkt.SetType(header.ICMPv6RouterAdvert)
+					pkt.SetCode(test.code)
+					copy(pkt.NDPPayload(), test.ndpPayload)
+					payloadLength := hdr.UsedLength()
+					pkt.SetChecksum(header.ICMPv6Checksum(pkt, test.src, header.IPv6AllNodesMulticastAddress, buffer.VectorisedView{}))
+					ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
+					ip.Encode(&header.IPv6Fields{
+						PayloadLength: uint16(payloadLength),
+						NextHeader:    uint8(icmp.ProtocolNumber6),
+						HopLimit:      test.hopLimit,
+						SrcAddr:       test.src,
+						DstAddr:       header.IPv6AllNodesMulticastAddress,
+					})
 
-			stats := s.Stats().ICMP.V6PacketsReceived
-			invalid := stats.Invalid
-			rxRA := stats.RouterAdvert
+					stats := s.Stats().ICMP.V6PacketsReceived
+					invalid := stats.Invalid
+					rxRA := stats.RouterAdvert
 
-			if got := invalid.Value(); got != 0 {
-				t.Fatalf("got invalid = %d, want = 0", got)
-			}
-			if got := rxRA.Value(); got != 0 {
-				t.Fatalf("got rxRA = %d, want = 0", got)
-			}
+					if got := invalid.Value(); got != 0 {
+						t.Fatalf("got invalid = %d, want = 0", got)
+					}
+					if got := rxRA.Value(); got != 0 {
+						t.Fatalf("got rxRA = %d, want = 0", got)
+					}
 
-			e.InjectInbound(header.IPv6ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
-				Data: hdr.View().ToVectorisedView(),
-			}))
+					e.InjectInbound(header.IPv6ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
+						Data: hdr.View().ToVectorisedView(),
+					}))
 
-			if got := rxRA.Value(); got != 1 {
-				t.Fatalf("got rxRA = %d, want = 1", got)
-			}
+					if got := rxRA.Value(); got != 1 {
+						t.Fatalf("got rxRA = %d, want = 1", got)
+					}
 
-			if test.expectedSuccess {
-				if got := invalid.Value(); got != 0 {
-					t.Fatalf("got invalid = %d, want = 0", got)
-				}
-			} else {
-				if got := invalid.Value(); got != 1 {
-					t.Fatalf("got invalid = %d, want = 1", got)
-				}
+					if test.expectedSuccess {
+						if got := invalid.Value(); got != 0 {
+							t.Fatalf("got invalid = %d, want = 0", got)
+						}
+					} else {
+						if got := invalid.Value(); got != 1 {
+							t.Fatalf("got invalid = %d, want = 1", got)
+						}
+					}
+				})
 			}
 		})
 	}
diff --git a/pkg/tcpip/stack/forwarder_test.go b/pkg/tcpip/stack/forwarder_test.go
index 5a684eb9df..91165ebc7e 100644
--- a/pkg/tcpip/stack/forwarder_test.go
+++ b/pkg/tcpip/stack/forwarder_test.go
@@ -51,6 +51,8 @@ type fwdTestNetworkEndpoint struct {
 	ep         LinkEndpoint
 }
 
+var _ NetworkEndpoint = (*fwdTestNetworkEndpoint)(nil)
+
 func (f *fwdTestNetworkEndpoint) MTU() uint32 {
 	return f.ep.MTU() - uint32(f.MaxHeaderLength())
 }
@@ -110,11 +112,13 @@ func (*fwdTestNetworkEndpoint) Close() {}
 // resolution.
 type fwdTestNetworkProtocol struct {
 	addrCache              *linkAddrCache
+	neigh                  *neighborCache
 	addrResolveDelay       time.Duration
-	onLinkAddressResolved  func(cache *linkAddrCache, addr tcpip.Address, _ tcpip.LinkAddress)
+	onLinkAddressResolved  func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, _ tcpip.LinkAddress)
 	onResolveStaticAddress func(tcpip.Address) (tcpip.LinkAddress, bool)
 }
 
+var _ NetworkProtocol = (*fwdTestNetworkProtocol)(nil)
 var _ LinkAddressResolver = (*fwdTestNetworkProtocol)(nil)
 
 func (f *fwdTestNetworkProtocol) Number() tcpip.NetworkProtocolNumber {
@@ -141,7 +145,7 @@ func (*fwdTestNetworkProtocol) Parse(pkt *PacketBuffer) (tcpip.TransportProtocol
 	return tcpip.TransportProtocolNumber(netHeader[protocolNumberOffset]), true, true
 }
 
-func (f *fwdTestNetworkProtocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache LinkAddressCache, dispatcher TransportDispatcher, ep LinkEndpoint, _ *Stack) NetworkEndpoint {
+func (f *fwdTestNetworkProtocol) NewEndpoint(nicID tcpip.NICID, _ LinkAddressCache, _ NUDHandler, dispatcher TransportDispatcher, ep LinkEndpoint, _ *Stack) NetworkEndpoint {
 	return &fwdTestNetworkEndpoint{
 		nicID:      nicID,
 		proto:      f,
@@ -163,9 +167,9 @@ func (f *fwdTestNetworkProtocol) Close() {}
 func (f *fwdTestNetworkProtocol) Wait() {}
 
 func (f *fwdTestNetworkProtocol) LinkAddressRequest(addr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, linkEP LinkEndpoint) *tcpip.Error {
-	if f.addrCache != nil && f.onLinkAddressResolved != nil {
+	if f.onLinkAddressResolved != nil {
 		time.AfterFunc(f.addrResolveDelay, func() {
-			f.onLinkAddressResolved(f.addrCache, addr, remoteLinkAddr)
+			f.onLinkAddressResolved(f.addrCache, f.neigh, addr, remoteLinkAddr)
 		})
 	}
 	return nil
@@ -300,13 +304,16 @@ func (e *fwdTestLinkEndpoint) AddHeader(local, remote tcpip.LinkAddress, protoco
 	panic("not implemented")
 }
 
-func fwdTestNetFactory(t *testing.T, proto *fwdTestNetworkProtocol) (ep1, ep2 *fwdTestLinkEndpoint) {
+func fwdTestNetFactory(t *testing.T, proto *fwdTestNetworkProtocol, useNeighborCache bool) (ep1, ep2 *fwdTestLinkEndpoint) {
 	// Create a stack with the network protocol and two NICs.
 	s := New(Options{
 		NetworkProtocols: []NetworkProtocol{proto},
+		UseNeighborCache: useNeighborCache,
 	})
 
-	proto.addrCache = s.linkAddrCache
+	if !useNeighborCache {
+		proto.addrCache = s.linkAddrCache
+	}
 
 	// Enable forwarding.
 	s.SetForwarding(true)
@@ -337,6 +344,15 @@ func fwdTestNetFactory(t *testing.T, proto *fwdTestNetworkProtocol) (ep1, ep2 *f
 		t.Fatal("AddAddress #2 failed:", err)
 	}
 
+	if useNeighborCache {
+		// Control the neighbor cache for NIC 2.
+		nic, ok := s.nics[2]
+		if !ok {
+			t.Fatal("failed to get the neighbor cache for NIC 2")
+		}
+		proto.neigh = nic.neigh
+	}
+
 	// Route all packets to NIC 2.
 	{
 		subnet, err := tcpip.NewSubnet("\x00", "\x00")
@@ -350,79 +366,129 @@ func fwdTestNetFactory(t *testing.T, proto *fwdTestNetworkProtocol) (ep1, ep2 *f
 }
 
 func TestForwardingWithStaticResolver(t *testing.T) {
-	// Create a network protocol with a static resolver.
-	proto := &fwdTestNetworkProtocol{
-		onResolveStaticAddress:
-		// The network address 3 is resolved to the link address "c".
-		func(addr tcpip.Address) (tcpip.LinkAddress, bool) {
-			if addr == "\x03" {
-				return "c", true
-			}
-			return "", false
+	tests := []struct {
+		name             string
+		useNeighborCache bool
+	}{
+		{
+			name:             "linkAddrCache",
+			useNeighborCache: false,
+		},
+		{
+			name:             "neighborCache",
+			useNeighborCache: true,
 		},
 	}
 
-	ep1, ep2 := fwdTestNetFactory(t, proto)
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			// Create a network protocol with a static resolver.
+			proto := &fwdTestNetworkProtocol{
+				onResolveStaticAddress:
+				// The network address 3 is resolved to the link address "c".
+				func(addr tcpip.Address) (tcpip.LinkAddress, bool) {
+					if addr == "\x03" {
+						return "c", true
+					}
+					return "", false
+				},
+			}
 
-	// Inject an inbound packet to address 3 on NIC 1, and see if it is
-	// forwarded to NIC 2.
-	buf := buffer.NewView(30)
-	buf[dstAddrOffset] = 3
-	ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{
-		Data: buf.ToVectorisedView(),
-	}))
+			ep1, ep2 := fwdTestNetFactory(t, proto, test.useNeighborCache)
 
-	var p fwdTestPacketInfo
+			// Inject an inbound packet to address 3 on NIC 1, and see if it is
+			// forwarded to NIC 2.
+			buf := buffer.NewView(30)
+			buf[dstAddrOffset] = 3
+			ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{
+				Data: buf.ToVectorisedView(),
+			}))
 
-	select {
-	case p = <-ep2.C:
-	default:
-		t.Fatal("packet not forwarded")
-	}
+			var p fwdTestPacketInfo
 
-	// Test that the static address resolution happened correctly.
-	if p.RemoteLinkAddress != "c" {
-		t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress)
-	}
-	if p.LocalLinkAddress != "b" {
-		t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress)
+			select {
+			case p = <-ep2.C:
+			default:
+				t.Fatal("packet not forwarded")
+			}
+
+			// Test that the static address resolution happened correctly.
+			if p.RemoteLinkAddress != "c" {
+				t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress)
+			}
+			if p.LocalLinkAddress != "b" {
+				t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress)
+			}
+		})
 	}
 }
 
 func TestForwardingWithFakeResolver(t *testing.T) {
-	// Create a network protocol with a fake resolver.
-	proto := &fwdTestNetworkProtocol{
-		addrResolveDelay: 500 * time.Millisecond,
-		onLinkAddressResolved: func(cache *linkAddrCache, addr tcpip.Address, _ tcpip.LinkAddress) {
-			// Any address will be resolved to the link address "c".
-			cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c")
+	tests := []struct {
+		name             string
+		useNeighborCache bool
+		proto            *fwdTestNetworkProtocol
+	}{
+		{
+			name:             "linkAddrCache",
+			useNeighborCache: false,
+			proto: &fwdTestNetworkProtocol{
+				addrResolveDelay: 500 * time.Millisecond,
+				onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, _ tcpip.LinkAddress) {
+					// Any address will be resolved to the link address "c".
+					cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c")
+				},
+			},
+		},
+		{
+			name:             "neighborCache",
+			useNeighborCache: true,
+			proto: &fwdTestNetworkProtocol{
+				addrResolveDelay: 500 * time.Millisecond,
+				onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) {
+					t.Helper()
+					if len(remoteLinkAddr) != 0 {
+						t.Errorf("got remoteLinkAddr=%q, want unspecified", remoteLinkAddr) // Errorf, not Fatalf: this callback runs on a time.AfterFunc goroutine.
+					}
+					// Any address will be resolved to the link address "c".
+					neigh.HandleConfirmation(addr, "c", ReachabilityConfirmationFlags{
+						Solicited: true,
+						Override:  false,
+						IsRouter:  false,
+					})
+				},
+			},
 		},
 	}
 
-	ep1, ep2 := fwdTestNetFactory(t, proto)
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			ep1, ep2 := fwdTestNetFactory(t, test.proto, test.useNeighborCache)
 
-	// Inject an inbound packet to address 3 on NIC 1, and see if it is
-	// forwarded to NIC 2.
-	buf := buffer.NewView(30)
-	buf[dstAddrOffset] = 3
-	ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{
-		Data: buf.ToVectorisedView(),
-	}))
+			// Inject an inbound packet to address 3 on NIC 1, and see if it is
+			// forwarded to NIC 2.
+			buf := buffer.NewView(30)
+			buf[dstAddrOffset] = 3
+			ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{
+				Data: buf.ToVectorisedView(),
+			}))
 
-	var p fwdTestPacketInfo
+			var p fwdTestPacketInfo
 
-	select {
-	case p = <-ep2.C:
-	case <-time.After(time.Second):
-		t.Fatal("packet not forwarded")
-	}
+			select {
+			case p = <-ep2.C:
+			case <-time.After(time.Second):
+				t.Fatal("packet not forwarded")
+			}
 
-	// Test that the address resolution happened correctly.
-	if p.RemoteLinkAddress != "c" {
-		t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress)
-	}
-	if p.LocalLinkAddress != "b" {
-		t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress)
+			// Test that the address resolution happened correctly.
+			if p.RemoteLinkAddress != "c" {
+				t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress)
+			}
+			if p.LocalLinkAddress != "b" {
+				t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress)
+			}
+		})
 	}
 }
 
@@ -430,7 +496,9 @@ func TestForwardingWithNoResolver(t *testing.T) {
 	// Create a network protocol without a resolver.
 	proto := &fwdTestNetworkProtocol{}
 
-	ep1, ep2 := fwdTestNetFactory(t, proto)
+	// Whether or not we use the neighbor cache here does not matter since
+	// neither linkAddrCache nor neighborCache will be used.
+	ep1, ep2 := fwdTestNetFactory(t, proto, false /* useNeighborCache */)
 
 	// inject an inbound packet to address 3 on NIC 1, and see if it is
 	// forwarded to NIC 2.
@@ -448,203 +516,334 @@ func TestForwardingWithNoResolver(t *testing.T) {
 }
 
 func TestForwardingWithFakeResolverPartialTimeout(t *testing.T) {
-	// Create a network protocol with a fake resolver.
-	proto := &fwdTestNetworkProtocol{
-		addrResolveDelay: 500 * time.Millisecond,
-		onLinkAddressResolved: func(cache *linkAddrCache, addr tcpip.Address, _ tcpip.LinkAddress) {
-			// Only packets to address 3 will be resolved to the
-			// link address "c".
-			if addr == "\x03" {
-				cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c")
-			}
+	tests := []struct {
+		name             string
+		useNeighborCache bool
+		proto            *fwdTestNetworkProtocol
+	}{
+		{
+			name:             "linkAddrCache",
+			useNeighborCache: false,
+			proto: &fwdTestNetworkProtocol{
+				addrResolveDelay: 500 * time.Millisecond,
+				onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, _ tcpip.LinkAddress) {
+					// Only packets to address 3 will be resolved to the
+					// link address "c".
+					if addr == "\x03" {
+						cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c")
+					}
+				},
+			},
+		},
+		{
+			name:             "neighborCache",
+			useNeighborCache: true,
+			proto: &fwdTestNetworkProtocol{
+				addrResolveDelay: 500 * time.Millisecond,
+				onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) {
+					t.Helper()
+					if len(remoteLinkAddr) != 0 {
+						t.Errorf("got remoteLinkAddr=%q, want unspecified", remoteLinkAddr) // Errorf, not Fatalf: this callback runs on a time.AfterFunc goroutine.
+					}
+					// Only packets to address 3 will be resolved to the
+					// link address "c".
+					if addr == "\x03" {
+						neigh.HandleConfirmation(addr, "c", ReachabilityConfirmationFlags{
+							Solicited: true,
+							Override:  false,
+							IsRouter:  false,
+						})
+					}
+				},
+			},
 		},
 	}
 
-	ep1, ep2 := fwdTestNetFactory(t, proto)
-
-	// Inject an inbound packet to address 4 on NIC 1. This packet should
-	// not be forwarded.
-	buf := buffer.NewView(30)
-	buf[dstAddrOffset] = 4
-	ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{
-		Data: buf.ToVectorisedView(),
-	}))
-
-	// Inject an inbound packet to address 3 on NIC 1, and see if it is
-	// forwarded to NIC 2.
-	buf = buffer.NewView(30)
-	buf[dstAddrOffset] = 3
-	ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{
-		Data: buf.ToVectorisedView(),
-	}))
-
-	var p fwdTestPacketInfo
-
-	select {
-	case p = <-ep2.C:
-	case <-time.After(time.Second):
-		t.Fatal("packet not forwarded")
-	}
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			ep1, ep2 := fwdTestNetFactory(t, test.proto, test.useNeighborCache)
+
+			// Inject an inbound packet to address 4 on NIC 1. This packet should
+			// not be forwarded.
+			buf := buffer.NewView(30)
+			buf[dstAddrOffset] = 4
+			ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{
+				Data: buf.ToVectorisedView(),
+			}))
+
+			// Inject an inbound packet to address 3 on NIC 1, and see if it is
+			// forwarded to NIC 2.
+			buf = buffer.NewView(30)
+			buf[dstAddrOffset] = 3
+			ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{
+				Data: buf.ToVectorisedView(),
+			}))
+
+			var p fwdTestPacketInfo
+
+			select {
+			case p = <-ep2.C:
+			case <-time.After(time.Second):
+				t.Fatal("packet not forwarded")
+			}
 
-	if nh := PayloadSince(p.Pkt.NetworkHeader()); nh[dstAddrOffset] != 3 {
-		t.Fatalf("got p.Pkt.NetworkHeader[dstAddrOffset] = %d, want = 3", nh[dstAddrOffset])
-	}
+			if nh := PayloadSince(p.Pkt.NetworkHeader()); nh[dstAddrOffset] != 3 {
+				t.Fatalf("got p.Pkt.NetworkHeader[dstAddrOffset] = %d, want = 3", nh[dstAddrOffset])
+			}
 
-	// Test that the address resolution happened correctly.
-	if p.RemoteLinkAddress != "c" {
-		t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress)
-	}
-	if p.LocalLinkAddress != "b" {
-		t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress)
+			// Test that the address resolution happened correctly.
+			if p.RemoteLinkAddress != "c" {
+				t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress)
+			}
+			if p.LocalLinkAddress != "b" {
+				t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress)
+			}
+		})
 	}
 }
 
 func TestForwardingWithFakeResolverTwoPackets(t *testing.T) {
-	// Create a network protocol with a fake resolver.
-	proto := &fwdTestNetworkProtocol{
-		addrResolveDelay: 500 * time.Millisecond,
-		onLinkAddressResolved: func(cache *linkAddrCache, addr tcpip.Address, _ tcpip.LinkAddress) {
-			// Any packets will be resolved to the link address "c".
-			cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c")
+	tests := []struct {
+		name             string
+		useNeighborCache bool
+		proto            *fwdTestNetworkProtocol
+	}{
+		{
+			name:             "linkAddrCache",
+			useNeighborCache: false,
+			proto: &fwdTestNetworkProtocol{
+				addrResolveDelay: 500 * time.Millisecond,
+				onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, _ tcpip.LinkAddress) {
+					// Any packets will be resolved to the link address "c".
+					cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c")
+				},
+			},
+		},
+		{
+			name:             "neighborCache",
+			useNeighborCache: true,
+			proto: &fwdTestNetworkProtocol{
+				addrResolveDelay: 500 * time.Millisecond,
+				onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) {
+					t.Helper()
+					if len(remoteLinkAddr) != 0 {
+						t.Errorf("got remoteLinkAddr=%q, want unspecified", remoteLinkAddr) // Errorf, not Fatalf: this callback runs on a time.AfterFunc goroutine.
+					}
+					// Any packets will be resolved to the link address "c".
+					neigh.HandleConfirmation(addr, "c", ReachabilityConfirmationFlags{
+						Solicited: true,
+						Override:  false,
+						IsRouter:  false,
+					})
+				},
+			},
 		},
 	}
 
-	ep1, ep2 := fwdTestNetFactory(t, proto)
-
-	// Inject two inbound packets to address 3 on NIC 1.
-	for i := 0; i < 2; i++ {
-		buf := buffer.NewView(30)
-		buf[dstAddrOffset] = 3
-		ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{
-			Data: buf.ToVectorisedView(),
-		}))
-	}
-
-	for i := 0; i < 2; i++ {
-		var p fwdTestPacketInfo
-
-		select {
-		case p = <-ep2.C:
-		case <-time.After(time.Second):
-			t.Fatal("packet not forwarded")
-		}
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			ep1, ep2 := fwdTestNetFactory(t, test.proto, test.useNeighborCache)
 
-		if nh := PayloadSince(p.Pkt.NetworkHeader()); nh[dstAddrOffset] != 3 {
-			t.Fatalf("got p.Pkt.NetworkHeader[dstAddrOffset] = %d, want = 3", nh[dstAddrOffset])
-		}
+			// Inject two inbound packets to address 3 on NIC 1.
+			for i := 0; i < 2; i++ {
+				buf := buffer.NewView(30)
+				buf[dstAddrOffset] = 3
+				ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{
+					Data: buf.ToVectorisedView(),
+				}))
+			}
 
-		// Test that the address resolution happened correctly.
-		if p.RemoteLinkAddress != "c" {
-			t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress)
-		}
-		if p.LocalLinkAddress != "b" {
-			t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress)
-		}
+			for i := 0; i < 2; i++ {
+				var p fwdTestPacketInfo
+
+				select {
+				case p = <-ep2.C:
+				case <-time.After(time.Second):
+					t.Fatal("packet not forwarded")
+				}
+
+				if nh := PayloadSince(p.Pkt.NetworkHeader()); nh[dstAddrOffset] != 3 {
+					t.Fatalf("got p.Pkt.NetworkHeader[dstAddrOffset] = %d, want = 3", nh[dstAddrOffset])
+				}
+
+				// Test that the address resolution happened correctly.
+				if p.RemoteLinkAddress != "c" {
+					t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress)
+				}
+				if p.LocalLinkAddress != "b" {
+					t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress)
+				}
+			}
+		})
 	}
 }
 
 func TestForwardingWithFakeResolverManyPackets(t *testing.T) {
-	// Create a network protocol with a fake resolver.
-	proto := &fwdTestNetworkProtocol{
-		addrResolveDelay: 500 * time.Millisecond,
-		onLinkAddressResolved: func(cache *linkAddrCache, addr tcpip.Address, _ tcpip.LinkAddress) {
-			// Any packets will be resolved to the link address "c".
-			cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c")
+	tests := []struct {
+		name             string
+		useNeighborCache bool
+		proto            *fwdTestNetworkProtocol
+	}{
+		{
+			name:             "linkAddrCache",
+			useNeighborCache: false,
+			proto: &fwdTestNetworkProtocol{
+				addrResolveDelay: 500 * time.Millisecond,
+				onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, _ tcpip.LinkAddress) {
+					// Any packets will be resolved to the link address "c".
+					cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c")
+				},
+			},
+		},
+		{
+			name:             "neighborCache",
+			useNeighborCache: true,
+			proto: &fwdTestNetworkProtocol{
+				addrResolveDelay: 500 * time.Millisecond,
+				onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) {
+					t.Helper()
+					if len(remoteLinkAddr) != 0 {
+						t.Errorf("got remoteLinkAddr=%q, want unspecified", remoteLinkAddr) // Errorf, not Fatalf: this callback runs on a time.AfterFunc goroutine.
+					}
+					// Any packets will be resolved to the link address "c".
+					neigh.HandleConfirmation(addr, "c", ReachabilityConfirmationFlags{
+						Solicited: true,
+						Override:  false,
+						IsRouter:  false,
+					})
+				},
+			},
 		},
 	}
 
-	ep1, ep2 := fwdTestNetFactory(t, proto)
-
-	for i := 0; i < maxPendingPacketsPerResolution+5; i++ {
-		// Inject inbound 'maxPendingPacketsPerResolution + 5' packets on NIC 1.
-		buf := buffer.NewView(30)
-		buf[dstAddrOffset] = 3
-		// Set the packet sequence number.
-		binary.BigEndian.PutUint16(buf[fwdTestNetHeaderLen:], uint16(i))
-		ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{
-			Data: buf.ToVectorisedView(),
-		}))
-	}
-
-	for i := 0; i < maxPendingPacketsPerResolution; i++ {
-		var p fwdTestPacketInfo
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			ep1, ep2 := fwdTestNetFactory(t, test.proto, test.useNeighborCache)
 
-		select {
-		case p = <-ep2.C:
-		case <-time.After(time.Second):
-			t.Fatal("packet not forwarded")
-		}
-
-		b := PayloadSince(p.Pkt.NetworkHeader())
-		if b[dstAddrOffset] != 3 {
-			t.Fatalf("got b[dstAddrOffset] = %d, want = 3", b[dstAddrOffset])
-		}
-		if len(b) < fwdTestNetHeaderLen+2 {
-			t.Fatalf("packet is too short to hold a sequence number: len(b) = %d", b)
-		}
-		seqNumBuf := b[fwdTestNetHeaderLen:]
-
-		// The first 5 packets should not be forwarded so the sequence number should
-		// start with 5.
-		want := uint16(i + 5)
-		if n := binary.BigEndian.Uint16(seqNumBuf); n != want {
-			t.Fatalf("got the packet #%d, want = #%d", n, want)
-		}
+			for i := 0; i < maxPendingPacketsPerResolution+5; i++ {
+				// Inject inbound 'maxPendingPacketsPerResolution + 5' packets on NIC 1.
+				buf := buffer.NewView(30)
+				buf[dstAddrOffset] = 3
+				// Set the packet sequence number.
+				binary.BigEndian.PutUint16(buf[fwdTestNetHeaderLen:], uint16(i))
+				ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{
+					Data: buf.ToVectorisedView(),
+				}))
+			}
 
-		// Test that the address resolution happened correctly.
-		if p.RemoteLinkAddress != "c" {
-			t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress)
-		}
-		if p.LocalLinkAddress != "b" {
-			t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress)
-		}
+			for i := 0; i < maxPendingPacketsPerResolution; i++ {
+				var p fwdTestPacketInfo
+
+				select {
+				case p = <-ep2.C:
+				case <-time.After(time.Second):
+					t.Fatal("packet not forwarded")
+				}
+
+				b := PayloadSince(p.Pkt.NetworkHeader())
+				if b[dstAddrOffset] != 3 {
+					t.Fatalf("got b[dstAddrOffset] = %d, want = 3", b[dstAddrOffset])
+				}
+				if len(b) < fwdTestNetHeaderLen+2 {
+					t.Fatalf("packet is too short to hold a sequence number: len(b) = %d", len(b)) // report the length, not the raw bytes
+				}
+				seqNumBuf := b[fwdTestNetHeaderLen:]
+
+				// The first 5 packets should not be forwarded so the sequence number should
+				// start with 5.
+				want := uint16(i + 5)
+				if n := binary.BigEndian.Uint16(seqNumBuf); n != want {
+					t.Fatalf("got the packet #%d, want = #%d", n, want)
+				}
+
+				// Test that the address resolution happened correctly.
+				if p.RemoteLinkAddress != "c" {
+					t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress)
+				}
+				if p.LocalLinkAddress != "b" {
+					t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress)
+				}
+			}
+		})
 	}
 }
 
 func TestForwardingWithFakeResolverManyResolutions(t *testing.T) {
-	// Create a network protocol with a fake resolver.
-	proto := &fwdTestNetworkProtocol{
-		addrResolveDelay: 500 * time.Millisecond,
-		onLinkAddressResolved: func(cache *linkAddrCache, addr tcpip.Address, _ tcpip.LinkAddress) {
-			// Any packets will be resolved to the link address "c".
-			cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c")
+	tests := []struct {
+		name             string
+		useNeighborCache bool
+		proto            *fwdTestNetworkProtocol
+	}{
+		{
+			name:             "linkAddrCache",
+			useNeighborCache: false,
+			proto: &fwdTestNetworkProtocol{
+				addrResolveDelay: 500 * time.Millisecond,
+				onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, _ tcpip.LinkAddress) {
+					// Any packets will be resolved to the link address "c".
+					cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c")
+				},
+			},
+		},
+		{
+			name:             "neighborCache",
+			useNeighborCache: true,
+			proto: &fwdTestNetworkProtocol{
+				addrResolveDelay: 500 * time.Millisecond,
+				onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) {
+					t.Helper()
+					if len(remoteLinkAddr) != 0 {
+						t.Errorf("got remoteLinkAddr=%q, want unspecified", remoteLinkAddr) // Errorf, not Fatalf: this callback runs on a time.AfterFunc goroutine.
+					}
+					// Any packets will be resolved to the link address "c".
+					neigh.HandleConfirmation(addr, "c", ReachabilityConfirmationFlags{
+						Solicited: true,
+						Override:  false,
+						IsRouter:  false,
+					})
+				},
+			},
 		},
 	}
 
-	ep1, ep2 := fwdTestNetFactory(t, proto)
-
-	for i := 0; i < maxPendingResolutions+5; i++ {
-		// Inject inbound 'maxPendingResolutions + 5' packets on NIC 1.
-		// Each packet has a different destination address (3 to
-		// maxPendingResolutions + 7).
-		buf := buffer.NewView(30)
-		buf[dstAddrOffset] = byte(3 + i)
-		ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{
-			Data: buf.ToVectorisedView(),
-		}))
-	}
-
-	for i := 0; i < maxPendingResolutions; i++ {
-		var p fwdTestPacketInfo
-
-		select {
-		case p = <-ep2.C:
-		case <-time.After(time.Second):
-			t.Fatal("packet not forwarded")
-		}
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			ep1, ep2 := fwdTestNetFactory(t, test.proto, test.useNeighborCache)
 
-		// The first 5 packets (address 3 to 7) should not be forwarded
-		// because their address resolutions are interrupted.
-		if nh := PayloadSince(p.Pkt.NetworkHeader()); nh[dstAddrOffset] < 8 {
-			t.Fatalf("got p.Pkt.NetworkHeader[dstAddrOffset] = %d, want p.Pkt.NetworkHeader[dstAddrOffset] >= 8", nh[dstAddrOffset])
-		}
+			for i := 0; i < maxPendingResolutions+5; i++ {
+				// Inject inbound 'maxPendingResolutions + 5' packets on NIC 1.
+				// Each packet has a different destination address (3 to
+				// maxPendingResolutions + 7).
+				buf := buffer.NewView(30)
+				buf[dstAddrOffset] = byte(3 + i)
+				ep1.InjectInbound(fwdTestNetNumber, NewPacketBuffer(PacketBufferOptions{
+					Data: buf.ToVectorisedView(),
+				}))
+			}
 
-		// Test that the address resolution happened correctly.
-		if p.RemoteLinkAddress != "c" {
-			t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress)
-		}
-		if p.LocalLinkAddress != "b" {
-			t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress)
-		}
+			for i := 0; i < maxPendingResolutions; i++ {
+				var p fwdTestPacketInfo
+
+				select {
+				case p = <-ep2.C:
+				case <-time.After(time.Second):
+					t.Fatal("packet not forwarded")
+				}
+
+				// The first 5 packets (address 3 to 7) should not be forwarded
+				// because their address resolutions are interrupted.
+				if nh := PayloadSince(p.Pkt.NetworkHeader()); nh[dstAddrOffset] < 8 {
+					t.Fatalf("got p.Pkt.NetworkHeader[dstAddrOffset] = %d, want p.Pkt.NetworkHeader[dstAddrOffset] >= 8", nh[dstAddrOffset])
+				}
+
+				// Test that the address resolution happened correctly.
+				if p.RemoteLinkAddress != "c" {
+					t.Fatalf("got p.RemoteLinkAddress = %s, want = c", p.RemoteLinkAddress)
+				}
+				if p.LocalLinkAddress != "b" {
+					t.Fatalf("got p.LocalLinkAddress = %s, want = b", p.LocalLinkAddress)
+				}
+			}
+		})
 	}
 }
diff --git a/pkg/tcpip/stack/linkaddrcache_test.go b/pkg/tcpip/stack/linkaddrcache_test.go
index b15b8d1cb8..14fb4239bd 100644
--- a/pkg/tcpip/stack/linkaddrcache_test.go
+++ b/pkg/tcpip/stack/linkaddrcache_test.go
@@ -275,3 +275,71 @@ func TestStaticResolution(t *testing.T) {
 		t.Errorf("c.get(%q)=%q, want %q", string(addr), string(got), string(want))
 	}
 }
+
+// TestCacheWaker verifies that RemoveWaker removes a waker previously added
+// through get().
+func TestCacheWaker(t *testing.T) {
+	c := newLinkAddrCache(1<<63-1, 1*time.Second, 3)
+
+	// First, sanity check that wakers are working.
+	{
+		linkRes := &testLinkAddressResolver{cache: c}
+		s := sleep.Sleeper{}
+		defer s.Done()
+
+		const wakerID = 1
+		w := sleep.Waker{}
+		s.AddWaker(&w, wakerID)
+
+		e := testAddrs[0]
+
+		if _, _, err := c.get(e.addr, linkRes, "", nil, &w); err != tcpip.ErrWouldBlock {
+			t.Fatalf("got c.get(%q, _, _, _, _) = %s, want = %s", e.addr.Addr, err, tcpip.ErrWouldBlock)
+		}
+		id, ok := s.Fetch(true /* block */)
+		if !ok {
+			t.Fatal("got s.Fetch(true) = (_, false), want = (_, true)")
+		}
+		if id != wakerID {
+			t.Fatalf("got s.Fetch(true) = (%d, %t), want = (%d, true)", id, ok, wakerID)
+		}
+
+		if got, _, err := c.get(e.addr, linkRes, "", nil, nil); err != nil {
+			t.Fatalf("c.get(%q, _, _, _, _): %s", e.addr.Addr, err)
+		} else if got != e.linkAddr {
+			t.Fatalf("got c.get(%q) = %q, want = %q", e.addr.Addr, got, e.linkAddr)
+		}
+	}
+
+	// Check that RemoveWaker works.
+	{
+		linkRes := &testLinkAddressResolver{cache: c}
+		s := sleep.Sleeper{}
+		defer s.Done()
+
+		const wakerID = 2 // different than the ID used in the sanity check
+		w := sleep.Waker{}
+		s.AddWaker(&w, wakerID)
+
+		e := testAddrs[1]
+		linkRes.onLinkAddressRequest = func() {
+			// Remove the waker before the linkAddrCache has the opportunity to send
+			// a notification.
+			c.removeWaker(e.addr, &w)
+		}
+
+		if _, _, err := c.get(e.addr, linkRes, "", nil, &w); err != tcpip.ErrWouldBlock {
+			t.Fatalf("got c.get(%q, _, _, _, _) = %s, want = %s", e.addr.Addr, err, tcpip.ErrWouldBlock)
+		}
+
+		if got, err := getBlocking(c, e.addr, linkRes); err != nil {
+			t.Fatalf("c.get(%q, _, _, _, _): %s", e.addr.Addr, err)
+		} else if got != e.linkAddr {
+			t.Fatalf("c.get(%q) = %q, want = %q", e.addr.Addr, got, e.linkAddr)
+		}
+
+		if id, ok := s.Fetch(false /* block */); ok {
+			t.Fatalf("unexpected notification from waker with id %d", id)
+		}
+	}
+}
diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go
index 21bf530101..67dc5364fc 100644
--- a/pkg/tcpip/stack/ndp_test.go
+++ b/pkg/tcpip/stack/ndp_test.go
@@ -2787,7 +2787,7 @@ func TestMixedSLAACAddrConflictRegen(t *testing.T) {
 // stack.Stack will have a default route through the router (llAddr3) installed
 // and a static link-address (linkAddr3) added to the link address cache for the
 // router.
-func stackAndNdpDispatcherWithDefaultRoute(t *testing.T, nicID tcpip.NICID) (*ndpDispatcher, *channel.Endpoint, *stack.Stack) {
+func stackAndNdpDispatcherWithDefaultRoute(t *testing.T, nicID tcpip.NICID, useNeighborCache bool) (*ndpDispatcher, *channel.Endpoint, *stack.Stack) {
 	t.Helper()
 	ndpDisp := &ndpDispatcher{
 		autoGenAddrC: make(chan ndpAutoGenAddrEvent, 1),
@@ -2800,7 +2800,8 @@ func stackAndNdpDispatcherWithDefaultRoute(t *testing.T, nicID tcpip.NICID) (*nd
 			HandleRAs:              true,
 			AutoGenGlobalAddresses: true,
 		},
-		NDPDisp: ndpDisp,
+		NDPDisp:          ndpDisp,
+		UseNeighborCache: useNeighborCache,
 	})
 	if err := s.CreateNIC(nicID, e); err != nil {
 		t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
@@ -2810,7 +2811,11 @@ func stackAndNdpDispatcherWithDefaultRoute(t *testing.T, nicID tcpip.NICID) (*nd
 		Gateway:     llAddr3,
 		NIC:         nicID,
 	}})
-	s.AddLinkAddress(nicID, llAddr3, linkAddr3)
+	if useNeighborCache {
+		s.AddStaticNeighbor(nicID, llAddr3, linkAddr3)
+	} else {
+		s.AddLinkAddress(nicID, llAddr3, linkAddr3)
+	}
 	return ndpDisp, e, s
 }
 
@@ -2884,110 +2889,128 @@ func addrForNewConnectionWithAddr(t *testing.T, s *stack.Stack, addr tcpip.FullA
 // TestAutoGenAddrDeprecateFromPI tests deprecating a SLAAC address when
 // receiving a PI with 0 preferred lifetime.
 func TestAutoGenAddrDeprecateFromPI(t *testing.T) {
-	const nicID = 1
+	stacks := []struct {
+		name             string
+		useNeighborCache bool
+	}{
+		{
+			name:             "linkAddrCache",
+			useNeighborCache: false,
+		},
+		{
+			name:             "neighborCache",
+			useNeighborCache: true,
+		},
+	}
 
-	prefix1, _, addr1 := prefixSubnetAddr(0, linkAddr1)
-	prefix2, _, addr2 := prefixSubnetAddr(1, linkAddr1)
+	for _, stackTyp := range stacks {
+		t.Run(stackTyp.name, func(t *testing.T) {
+			const nicID = 1
 
-	ndpDisp, e, s := stackAndNdpDispatcherWithDefaultRoute(t, nicID)
+			prefix1, _, addr1 := prefixSubnetAddr(0, linkAddr1)
+			prefix2, _, addr2 := prefixSubnetAddr(1, linkAddr1)
 
-	expectAutoGenAddrEvent := func(addr tcpip.AddressWithPrefix, eventType ndpAutoGenAddrEventType) {
-		t.Helper()
+			ndpDisp, e, s := stackAndNdpDispatcherWithDefaultRoute(t, nicID, stackTyp.useNeighborCache)
 
-		select {
-		case e := <-ndpDisp.autoGenAddrC:
-			if diff := checkAutoGenAddrEvent(e, addr, eventType); diff != "" {
-				t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
+			expectAutoGenAddrEvent := func(addr tcpip.AddressWithPrefix, eventType ndpAutoGenAddrEventType) {
+				t.Helper()
+
+				select {
+				case e := <-ndpDisp.autoGenAddrC:
+					if diff := checkAutoGenAddrEvent(e, addr, eventType); diff != "" {
+						t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
+					}
+				default:
+					t.Fatal("expected addr auto gen event")
+				}
 			}
-		default:
-			t.Fatal("expected addr auto gen event")
-		}
-	}
 
-	expectPrimaryAddr := func(addr tcpip.AddressWithPrefix) {
-		t.Helper()
+			expectPrimaryAddr := func(addr tcpip.AddressWithPrefix) {
+				t.Helper()
 
-		if got, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber); err != nil {
-			t.Fatalf("s.GetMainNICAddress(%d, %d): %s", nicID, header.IPv6ProtocolNumber, err)
-		} else if got != addr {
-			t.Errorf("got s.GetMainNICAddress(%d, %d) = %s, want = %s", nicID, header.IPv6ProtocolNumber, got, addr)
-		}
+				if got, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber); err != nil {
+					t.Fatalf("s.GetMainNICAddress(%d, %d): %s", nicID, header.IPv6ProtocolNumber, err)
+				} else if got != addr {
+					t.Errorf("got s.GetMainNICAddress(%d, %d) = %s, want = %s", nicID, header.IPv6ProtocolNumber, got, addr)
+				}
 
-		if got := addrForNewConnection(t, s); got != addr.Address {
-			t.Errorf("got addrForNewConnection = %s, want = %s", got, addr.Address)
-		}
-	}
+				if got := addrForNewConnection(t, s); got != addr.Address {
+					t.Errorf("got addrForNewConnection = %s, want = %s", got, addr.Address)
+				}
+			}
 
-	// Receive PI for prefix1.
-	e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, 100, 100))
-	expectAutoGenAddrEvent(addr1, newAddr)
-	if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) {
-		t.Fatalf("should have %s in the list of addresses", addr1)
-	}
-	expectPrimaryAddr(addr1)
+			// Receive PI for prefix1.
+			e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, 100, 100))
+			expectAutoGenAddrEvent(addr1, newAddr)
+			if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) {
+				t.Fatalf("should have %s in the list of addresses", addr1)
+			}
+			expectPrimaryAddr(addr1)
 
-	// Deprecate addr for prefix1 immedaitely.
-	e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, 100, 0))
-	expectAutoGenAddrEvent(addr1, deprecatedAddr)
-	if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) {
-		t.Fatalf("should have %s in the list of addresses", addr1)
-	}
-	// addr should still be the primary endpoint as there are no other addresses.
-	expectPrimaryAddr(addr1)
+			// Deprecate addr for prefix1 immediately.
+			e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, 100, 0))
+			expectAutoGenAddrEvent(addr1, deprecatedAddr)
+			if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) {
+				t.Fatalf("should have %s in the list of addresses", addr1)
+			}
+			// addr should still be the primary endpoint as there are no other addresses.
+			expectPrimaryAddr(addr1)
 
-	// Refresh lifetimes of addr generated from prefix1.
-	e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, 100, 100))
-	select {
-	case <-ndpDisp.autoGenAddrC:
-		t.Fatal("unexpectedly got an auto-generated event")
-	default:
-	}
-	expectPrimaryAddr(addr1)
+			// Refresh lifetimes of addr generated from prefix1.
+			e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, 100, 100))
+			select {
+			case <-ndpDisp.autoGenAddrC:
+				t.Fatal("unexpectedly got an auto-generated event")
+			default:
+			}
+			expectPrimaryAddr(addr1)
 
-	// Receive PI for prefix2.
-	e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, 100, 100))
-	expectAutoGenAddrEvent(addr2, newAddr)
-	if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) {
-		t.Fatalf("should have %s in the list of addresses", addr2)
-	}
-	expectPrimaryAddr(addr2)
+			// Receive PI for prefix2.
+			e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, 100, 100))
+			expectAutoGenAddrEvent(addr2, newAddr)
+			if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) {
+				t.Fatalf("should have %s in the list of addresses", addr2)
+			}
+			expectPrimaryAddr(addr2)
 
-	// Deprecate addr for prefix2 immedaitely.
-	e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, 100, 0))
-	expectAutoGenAddrEvent(addr2, deprecatedAddr)
-	if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) {
-		t.Fatalf("should have %s in the list of addresses", addr2)
-	}
-	// addr1 should be the primary endpoint now since addr2 is deprecated but
-	// addr1 is not.
-	expectPrimaryAddr(addr1)
-	// addr2 is deprecated but if explicitly requested, it should be used.
-	fullAddr2 := tcpip.FullAddress{Addr: addr2.Address, NIC: nicID}
-	if got := addrForNewConnectionWithAddr(t, s, fullAddr2); got != addr2.Address {
-		t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr2, got, addr2.Address)
-	}
+			// Deprecate addr for prefix2 immediately.
+			e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, 100, 0))
+			expectAutoGenAddrEvent(addr2, deprecatedAddr)
+			if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) {
+				t.Fatalf("should have %s in the list of addresses", addr2)
+			}
+			// addr1 should be the primary endpoint now since addr2 is deprecated but
+			// addr1 is not.
+			expectPrimaryAddr(addr1)
+			// addr2 is deprecated but if explicitly requested, it should be used.
+			fullAddr2 := tcpip.FullAddress{Addr: addr2.Address, NIC: nicID}
+			if got := addrForNewConnectionWithAddr(t, s, fullAddr2); got != addr2.Address {
+				t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr2, got, addr2.Address)
+			}
 
-	// Another PI w/ 0 preferred lifetime should not result in a deprecation
-	// event.
-	e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, 100, 0))
-	select {
-	case <-ndpDisp.autoGenAddrC:
-		t.Fatal("unexpectedly got an auto-generated event")
-	default:
-	}
-	expectPrimaryAddr(addr1)
-	if got := addrForNewConnectionWithAddr(t, s, fullAddr2); got != addr2.Address {
-		t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr2, got, addr2.Address)
-	}
+			// Another PI w/ 0 preferred lifetime should not result in a deprecation
+			// event.
+			e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, 100, 0))
+			select {
+			case <-ndpDisp.autoGenAddrC:
+				t.Fatal("unexpectedly got an auto-generated event")
+			default:
+			}
+			expectPrimaryAddr(addr1)
+			if got := addrForNewConnectionWithAddr(t, s, fullAddr2); got != addr2.Address {
+				t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr2, got, addr2.Address)
+			}
 
-	// Refresh lifetimes of addr generated from prefix2.
-	e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, 100, 100))
-	select {
-	case <-ndpDisp.autoGenAddrC:
-		t.Fatal("unexpectedly got an auto-generated event")
-	default:
+			// Refresh lifetimes of addr generated from prefix2.
+			e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, 100, 100))
+			select {
+			case <-ndpDisp.autoGenAddrC:
+				t.Fatal("unexpectedly got an auto-generated event")
+			default:
+			}
+			expectPrimaryAddr(addr2)
+		})
 	}
-	expectPrimaryAddr(addr2)
 }
 
 // TestAutoGenAddrJobDeprecation tests that an address is properly deprecated
@@ -2996,217 +3019,236 @@ func TestAutoGenAddrJobDeprecation(t *testing.T) {
 	const nicID = 1
 	const newMinVL = 2
 	newMinVLDuration := newMinVL * time.Second
-	saved := stack.MinPrefixInformationValidLifetimeForUpdate
-	defer func() {
-		stack.MinPrefixInformationValidLifetimeForUpdate = saved
-	}()
-	stack.MinPrefixInformationValidLifetimeForUpdate = newMinVLDuration
 
-	prefix1, _, addr1 := prefixSubnetAddr(0, linkAddr1)
-	prefix2, _, addr2 := prefixSubnetAddr(1, linkAddr1)
+	stacks := []struct {
+		name             string
+		useNeighborCache bool
+	}{
+		{
+			name:             "linkAddrCache",
+			useNeighborCache: false,
+		},
+		{
+			name:             "neighborCache",
+			useNeighborCache: true,
+		},
+	}
 
-	ndpDisp, e, s := stackAndNdpDispatcherWithDefaultRoute(t, nicID)
+	for _, stackTyp := range stacks {
+		t.Run(stackTyp.name, func(t *testing.T) {
+			saved := stack.MinPrefixInformationValidLifetimeForUpdate
+			defer func() {
+				stack.MinPrefixInformationValidLifetimeForUpdate = saved
+			}()
+			stack.MinPrefixInformationValidLifetimeForUpdate = newMinVLDuration
 
-	expectAutoGenAddrEvent := func(addr tcpip.AddressWithPrefix, eventType ndpAutoGenAddrEventType) {
-		t.Helper()
+			prefix1, _, addr1 := prefixSubnetAddr(0, linkAddr1)
+			prefix2, _, addr2 := prefixSubnetAddr(1, linkAddr1)
 
-		select {
-		case e := <-ndpDisp.autoGenAddrC:
-			if diff := checkAutoGenAddrEvent(e, addr, eventType); diff != "" {
-				t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
+			ndpDisp, e, s := stackAndNdpDispatcherWithDefaultRoute(t, nicID, stackTyp.useNeighborCache)
+
+			expectAutoGenAddrEvent := func(addr tcpip.AddressWithPrefix, eventType ndpAutoGenAddrEventType) {
+				t.Helper()
+
+				select {
+				case e := <-ndpDisp.autoGenAddrC:
+					if diff := checkAutoGenAddrEvent(e, addr, eventType); diff != "" {
+						t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
+					}
+				default:
+					t.Fatal("expected addr auto gen event")
+				}
 			}
-		default:
-			t.Fatal("expected addr auto gen event")
-		}
-	}
 
-	expectAutoGenAddrEventAfter := func(addr tcpip.AddressWithPrefix, eventType ndpAutoGenAddrEventType, timeout time.Duration) {
-		t.Helper()
+			expectAutoGenAddrEventAfter := func(addr tcpip.AddressWithPrefix, eventType ndpAutoGenAddrEventType, timeout time.Duration) {
+				t.Helper()
 
-		select {
-		case e := <-ndpDisp.autoGenAddrC:
-			if diff := checkAutoGenAddrEvent(e, addr, eventType); diff != "" {
-				t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
+				select {
+				case e := <-ndpDisp.autoGenAddrC:
+					if diff := checkAutoGenAddrEvent(e, addr, eventType); diff != "" {
+						t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
+					}
+				case <-time.After(timeout):
+					t.Fatal("timed out waiting for addr auto gen event")
+				}
 			}
-		case <-time.After(timeout):
-			t.Fatal("timed out waiting for addr auto gen event")
-		}
-	}
 
-	expectPrimaryAddr := func(addr tcpip.AddressWithPrefix) {
-		t.Helper()
+			expectPrimaryAddr := func(addr tcpip.AddressWithPrefix) {
+				t.Helper()
 
-		if got, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber); err != nil {
-			t.Fatalf("s.GetMainNICAddress(%d, %d): %s", nicID, header.IPv6ProtocolNumber, err)
-		} else if got != addr {
-			t.Errorf("got s.GetMainNICAddress(%d, %d) = %s, want = %s", nicID, header.IPv6ProtocolNumber, got, addr)
-		}
+				if got, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber); err != nil {
+					t.Fatalf("s.GetMainNICAddress(%d, %d): %s", nicID, header.IPv6ProtocolNumber, err)
+				} else if got != addr {
+					t.Errorf("got s.GetMainNICAddress(%d, %d) = %s, want = %s", nicID, header.IPv6ProtocolNumber, got, addr)
+				}
 
-		if got := addrForNewConnection(t, s); got != addr.Address {
-			t.Errorf("got addrForNewConnection = %s, want = %s", got, addr.Address)
-		}
-	}
+				if got := addrForNewConnection(t, s); got != addr.Address {
+					t.Errorf("got addrForNewConnection = %s, want = %s", got, addr.Address)
+				}
+			}
 
-	// Receive PI for prefix2.
-	e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, 100, 100))
-	expectAutoGenAddrEvent(addr2, newAddr)
-	if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) {
-		t.Fatalf("should have %s in the list of addresses", addr2)
-	}
-	expectPrimaryAddr(addr2)
+			// Receive PI for prefix2.
+			e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, 100, 100))
+			expectAutoGenAddrEvent(addr2, newAddr)
+			if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) {
+				t.Fatalf("should have %s in the list of addresses", addr2)
+			}
+			expectPrimaryAddr(addr2)
 
-	// Receive a PI for prefix1.
-	e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, 100, 90))
-	expectAutoGenAddrEvent(addr1, newAddr)
-	if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) {
-		t.Fatalf("should have %s in the list of addresses", addr1)
-	}
-	if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) {
-		t.Fatalf("should have %s in the list of addresses", addr2)
-	}
-	expectPrimaryAddr(addr1)
+			// Receive a PI for prefix1.
+			e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, 100, 90))
+			expectAutoGenAddrEvent(addr1, newAddr)
+			if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) {
+				t.Fatalf("should have %s in the list of addresses", addr1)
+			}
+			if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) {
+				t.Fatalf("should have %s in the list of addresses", addr2)
+			}
+			expectPrimaryAddr(addr1)
 
-	// Refresh lifetime for addr of prefix1.
-	e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, newMinVL, newMinVL-1))
-	select {
-	case <-ndpDisp.autoGenAddrC:
-		t.Fatal("unexpectedly got an auto-generated event")
-	default:
-	}
-	expectPrimaryAddr(addr1)
+			// Refresh lifetime for addr of prefix1.
+			e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, newMinVL, newMinVL-1))
+			select {
+			case <-ndpDisp.autoGenAddrC:
+				t.Fatal("unexpectedly got an auto-generated event")
+			default:
+			}
+			expectPrimaryAddr(addr1)
 
-	// Wait for addr of prefix1 to be deprecated.
-	expectAutoGenAddrEventAfter(addr1, deprecatedAddr, newMinVLDuration-time.Second+defaultAsyncPositiveEventTimeout)
-	if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) {
-		t.Fatalf("should not have %s in the list of addresses", addr1)
-	}
-	if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) {
-		t.Fatalf("should have %s in the list of addresses", addr2)
-	}
-	// addr2 should be the primary endpoint now since addr1 is deprecated but
-	// addr2 is not.
-	expectPrimaryAddr(addr2)
-	// addr1 is deprecated but if explicitly requested, it should be used.
-	fullAddr1 := tcpip.FullAddress{Addr: addr1.Address, NIC: nicID}
-	if got := addrForNewConnectionWithAddr(t, s, fullAddr1); got != addr1.Address {
-		t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr1, got, addr1.Address)
-	}
+			// Wait for addr of prefix1 to be deprecated.
+			expectAutoGenAddrEventAfter(addr1, deprecatedAddr, newMinVLDuration-time.Second+defaultAsyncPositiveEventTimeout)
+			if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) {
+				t.Fatalf("should not have %s in the list of addresses", addr1)
+			}
+			if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) {
+				t.Fatalf("should have %s in the list of addresses", addr2)
+			}
+			// addr2 should be the primary endpoint now since addr1 is deprecated but
+			// addr2 is not.
+			expectPrimaryAddr(addr2)
+			// addr1 is deprecated but if explicitly requested, it should be used.
+			fullAddr1 := tcpip.FullAddress{Addr: addr1.Address, NIC: nicID}
+			if got := addrForNewConnectionWithAddr(t, s, fullAddr1); got != addr1.Address {
+				t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr1, got, addr1.Address)
+			}
 
-	// Refresh valid lifetime for addr of prefix1, w/ 0 preferred lifetime to make
-	// sure we do not get a deprecation event again.
-	e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, newMinVL, 0))
-	select {
-	case <-ndpDisp.autoGenAddrC:
-		t.Fatal("unexpectedly got an auto-generated event")
-	default:
-	}
-	expectPrimaryAddr(addr2)
-	if got := addrForNewConnectionWithAddr(t, s, fullAddr1); got != addr1.Address {
-		t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr1, got, addr1.Address)
-	}
+			// Refresh valid lifetime for addr of prefix1, w/ 0 preferred lifetime to make
+			// sure we do not get a deprecation event again.
+			e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, newMinVL, 0))
+			select {
+			case <-ndpDisp.autoGenAddrC:
+				t.Fatal("unexpectedly got an auto-generated event")
+			default:
+			}
+			expectPrimaryAddr(addr2)
+			if got := addrForNewConnectionWithAddr(t, s, fullAddr1); got != addr1.Address {
+				t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr1, got, addr1.Address)
+			}
 
-	// Refresh lifetimes for addr of prefix1.
-	e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, newMinVL, newMinVL-1))
-	select {
-	case <-ndpDisp.autoGenAddrC:
-		t.Fatal("unexpectedly got an auto-generated event")
-	default:
-	}
-	// addr1 is the primary endpoint again since it is non-deprecated now.
-	expectPrimaryAddr(addr1)
+			// Refresh lifetimes for addr of prefix1.
+			e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix1, true, true, newMinVL, newMinVL-1))
+			select {
+			case <-ndpDisp.autoGenAddrC:
+				t.Fatal("unexpectedly got an auto-generated event")
+			default:
+			}
+			// addr1 is the primary endpoint again since it is non-deprecated now.
+			expectPrimaryAddr(addr1)
 
-	// Wait for addr of prefix1 to be deprecated.
-	expectAutoGenAddrEventAfter(addr1, deprecatedAddr, newMinVLDuration-time.Second+defaultAsyncPositiveEventTimeout)
-	if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) {
-		t.Fatalf("should not have %s in the list of addresses", addr1)
-	}
-	if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) {
-		t.Fatalf("should have %s in the list of addresses", addr2)
-	}
-	// addr2 should be the primary endpoint now since it is not deprecated.
-	expectPrimaryAddr(addr2)
-	if got := addrForNewConnectionWithAddr(t, s, fullAddr1); got != addr1.Address {
-		t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr1, got, addr1.Address)
-	}
+			// Wait for addr of prefix1 to be deprecated.
+			expectAutoGenAddrEventAfter(addr1, deprecatedAddr, newMinVLDuration-time.Second+defaultAsyncPositiveEventTimeout)
+			if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) {
+				t.Fatalf("should not have %s in the list of addresses", addr1)
+			}
+			if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) {
+				t.Fatalf("should have %s in the list of addresses", addr2)
+			}
+			// addr2 should be the primary endpoint now since it is not deprecated.
+			expectPrimaryAddr(addr2)
+			if got := addrForNewConnectionWithAddr(t, s, fullAddr1); got != addr1.Address {
+				t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr1, got, addr1.Address)
+			}
 
-	// Wait for addr of prefix1 to be invalidated.
-	expectAutoGenAddrEventAfter(addr1, invalidatedAddr, time.Second+defaultAsyncPositiveEventTimeout)
-	if containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) {
-		t.Fatalf("should not have %s in the list of addresses", addr1)
-	}
-	if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) {
-		t.Fatalf("should have %s in the list of addresses", addr2)
-	}
-	expectPrimaryAddr(addr2)
+			// Wait for addr of prefix1 to be invalidated.
+			expectAutoGenAddrEventAfter(addr1, invalidatedAddr, time.Second+defaultAsyncPositiveEventTimeout)
+			if containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) {
+				t.Fatalf("should not have %s in the list of addresses", addr1)
+			}
+			if !containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) {
+				t.Fatalf("should have %s in the list of addresses", addr2)
+			}
+			expectPrimaryAddr(addr2)
 
-	// Refresh both lifetimes for addr of prefix2 to the same value.
-	e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, newMinVL, newMinVL))
-	select {
-	case <-ndpDisp.autoGenAddrC:
-		t.Fatal("unexpectedly got an auto-generated event")
-	default:
-	}
+			// Refresh both lifetimes for addr of prefix2 to the same value.
+			e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix2, true, true, newMinVL, newMinVL))
+			select {
+			case <-ndpDisp.autoGenAddrC:
+				t.Fatal("unexpectedly got an auto-generated event")
+			default:
+			}
 
-	// Wait for a deprecation then invalidation events, or just an invalidation
-	// event. We need to cover both cases but cannot deterministically hit both
-	// cases because the deprecation and invalidation handlers could be handled in
-	// either deprecation then invalidation, or invalidation then deprecation
-	// (which should be cancelled by the invalidation handler).
-	select {
-	case e := <-ndpDisp.autoGenAddrC:
-		if diff := checkAutoGenAddrEvent(e, addr2, deprecatedAddr); diff == "" {
-			// If we get a deprecation event first, we should get an invalidation
-			// event almost immediately after.
+			// Wait for a deprecation then invalidation events, or just an invalidation
+			// event. We need to cover both cases but cannot deterministically hit both
+			// cases because the deprecation and invalidation handlers could be handled in
+			// either deprecation then invalidation, or invalidation then deprecation
+			// (which should be cancelled by the invalidation handler).
 			select {
 			case e := <-ndpDisp.autoGenAddrC:
-				if diff := checkAutoGenAddrEvent(e, addr2, invalidatedAddr); diff != "" {
-					t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
+				if diff := checkAutoGenAddrEvent(e, addr2, deprecatedAddr); diff == "" {
+					// If we get a deprecation event first, we should get an invalidation
+					// event almost immediately after.
+					select {
+					case e := <-ndpDisp.autoGenAddrC:
+						if diff := checkAutoGenAddrEvent(e, addr2, invalidatedAddr); diff != "" {
+							t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
+						}
+					case <-time.After(defaultAsyncPositiveEventTimeout):
+						t.Fatal("timed out waiting for addr auto gen event")
+					}
+				} else if diff := checkAutoGenAddrEvent(e, addr2, invalidatedAddr); diff == "" {
+					// If we get an invalidation event first, we should not get a deprecation
+					// event after.
+					select {
+					case <-ndpDisp.autoGenAddrC:
+						t.Fatal("unexpectedly got an auto-generated event")
+					case <-time.After(defaultAsyncNegativeEventTimeout):
+					}
+				} else {
+					t.Fatalf("got unexpected auto-generated event")
 				}
-			case <-time.After(defaultAsyncPositiveEventTimeout):
+			case <-time.After(newMinVLDuration + defaultAsyncPositiveEventTimeout):
 				t.Fatal("timed out waiting for addr auto gen event")
 			}
-		} else if diff := checkAutoGenAddrEvent(e, addr2, invalidatedAddr); diff == "" {
-			// If we get an invalidation  event first, we should not get a deprecation
-			// event after.
-			select {
-			case <-ndpDisp.autoGenAddrC:
-				t.Fatal("unexpectedly got an auto-generated event")
-			case <-time.After(defaultAsyncNegativeEventTimeout):
+			if containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) {
+				t.Fatalf("should not have %s in the list of addresses", addr1)
+			}
+			if containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) {
+				t.Fatalf("should not have %s in the list of addresses", addr2)
+			}
+			// Should not have any primary endpoints.
+			if got, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber); err != nil {
+				t.Fatalf("s.GetMainNICAddress(%d, %d): %s", nicID, header.IPv6ProtocolNumber, err)
+			} else if want := (tcpip.AddressWithPrefix{}); got != want {
+				t.Errorf("got s.GetMainNICAddress(%d, %d) = %s, want = %s", nicID, header.IPv6ProtocolNumber, got, want)
+			}
+			wq := waiter.Queue{}
+			we, ch := waiter.NewChannelEntry(nil)
+			wq.EventRegister(&we, waiter.EventIn)
+			defer wq.EventUnregister(&we)
+			defer close(ch)
+			ep, err := s.NewEndpoint(header.UDPProtocolNumber, header.IPv6ProtocolNumber, &wq)
+			if err != nil {
+				t.Fatalf("s.NewEndpoint(%d, %d, _): %s", header.UDPProtocolNumber, header.IPv6ProtocolNumber, err)
+			}
+			defer ep.Close()
+			if err := ep.SetSockOptBool(tcpip.V6OnlyOption, true); err != nil {
+				t.Fatalf("SetSockOpt(tcpip.V6OnlyOption, true): %s", err)
 			}
-		} else {
-			t.Fatalf("got unexpected auto-generated event")
-		}
-	case <-time.After(newMinVLDuration + defaultAsyncPositiveEventTimeout):
-		t.Fatal("timed out waiting for addr auto gen event")
-	}
-	if containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr1) {
-		t.Fatalf("should not have %s in the list of addresses", addr1)
-	}
-	if containsV6Addr(s.NICInfo()[nicID].ProtocolAddresses, addr2) {
-		t.Fatalf("should not have %s in the list of addresses", addr2)
-	}
-	// Should not have any primary endpoints.
-	if got, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber); err != nil {
-		t.Fatalf("s.GetMainNICAddress(%d, %d): %s", nicID, header.IPv6ProtocolNumber, err)
-	} else if want := (tcpip.AddressWithPrefix{}); got != want {
-		t.Errorf("got s.GetMainNICAddress(%d, %d) = %s, want = %s", nicID, header.IPv6ProtocolNumber, got, want)
-	}
-	wq := waiter.Queue{}
-	we, ch := waiter.NewChannelEntry(nil)
-	wq.EventRegister(&we, waiter.EventIn)
-	defer wq.EventUnregister(&we)
-	defer close(ch)
-	ep, err := s.NewEndpoint(header.UDPProtocolNumber, header.IPv6ProtocolNumber, &wq)
-	if err != nil {
-		t.Fatalf("s.NewEndpoint(%d, %d, _): %s", header.UDPProtocolNumber, header.IPv6ProtocolNumber, err)
-	}
-	defer ep.Close()
-	if err := ep.SetSockOptBool(tcpip.V6OnlyOption, true); err != nil {
-		t.Fatalf("SetSockOpt(tcpip.V6OnlyOption, true): %s", err)
-	}
 
-	if err := ep.Connect(dstAddr); err != tcpip.ErrNoRoute {
-		t.Errorf("got ep.Connect(%+v) = %v, want = %s", dstAddr, err, tcpip.ErrNoRoute)
+			if err := ep.Connect(dstAddr); err != tcpip.ErrNoRoute {
+				t.Errorf("got ep.Connect(%+v) = %s, want = %s", dstAddr, err, tcpip.ErrNoRoute)
+			}
+		})
 	}
 }
 
@@ -3524,110 +3566,128 @@ func TestAutoGenAddrRemoval(t *testing.T) {
 func TestAutoGenAddrAfterRemoval(t *testing.T) {
 	const nicID = 1
 
-	prefix1, _, addr1 := prefixSubnetAddr(0, linkAddr1)
-	prefix2, _, addr2 := prefixSubnetAddr(1, linkAddr1)
-	ndpDisp, e, s := stackAndNdpDispatcherWithDefaultRoute(t, nicID)
-
-	expectAutoGenAddrEvent := func(addr tcpip.AddressWithPrefix, eventType ndpAutoGenAddrEventType) {
-		t.Helper()
-
-		select {
-		case e := <-ndpDisp.autoGenAddrC:
-			if diff := checkAutoGenAddrEvent(e, addr, eventType); diff != "" {
-				t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
-			}
-		default:
-			t.Fatal("expected addr auto gen event")
-		}
+	stacks := []struct {
+		name             string
+		useNeighborCache bool
+	}{
+		{
+			name:             "linkAddrCache",
+			useNeighborCache: false,
+		},
+		{
+			name:             "neighborCache",
+			useNeighborCache: true,
+		},
 	}
 
-	expectPrimaryAddr := func(addr tcpip.AddressWithPrefix) {
-		t.Helper()
+	for _, stackTyp := range stacks {
+		t.Run(stackTyp.name, func(t *testing.T) {
+			prefix1, _, addr1 := prefixSubnetAddr(0, linkAddr1)
+			prefix2, _, addr2 := prefixSubnetAddr(1, linkAddr1)
+			ndpDisp, e, s := stackAndNdpDispatcherWithDefaultRoute(t, nicID, stackTyp.useNeighborCache)
 
-		if got, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber); err != nil {
-			t.Fatalf("s.GetMainNICAddress(%d, %d): %s", nicID, header.IPv6ProtocolNumber, err)
-		} else if got != addr {
-			t.Errorf("got s.GetMainNICAddress(%d, %d) = %s, want = %s", nicID, header.IPv6ProtocolNumber, got, addr)
-		}
-
-		if got := addrForNewConnection(t, s); got != addr.Address {
-			t.Errorf("got addrForNewConnection = %s, want = %s", got, addr.Address)
-		}
-	}
+			expectAutoGenAddrEvent := func(addr tcpip.AddressWithPrefix, eventType ndpAutoGenAddrEventType) {
+				t.Helper()
 
-	// Receive a PI to auto-generate addr1 with a large valid and preferred
-	// lifetime.
-	const largeLifetimeSeconds = 999
-	e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr3, 0, prefix1, true, true, largeLifetimeSeconds, largeLifetimeSeconds))
-	expectAutoGenAddrEvent(addr1, newAddr)
-	expectPrimaryAddr(addr1)
+				select {
+				case e := <-ndpDisp.autoGenAddrC:
+					if diff := checkAutoGenAddrEvent(e, addr, eventType); diff != "" {
+						t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
+					}
+				default:
+					t.Fatal("expected addr auto gen event")
+				}
+			}
 
-	// Add addr2 as a static address.
-	protoAddr2 := tcpip.ProtocolAddress{
-		Protocol:          header.IPv6ProtocolNumber,
-		AddressWithPrefix: addr2,
-	}
-	if err := s.AddProtocolAddressWithOptions(nicID, protoAddr2, stack.FirstPrimaryEndpoint); err != nil {
-		t.Fatalf("AddProtocolAddressWithOptions(%d, %+v, %d) = %s", nicID, protoAddr2, stack.FirstPrimaryEndpoint, err)
-	}
-	// addr2 should be more preferred now since it is at the front of the primary
-	// list.
-	expectPrimaryAddr(addr2)
+			expectPrimaryAddr := func(addr tcpip.AddressWithPrefix) {
+				t.Helper()
 
-	// Get a route using addr2 to increment its reference count then remove it
-	// to leave it in the permanentExpired state.
-	r, err := s.FindRoute(nicID, addr2.Address, addr3, header.IPv6ProtocolNumber, false)
-	if err != nil {
-		t.Fatalf("FindRoute(%d, %s, %s, %d, false): %s", nicID, addr2.Address, addr3, header.IPv6ProtocolNumber, err)
-	}
-	defer r.Release()
-	if err := s.RemoveAddress(nicID, addr2.Address); err != nil {
-		t.Fatalf("s.RemoveAddress(%d, %s): %s", nicID, addr2.Address, err)
-	}
-	// addr1 should be preferred again since addr2 is in the expired state.
-	expectPrimaryAddr(addr1)
+				if got, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber); err != nil {
+					t.Fatalf("s.GetMainNICAddress(%d, %d): %s", nicID, header.IPv6ProtocolNumber, err)
+				} else if got != addr {
+					t.Errorf("got s.GetMainNICAddress(%d, %d) = %s, want = %s", nicID, header.IPv6ProtocolNumber, got, addr)
+				}
 
-	// Receive a PI to auto-generate addr2 as valid and preferred.
-	e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr3, 0, prefix2, true, true, largeLifetimeSeconds, largeLifetimeSeconds))
-	expectAutoGenAddrEvent(addr2, newAddr)
-	// addr2 should be more preferred now that it is closer to the front of the
-	// primary list and not deprecated.
-	expectPrimaryAddr(addr2)
+				if got := addrForNewConnection(t, s); got != addr.Address {
+					t.Errorf("got addrForNewConnection = %s, want = %s", got, addr.Address)
+				}
+			}
 
-	// Removing the address should result in an invalidation event immediately.
-	// It should still be in the permanentExpired state because r is still held.
-	//
-	// We remove addr2 here to make sure addr2 was marked as a SLAAC address
-	// (it was previously marked as a static address).
-	if err := s.RemoveAddress(1, addr2.Address); err != nil {
-		t.Fatalf("RemoveAddress(_, %s) = %s", addr2.Address, err)
-	}
-	expectAutoGenAddrEvent(addr2, invalidatedAddr)
-	// addr1 should be more preferred since addr2 is in the expired state.
-	expectPrimaryAddr(addr1)
+			// Receive a PI to auto-generate addr1 with a large valid and preferred
+			// lifetime.
+			const largeLifetimeSeconds = 999
+			e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr3, 0, prefix1, true, true, largeLifetimeSeconds, largeLifetimeSeconds))
+			expectAutoGenAddrEvent(addr1, newAddr)
+			expectPrimaryAddr(addr1)
 
-	// Receive a PI to auto-generate addr2 as valid and deprecated.
-	e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr3, 0, prefix2, true, true, largeLifetimeSeconds, 0))
-	expectAutoGenAddrEvent(addr2, newAddr)
-	// addr1 should still be more preferred since addr2 is deprecated, even though
-	// it is closer to the front of the primary list.
-	expectPrimaryAddr(addr1)
+			// Add addr2 as a static address.
+			protoAddr2 := tcpip.ProtocolAddress{
+				Protocol:          header.IPv6ProtocolNumber,
+				AddressWithPrefix: addr2,
+			}
+			if err := s.AddProtocolAddressWithOptions(nicID, protoAddr2, stack.FirstPrimaryEndpoint); err != nil {
+				t.Fatalf("AddProtocolAddressWithOptions(%d, %+v, %d) = %s", nicID, protoAddr2, stack.FirstPrimaryEndpoint, err)
+			}
+			// addr2 should be more preferred now since it is at the front of the primary
+			// list.
+			expectPrimaryAddr(addr2)
 
-	// Receive a PI to refresh addr2's preferred lifetime.
-	e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr3, 0, prefix2, true, true, largeLifetimeSeconds, largeLifetimeSeconds))
-	select {
-	case <-ndpDisp.autoGenAddrC:
-		t.Fatal("unexpectedly got an auto gen addr event")
-	default:
-	}
-	// addr2 should be more preferred now that it is not deprecated.
-	expectPrimaryAddr(addr2)
+			// Get a route using addr2 to increment its reference count then remove it
+			// to leave it in the permanentExpired state.
+			r, err := s.FindRoute(nicID, addr2.Address, addr3, header.IPv6ProtocolNumber, false)
+			if err != nil {
+				t.Fatalf("FindRoute(%d, %s, %s, %d, false): %s", nicID, addr2.Address, addr3, header.IPv6ProtocolNumber, err)
+			}
+			defer r.Release()
+			if err := s.RemoveAddress(nicID, addr2.Address); err != nil {
+				t.Fatalf("s.RemoveAddress(%d, %s): %s", nicID, addr2.Address, err)
+			}
+			// addr1 should be preferred again since addr2 is in the expired state.
+			expectPrimaryAddr(addr1)
+
+			// Receive a PI to auto-generate addr2 as valid and preferred.
+			e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr3, 0, prefix2, true, true, largeLifetimeSeconds, largeLifetimeSeconds))
+			expectAutoGenAddrEvent(addr2, newAddr)
+			// addr2 should be more preferred now that it is closer to the front of the
+			// primary list and not deprecated.
+			expectPrimaryAddr(addr2)
+
+			// Removing the address should result in an invalidation event immediately.
+			// It should still be in the permanentExpired state because r is still held.
+			//
+			// We remove addr2 here to make sure addr2 was marked as a SLAAC address
+			// (it was previously marked as a static address).
+			if err := s.RemoveAddress(1, addr2.Address); err != nil {
+				t.Fatalf("RemoveAddress(_, %s) = %s", addr2.Address, err)
+			}
+			expectAutoGenAddrEvent(addr2, invalidatedAddr)
+			// addr1 should be more preferred since addr2 is in the expired state.
+			expectPrimaryAddr(addr1)
+
+			// Receive a PI to auto-generate addr2 as valid and deprecated.
+			e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr3, 0, prefix2, true, true, largeLifetimeSeconds, 0))
+			expectAutoGenAddrEvent(addr2, newAddr)
+			// addr1 should still be more preferred since addr2 is deprecated, even though
+			// it is closer to the front of the primary list.
+			expectPrimaryAddr(addr1)
+
+			// Receive a PI to refresh addr2's preferred lifetime.
+			e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr3, 0, prefix2, true, true, largeLifetimeSeconds, largeLifetimeSeconds))
+			select {
+			case <-ndpDisp.autoGenAddrC:
+				t.Fatal("unexpectedly got an auto gen addr event")
+			default:
+			}
+			// addr2 should be more preferred now that it is not deprecated.
+			expectPrimaryAddr(addr2)
 
-	if err := s.RemoveAddress(1, addr2.Address); err != nil {
-		t.Fatalf("RemoveAddress(_, %s) = %s", addr2.Address, err)
+			if err := s.RemoveAddress(1, addr2.Address); err != nil {
+				t.Fatalf("RemoveAddress(_, %s) = %s", addr2.Address, err)
+			}
+			expectAutoGenAddrEvent(addr2, invalidatedAddr)
+			expectPrimaryAddr(addr1)
+		})
 	}
-	expectAutoGenAddrEvent(addr2, invalidatedAddr)
-	expectPrimaryAddr(addr1)
 }
 
 // TestAutoGenAddrStaticConflict tests that if SLAAC generates an address that
diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index aff29f9cca..0c811efdb0 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -21,6 +21,7 @@ import (
 	"sort"
 	"sync/atomic"
 
+	"gvisor.dev/gvisor/pkg/sleep"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
@@ -135,18 +136,8 @@ func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, ctx NICC
 	}
 	nic.mu.ndp.initializeTempAddrState()
 
-	// Register supported packet endpoint protocols.
-	for _, netProto := range header.Ethertypes {
-		nic.mu.packetEPs[netProto] = []PacketEndpoint{}
-	}
-	for _, netProto := range stack.networkProtocols {
-		netNum := netProto.Number()
-		nic.mu.packetEPs[netNum] = nil
-		nic.networkEndpoints[netNum] = netProto.NewEndpoint(id, stack, nic, ep, stack)
-	}
-
 	// Check for Neighbor Unreachability Detection support.
-	if ep.Capabilities()&CapabilityResolutionRequired != 0 && len(stack.linkAddrResolvers) != 0 {
+	if ep.Capabilities()&CapabilityResolutionRequired != 0 && len(stack.linkAddrResolvers) != 0 && stack.useNeighborCache {
 		rng := rand.New(rand.NewSource(stack.clock.NowNanoseconds()))
 		nic.neigh = &neighborCache{
 			nic:   nic,
@@ -155,6 +146,16 @@ func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, ctx NICC
 		}
 	}
 
+	// Register supported packet endpoint protocols.
+	for _, netProto := range header.Ethertypes {
+		nic.mu.packetEPs[netProto] = []PacketEndpoint{}
+	}
+	for _, netProto := range stack.networkProtocols {
+		netNum := netProto.Number()
+		nic.mu.packetEPs[netNum] = nil
+		nic.networkEndpoints[netNum] = netProto.NewEndpoint(id, stack, nic.neigh, nic, ep, stack)
+	}
+
 	nic.linkEP.Attach(nic)
 
 	return nic
@@ -431,7 +432,7 @@ func (n *NIC) setSpoofing(enable bool) {
 // If an IPv6 primary endpoint is requested, Source Address Selection (as
 // defined by RFC 6724 section 5) will be performed.
 func (n *NIC) primaryEndpoint(protocol tcpip.NetworkProtocolNumber, remoteAddr tcpip.Address) *referencedNetworkEndpoint {
-	if protocol == header.IPv6ProtocolNumber && remoteAddr != "" {
+	if protocol == header.IPv6ProtocolNumber && len(remoteAddr) != 0 {
 		return n.primaryIPv6Endpoint(remoteAddr)
 	}
 
@@ -818,11 +819,24 @@ func (n *NIC) addAddressLocked(protocolAddress tcpip.ProtocolAddress, peb Primar
 		}
 	}
 
-	ep, ok := n.networkEndpoints[protocolAddress.Protocol]
+	netProto, ok := n.stack.networkProtocols[protocolAddress.Protocol]
 	if !ok {
 		return nil, tcpip.ErrUnknownProtocol
 	}
 
+	var nud NUDHandler
+	if n.neigh != nil {
+		// An interface value that holds a nil concrete value is itself non-nil.
+		// For this reason, n.neigh cannot be passed directly to NewEndpoint so
+		// NetworkEndpoints don't confuse it for non-nil.
+		//
+		// See https://golang.org/doc/faq#nil_error for more information.
+		nud = n.neigh
+	}
+
+	// Create the new network endpoint.
+	ep := netProto.NewEndpoint(n.id, n.stack, nud, n, n.linkEP, n.stack)
+
 	isIPv6Unicast := protocolAddress.Protocol == header.IPv6ProtocolNumber && header.IsV6UnicastAddress(protocolAddress.AddressWithPrefix.Address)
 
 	// If the address is an IPv6 address and it is a permanent address,
@@ -844,10 +858,11 @@ func (n *NIC) addAddressLocked(protocolAddress tcpip.ProtocolAddress, peb Primar
 		deprecated: deprecated,
 	}
 
-	// Set up cache if link address resolution exists for this protocol.
+	// Set up resolver if link address resolution exists for this protocol.
 	if n.linkEP.Capabilities()&CapabilityResolutionRequired != 0 {
-		if _, ok := n.stack.linkAddrResolvers[protocolAddress.Protocol]; ok {
+		if linkRes, ok := n.stack.linkAddrResolvers[protocolAddress.Protocol]; ok {
 			ref.linkCache = n.stack
+			ref.linkRes = linkRes
 		}
 	}
 
@@ -1082,6 +1097,51 @@ func (n *NIC) RemoveAddress(addr tcpip.Address) *tcpip.Error {
 	return n.removePermanentAddressLocked(addr)
 }
 
+func (n *NIC) neighbors() ([]NeighborEntry, *tcpip.Error) {
+	if n.neigh == nil {
+		return nil, tcpip.ErrNotSupported
+	}
+
+	return n.neigh.entries(), nil
+}
+
+func (n *NIC) removeWaker(addr tcpip.Address, w *sleep.Waker) {
+	if n.neigh == nil {
+		return
+	}
+
+	n.neigh.removeWaker(addr, w)
+}
+
+func (n *NIC) addStaticNeighbor(addr tcpip.Address, linkAddress tcpip.LinkAddress) *tcpip.Error {
+	if n.neigh == nil {
+		return tcpip.ErrNotSupported
+	}
+
+	n.neigh.addStaticEntry(addr, linkAddress)
+	return nil
+}
+
+func (n *NIC) removeNeighbor(addr tcpip.Address) *tcpip.Error {
+	if n.neigh == nil {
+		return tcpip.ErrNotSupported
+	}
+
+	if !n.neigh.removeEntry(addr) {
+		return tcpip.ErrBadAddress
+	}
+	return nil
+}
+
+func (n *NIC) clearNeighbors() *tcpip.Error {
+	if n.neigh == nil {
+		return tcpip.ErrNotSupported
+	}
+
+	n.neigh.clear()
+	return nil
+}
+
 // joinGroup adds a new endpoint for the given multicast address, if none
 // exists yet. Otherwise it just increments its count.
 func (n *NIC) joinGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) *tcpip.Error {
@@ -1662,6 +1722,10 @@ type referencedNetworkEndpoint struct {
 	// protocol. Set to nil otherwise.
 	linkCache LinkAddressCache
 
+	// linkRes is set if link address resolution is enabled for this protocol.
+	// Set to nil otherwise.
+	linkRes LinkAddressResolver
+
 	// refs is counting references held for this endpoint. When refs hits zero it
 	// triggers the automatic removal of the endpoint from the NIC.
 	refs int32
diff --git a/pkg/tcpip/stack/nic_test.go b/pkg/tcpip/stack/nic_test.go
index d312a79eb2..1e065b5c1f 100644
--- a/pkg/tcpip/stack/nic_test.go
+++ b/pkg/tcpip/stack/nic_test.go
@@ -192,7 +192,7 @@ func (*testIPv6Protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address)
 }
 
 // NewEndpoint implements NetworkProtocol.NewEndpoint.
-func (p *testIPv6Protocol) NewEndpoint(nicID tcpip.NICID, _ LinkAddressCache, _ TransportDispatcher, linkEP LinkEndpoint, _ *Stack) NetworkEndpoint {
+func (p *testIPv6Protocol) NewEndpoint(nicID tcpip.NICID, _ LinkAddressCache, _ NUDHandler, _ TransportDispatcher, linkEP LinkEndpoint, _ *Stack) NetworkEndpoint {
 	return &testIPv6Endpoint{
 		nicID:    nicID,
 		linkEP:   linkEP,
diff --git a/pkg/tcpip/stack/nud_test.go b/pkg/tcpip/stack/nud_test.go
index 2494ee610c..2b97e5972f 100644
--- a/pkg/tcpip/stack/nud_test.go
+++ b/pkg/tcpip/stack/nud_test.go
@@ -61,6 +61,7 @@ func TestSetNUDConfigurationFailsForBadNICID(t *testing.T) {
 		// stack will only allocate neighbor caches if a protocol providing link
 		// address resolution is specified (e.g. ARP or IPv6).
 		NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
+		UseNeighborCache: true,
 	})
 
 	// No NIC with ID 1 yet.
@@ -84,7 +85,8 @@ func TestNUDConfigurationFailsForNotSupported(t *testing.T) {
 	e.LinkEPCapabilities |= stack.CapabilityResolutionRequired
 
 	s := stack.New(stack.Options{
-		NUDConfigs: stack.DefaultNUDConfigurations(),
+		NUDConfigs:       stack.DefaultNUDConfigurations(),
+		UseNeighborCache: true,
 	})
 	if err := s.CreateNIC(nicID, e); err != nil {
 		t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
@@ -108,7 +110,8 @@ func TestSetNUDConfigurationFailsForNotSupported(t *testing.T) {
 	e.LinkEPCapabilities |= stack.CapabilityResolutionRequired
 
 	s := stack.New(stack.Options{
-		NUDConfigs: stack.DefaultNUDConfigurations(),
+		NUDConfigs:       stack.DefaultNUDConfigurations(),
+		UseNeighborCache: true,
 	})
 	if err := s.CreateNIC(nicID, e); err != nil {
 		t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
@@ -136,6 +139,7 @@ func TestDefaultNUDConfigurations(t *testing.T) {
 		// address resolution is specified (e.g. ARP or IPv6).
 		NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
 		NUDConfigs:       stack.DefaultNUDConfigurations(),
+		UseNeighborCache: true,
 	})
 	if err := s.CreateNIC(nicID, e); err != nil {
 		t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
@@ -190,6 +194,7 @@ func TestNUDConfigurationsBaseReachableTime(t *testing.T) {
 				// providing link address resolution is specified (e.g. ARP or IPv6).
 				NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
 				NUDConfigs:       c,
+				UseNeighborCache: true,
 			})
 			if err := s.CreateNIC(nicID, e); err != nil {
 				t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
@@ -246,6 +251,7 @@ func TestNUDConfigurationsMinRandomFactor(t *testing.T) {
 				// providing link address resolution is specified (e.g. ARP or IPv6).
 				NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
 				NUDConfigs:       c,
+				UseNeighborCache: true,
 			})
 			if err := s.CreateNIC(nicID, e); err != nil {
 				t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
@@ -325,6 +331,7 @@ func TestNUDConfigurationsMaxRandomFactor(t *testing.T) {
 				// providing link address resolution is specified (e.g. ARP or IPv6).
 				NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
 				NUDConfigs:       c,
+				UseNeighborCache: true,
 			})
 			if err := s.CreateNIC(nicID, e); err != nil {
 				t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
@@ -386,6 +393,7 @@ func TestNUDConfigurationsRetransmitTimer(t *testing.T) {
 				// providing link address resolution is specified (e.g. ARP or IPv6).
 				NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
 				NUDConfigs:       c,
+				UseNeighborCache: true,
 			})
 			if err := s.CreateNIC(nicID, e); err != nil {
 				t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
@@ -437,6 +445,7 @@ func TestNUDConfigurationsDelayFirstProbeTime(t *testing.T) {
 				// providing link address resolution is specified (e.g. ARP or IPv6).
 				NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
 				NUDConfigs:       c,
+				UseNeighborCache: true,
 			})
 			if err := s.CreateNIC(nicID, e); err != nil {
 				t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
@@ -488,6 +497,7 @@ func TestNUDConfigurationsMaxMulticastProbes(t *testing.T) {
 				// providing link address resolution is specified (e.g. ARP or IPv6).
 				NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
 				NUDConfigs:       c,
+				UseNeighborCache: true,
 			})
 			if err := s.CreateNIC(nicID, e); err != nil {
 				t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
@@ -539,6 +549,7 @@ func TestNUDConfigurationsMaxUnicastProbes(t *testing.T) {
 				// providing link address resolution is specified (e.g. ARP or IPv6).
 				NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
 				NUDConfigs:       c,
+				UseNeighborCache: true,
 			})
 			if err := s.CreateNIC(nicID, e); err != nil {
 				t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
@@ -590,6 +601,7 @@ func TestNUDConfigurationsUnreachableTime(t *testing.T) {
 				// providing link address resolution is specified (e.g. ARP or IPv6).
 				NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
 				NUDConfigs:       c,
+				UseNeighborCache: true,
 			})
 			if err := s.CreateNIC(nicID, e); err != nil {
 				t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go
index aca2f77f89..21ac38583d 100644
--- a/pkg/tcpip/stack/registration.go
+++ b/pkg/tcpip/stack/registration.go
@@ -298,7 +298,7 @@ type NetworkProtocol interface {
 	ParseAddresses(v buffer.View) (src, dst tcpip.Address)
 
 	// NewEndpoint creates a new endpoint of this protocol.
-	NewEndpoint(nicID tcpip.NICID, linkAddrCache LinkAddressCache, dispatcher TransportDispatcher, sender LinkEndpoint, st *Stack) NetworkEndpoint
+	NewEndpoint(nicID tcpip.NICID, linkAddrCache LinkAddressCache, nud NUDHandler, dispatcher TransportDispatcher, sender LinkEndpoint, st *Stack) NetworkEndpoint
 
 	// SetOption allows enabling/disabling protocol specific features.
 	// SetOption returns an error if the option is not supported or the
@@ -488,7 +488,7 @@ type LinkAddressResolver interface {
 	ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool)
 
 	// LinkAddressProtocol returns the network protocol of the
-	// addresses this this resolver can resolve.
+	// addresses this resolver can resolve.
 	LinkAddressProtocol() tcpip.NetworkProtocolNumber
 }
 
diff --git a/pkg/tcpip/stack/route.go b/pkg/tcpip/stack/route.go
index e267bebb07..c2eabde9e4 100644
--- a/pkg/tcpip/stack/route.go
+++ b/pkg/tcpip/stack/route.go
@@ -141,6 +141,16 @@ func (r *Route) Resolve(waker *sleep.Waker) (<-chan struct{}, *tcpip.Error) {
 		}
 		nextAddr = r.RemoteAddress
 	}
+
+	if r.ref.nic.neigh != nil {
+		entry, ch, err := r.ref.nic.neigh.entry(nextAddr, r.LocalAddress, r.ref.linkRes, waker)
+		if err != nil {
+			return ch, err
+		}
+		r.RemoteLinkAddress = entry.LinkAddr
+		return nil, nil
+	}
+
 	linkAddr, ch, err := r.ref.linkCache.GetLinkAddress(r.ref.nic.ID(), nextAddr, r.LocalAddress, r.NetProto, waker)
 	if err != nil {
 		return ch, err
@@ -155,6 +165,12 @@ func (r *Route) RemoveWaker(waker *sleep.Waker) {
 	if nextAddr == "" {
 		nextAddr = r.RemoteAddress
 	}
+
+	if r.ref.nic.neigh != nil {
+		r.ref.nic.neigh.removeWaker(nextAddr, waker)
+		return
+	}
+
 	r.ref.linkCache.RemoveWaker(r.ref.nic.ID(), nextAddr, waker)
 }
 
@@ -163,6 +179,9 @@ func (r *Route) RemoveWaker(waker *sleep.Waker) {
 //
 // The NIC r uses must not be locked.
 func (r *Route) IsResolutionRequired() bool {
+	if r.ref.nic.neigh != nil {
+		return r.ref.isValidForOutgoing() && r.ref.linkRes != nil && r.RemoteLinkAddress == ""
+	}
 	return r.ref.isValidForOutgoing() && r.ref.linkCache != nil && r.RemoteLinkAddress == ""
 }
 
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index a3f87c8afd..7f5ed9e83d 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -248,7 +248,7 @@ type RcvBufAutoTuneParams struct {
 	// was started.
 	MeasureTime time.Time
 
-	// CopiedBytes is the number of bytes copied to userspace since
+	// CopiedBytes is the number of bytes copied to user space since
 	// this measure began.
 	CopiedBytes int
 
@@ -461,6 +461,10 @@ type Stack struct {
 	// nudConfigs is the default NUD configurations used by interfaces.
 	nudConfigs NUDConfigurations
 
+	// useNeighborCache indicates whether ARP and NDP packets should be handled
+	// by the NIC's neighborCache instead of linkAddrCache.
+	useNeighborCache bool
+
 	// autoGenIPv6LinkLocal determines whether or not the stack will attempt
 	// to auto-generate an IPv6 link-local address for newly enabled non-loopback
 	// NICs. See the AutoGenIPv6LinkLocal field of Options for more details.
@@ -541,6 +545,13 @@ type Options struct {
 	// NUDConfigs is the default NUD configurations used by interfaces.
 	NUDConfigs NUDConfigurations
 
+	// UseNeighborCache indicates whether ARP and NDP packets should be handled
+	// by the Neighbor Unreachability Detection (NUD) state machine. This flag
+	// also enables the APIs for inspecting and modifying the neighbor table via
+	// NUDDispatcher and the following Stack methods: Neighbors,
+	// AddStaticNeighbor, RemoveNeighbor, and ClearNeighbors.
+	UseNeighborCache bool
+
 	// AutoGenIPv6LinkLocal determines whether or not the stack will attempt to
 	// auto-generate an IPv6 link-local address for newly enabled non-loopback
 	// NICs.
@@ -715,6 +726,7 @@ func New(opts Options) *Stack {
 		seed:                 generateRandUint32(),
 		ndpConfigs:           opts.NDPConfigs,
 		nudConfigs:           opts.NUDConfigs,
+		useNeighborCache:     opts.UseNeighborCache,
 		autoGenIPv6LinkLocal: opts.AutoGenIPv6LinkLocal,
 		uniqueIDGenerator:    opts.UniqueID,
 		ndpDisp:              opts.NDPDisp,
@@ -1209,8 +1221,8 @@ func (s *Stack) AddProtocolAddressWithOptions(id tcpip.NICID, protocolAddress tc
 	s.mu.RLock()
 	defer s.mu.RUnlock()
 
-	nic := s.nics[id]
-	if nic == nil {
+	nic, ok := s.nics[id]
+	if !ok {
 		return tcpip.ErrUnknownNICID
 	}
 
@@ -1335,8 +1347,8 @@ func (s *Stack) CheckLocalAddress(nicID tcpip.NICID, protocol tcpip.NetworkProto
 
 	// If a NIC is specified, we try to find the address there only.
 	if nicID != 0 {
-		nic := s.nics[nicID]
-		if nic == nil {
+		nic, ok := s.nics[nicID]
+		if !ok {
 			return 0
 		}
 
@@ -1367,8 +1379,8 @@ func (s *Stack) SetPromiscuousMode(nicID tcpip.NICID, enable bool) *tcpip.Error
 	s.mu.RLock()
 	defer s.mu.RUnlock()
 
-	nic := s.nics[nicID]
-	if nic == nil {
+	nic, ok := s.nics[nicID]
+	if !ok {
 		return tcpip.ErrUnknownNICID
 	}
 
@@ -1383,8 +1395,8 @@ func (s *Stack) SetSpoofing(nicID tcpip.NICID, enable bool) *tcpip.Error {
 	s.mu.RLock()
 	defer s.mu.RUnlock()
 
-	nic := s.nics[nicID]
-	if nic == nil {
+	nic, ok := s.nics[nicID]
+	if !ok {
 		return tcpip.ErrUnknownNICID
 	}
 
@@ -1416,8 +1428,33 @@ func (s *Stack) GetLinkAddress(nicID tcpip.NICID, addr, localAddr tcpip.Address,
 	return s.linkAddrCache.get(fullAddr, linkRes, localAddr, nic.linkEP, waker)
 }
 
-// RemoveWaker implements LinkAddressCache.RemoveWaker.
+// Neighbors returns all IP to MAC address associations.
+func (s *Stack) Neighbors(nicID tcpip.NICID) ([]NeighborEntry, *tcpip.Error) {
+	s.mu.RLock()
+	nic, ok := s.nics[nicID]
+	s.mu.RUnlock()
+
+	if !ok {
+		return nil, tcpip.ErrUnknownNICID
+	}
+
+	return nic.neighbors()
+}
+
+// RemoveWaker removes a waker that has been added when link resolution for
+// addr was requested.
 func (s *Stack) RemoveWaker(nicID tcpip.NICID, addr tcpip.Address, waker *sleep.Waker) {
+	if s.useNeighborCache {
+		s.mu.RLock()
+		nic, ok := s.nics[nicID]
+		s.mu.RUnlock()
+
+		if ok {
+			nic.removeWaker(addr, waker)
+		}
+		return
+	}
+
 	s.mu.RLock()
 	defer s.mu.RUnlock()
 
@@ -1427,6 +1464,47 @@ func (s *Stack) RemoveWaker(nicID tcpip.NICID, addr tcpip.Address, waker *sleep.
 	}
 }
 
+// AddStaticNeighbor statically associates an IP address to a MAC address.
+func (s *Stack) AddStaticNeighbor(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress) *tcpip.Error {
+	s.mu.RLock()
+	nic, ok := s.nics[nicID]
+	s.mu.RUnlock()
+
+	if !ok {
+		return tcpip.ErrUnknownNICID
+	}
+
+	return nic.addStaticNeighbor(addr, linkAddr)
+}
+
+// RemoveNeighbor removes an IP to MAC address association previously created
+// either automatically or by AddStaticNeighbor. Returns ErrBadAddress if there
+// is no association with the provided address.
+func (s *Stack) RemoveNeighbor(nicID tcpip.NICID, addr tcpip.Address) *tcpip.Error {
+	s.mu.RLock()
+	nic, ok := s.nics[nicID]
+	s.mu.RUnlock()
+
+	if !ok {
+		return tcpip.ErrUnknownNICID
+	}
+
+	return nic.removeNeighbor(addr)
+}
+
+// ClearNeighbors removes all IP to MAC address associations.
+func (s *Stack) ClearNeighbors(nicID tcpip.NICID) *tcpip.Error {
+	s.mu.RLock()
+	nic, ok := s.nics[nicID]
+	s.mu.RUnlock()
+
+	if !ok {
+		return tcpip.ErrUnknownNICID
+	}
+
+	return nic.clearNeighbors()
+}
+
 // RegisterTransportEndpoint registers the given endpoint with the stack
 // transport dispatcher. Received packets that match the provided id will be
 // delivered to the given endpoint; specifying a nic is optional, but
@@ -1961,7 +2039,7 @@ func (s *Stack) FindNetworkEndpoint(netProto tcpip.NetworkProtocolNumber, addres
 	return nil, tcpip.ErrBadAddress
 }
 
-// FindNICNameFromID returns the name of the nic for the given NICID.
+// FindNICNameFromID returns the name of the NIC for the given NICID.
 func (s *Stack) FindNICNameFromID(id tcpip.NICID) string {
 	s.mu.RLock()
 	defer s.mu.RUnlock()
diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go
index 106645c50b..1deeccb898 100644
--- a/pkg/tcpip/stack/stack_test.go
+++ b/pkg/tcpip/stack/stack_test.go
@@ -197,7 +197,7 @@ func (*fakeNetworkProtocol) ParseAddresses(v buffer.View) (src, dst tcpip.Addres
 	return tcpip.Address(v[srcAddrOffset : srcAddrOffset+1]), tcpip.Address(v[dstAddrOffset : dstAddrOffset+1])
 }
 
-func (f *fakeNetworkProtocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache stack.LinkAddressCache, dispatcher stack.TransportDispatcher, ep stack.LinkEndpoint, _ *stack.Stack) stack.NetworkEndpoint {
+func (f *fakeNetworkProtocol) NewEndpoint(nicID tcpip.NICID, _ stack.LinkAddressCache, _ stack.NUDHandler, dispatcher stack.TransportDispatcher, ep stack.LinkEndpoint, _ *stack.Stack) stack.NetworkEndpoint {
 	return &fakeNetworkEndpoint{
 		nicID:      nicID,
 		proto:      f,
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index 290c4e138b..44f87e0073 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -1192,6 +1192,10 @@ type ICMPv6ReceivedPacketStats struct {
 	// Invalid is the total number of ICMPv6 packets received that the
 	// transport layer could not parse.
 	Invalid *StatCounter
+
+	// RouterOnlyPacketsDroppedByHost is the total number of ICMPv6 packets
+	// dropped due to being router-specific packets.
+	RouterOnlyPacketsDroppedByHost *StatCounter
 }
 
 // ICMPStats collects ICMP-specific stats (both v4 and v6).
diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go
index f87d99d5a4..0a558df6d2 100644
--- a/pkg/tcpip/transport/udp/udp_test.go
+++ b/pkg/tcpip/transport/udp/udp_test.go
@@ -1469,7 +1469,7 @@ func TestTTL(t *testing.T) {
 				} else {
 					p = ipv6.NewProtocol()
 				}
-				ep := p.NewEndpoint(0, nil, nil, nil, stack.New(stack.Options{
+				ep := p.NewEndpoint(0, nil, nil, nil, nil, stack.New(stack.Options{
 					NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
 					TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
 				}))
@@ -1502,7 +1502,7 @@ func TestSetTTL(t *testing.T) {
 					} else {
 						p = ipv6.NewProtocol()
 					}
-					ep := p.NewEndpoint(0, nil, nil, nil, stack.New(stack.Options{
+					ep := p.NewEndpoint(0, nil, nil, nil, nil, stack.New(stack.Options{
 						NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
 						TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
 					}))

From 0e91c5804318732e57543ad9a3012b5cb0715b7a Mon Sep 17 00:00:00 2001
From: gVisor bot <gvisor-bot@google.com>
Date: Tue, 25 Aug 2020 11:57:35 -0700
Subject: [PATCH 074/211] Change "Fd" member to "FD" according to convention

PiperOrigin-RevId: 328374775
---
 pkg/sentry/vfs/file_description.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go
index 33910e0950..3219a9e137 100644
--- a/pkg/sentry/vfs/file_description.go
+++ b/pkg/sentry/vfs/file_description.go
@@ -851,7 +851,7 @@ func (fd *FileDescription) SetAsyncHandler(newHandler func() FileAsync) FileAsyn
 // FileReadWriteSeeker is a helper struct to pass a FileDescription as
 // io.Reader/io.Writer/io.ReadSeeker/etc.
 type FileReadWriteSeeker struct {
-	Fd    *FileDescription
+	FD    *FileDescription
 	Ctx   context.Context
 	ROpts ReadOptions
 	WOpts WriteOptions
@@ -860,18 +860,18 @@ type FileReadWriteSeeker struct {
 // Read implements io.ReadWriteSeeker.Read.
 func (f *FileReadWriteSeeker) Read(p []byte) (int, error) {
 	dst := usermem.BytesIOSequence(p)
-	ret, err := f.Fd.Read(f.Ctx, dst, f.ROpts)
+	ret, err := f.FD.Read(f.Ctx, dst, f.ROpts)
 	return int(ret), err
 }
 
 // Seek implements io.ReadWriteSeeker.Seek.
 func (f *FileReadWriteSeeker) Seek(offset int64, whence int) (int64, error) {
-	return f.Fd.Seek(f.Ctx, offset, int32(whence))
+	return f.FD.Seek(f.Ctx, offset, int32(whence))
 }
 
 // Write implements io.ReadWriteSeeker.Write.
 func (f *FileReadWriteSeeker) Write(p []byte) (int, error) {
 	buf := usermem.BytesIOSequence(p)
-	ret, err := f.Fd.Write(f.Ctx, buf, f.WOpts)
+	ret, err := f.FD.Write(f.Ctx, buf, f.WOpts)
 	return int(ret), err
 }

From 61ad71e6be239a860ed946722f0c4e4e8e643d16 Mon Sep 17 00:00:00 2001
From: Adin Scannell <ascannell@google.com>
Date: Tue, 25 Aug 2020 12:16:31 -0700
Subject: [PATCH 075/211] Add nogo support to go_binary and go_test targets.

Updates #3374

PiperOrigin-RevId: 328378700
---
 pkg/cpuid/cpuid_parse_x86_test.go      | 12 ++---
 pkg/seccomp/BUILD                      |  1 +
 test/benchmarks/database/redis_test.go |  4 +-
 test/benchmarks/fs/bazel_test.go       |  2 +-
 test/benchmarks/network/node_test.go   |  4 +-
 test/benchmarks/network/ruby_test.go   |  4 +-
 test/packetimpact/runner/defs.bzl      |  1 +
 test/root/crictl_test.go               |  2 +-
 test/runtimes/proctor/BUILD            |  1 +
 tools/bazeldefs/defs.bzl               |  9 +++-
 tools/defs.bzl                         | 40 +++++++++++++++-
 tools/issue_reviver/BUILD              |  1 +
 tools/issue_reviver/github/BUILD       |  1 +
 tools/nogo/check/BUILD                 |  1 +
 tools/nogo/defs.bzl                    | 63 +++++++++++++++++++++-----
 15 files changed, 118 insertions(+), 28 deletions(-)

diff --git a/pkg/cpuid/cpuid_parse_x86_test.go b/pkg/cpuid/cpuid_parse_x86_test.go
index c9bd40e1bf..e4ae0d6894 100644
--- a/pkg/cpuid/cpuid_parse_x86_test.go
+++ b/pkg/cpuid/cpuid_parse_x86_test.go
@@ -32,27 +32,27 @@ func kernelVersion() (int, int, error) {
 		return 0, 0, err
 	}
 
-	var r string
+	var sb strings.Builder
 	for _, b := range u.Release {
 		if b == 0 {
 			break
 		}
-		r += string(b)
+		sb.WriteByte(byte(b))
 	}
 
-	s := strings.Split(r, ".")
+	s := strings.Split(sb.String(), ".")
 	if len(s) < 2 {
-		return 0, 0, fmt.Errorf("kernel release missing major and minor component: %s", r)
+		return 0, 0, fmt.Errorf("kernel release missing major and minor component: %s", sb.String())
 	}
 
 	major, err := strconv.Atoi(s[0])
 	if err != nil {
-		return 0, 0, fmt.Errorf("error parsing major version %q in %q: %v", s[0], r, err)
+		return 0, 0, fmt.Errorf("error parsing major version %q in %q: %w", s[0], sb.String(), err)
 	}
 
 	minor, err := strconv.Atoi(s[1])
 	if err != nil {
-		return 0, 0, fmt.Errorf("error parsing minor version %q in %q: %v", s[1], r, err)
+		return 0, 0, fmt.Errorf("error parsing minor version %q in %q: %w", s[1], sb.String(), err)
 	}
 
 	return major, minor, nil
diff --git a/pkg/seccomp/BUILD b/pkg/seccomp/BUILD
index 29aeaab8c6..bdef7762cd 100644
--- a/pkg/seccomp/BUILD
+++ b/pkg/seccomp/BUILD
@@ -10,6 +10,7 @@ go_binary(
         "seccomp_test_victim_amd64.go",
         "seccomp_test_victim_arm64.go",
     ],
+    nogo = False,
     deps = [":seccomp"],
 )
 
diff --git a/test/benchmarks/database/redis_test.go b/test/benchmarks/database/redis_test.go
index 394fce820f..6671a49693 100644
--- a/test/benchmarks/database/redis_test.go
+++ b/test/benchmarks/database/redis_test.go
@@ -84,12 +84,12 @@ func BenchmarkRedis(b *testing.B) {
 
 			ip, err := serverMachine.IPAddress()
 			if err != nil {
-				b.Fatal("failed to get IP from server: %v", err)
+				b.Fatalf("failed to get IP from server: %v", err)
 			}
 
 			serverPort, err := server.FindPort(ctx, port)
 			if err != nil {
-				b.Fatal("failed to get IP from server: %v", err)
+				b.Fatalf("failed to get IP from server: %v", err)
 			}
 
 			if err = harness.WaitUntilServing(ctx, clientMachine, ip, serverPort); err != nil {
diff --git a/test/benchmarks/fs/bazel_test.go b/test/benchmarks/fs/bazel_test.go
index f4236ba374..fdbbfe280f 100644
--- a/test/benchmarks/fs/bazel_test.go
+++ b/test/benchmarks/fs/bazel_test.go
@@ -73,7 +73,7 @@ func runBuildBenchmark(b *testing.B, image, workdir, target string) {
 			if bm.tmpfs {
 				if out, err := container.Exec(ctx, dockerutil.ExecOpts{},
 					"cp", "-r", workdir, "/tmp/."); err != nil {
-					b.Fatal("failed to copy directory: %v %s", err, out)
+					b.Fatalf("failed to copy directory: %v (%s)", err, out)
 				}
 				workdir = "/tmp" + workdir
 			}
diff --git a/test/benchmarks/network/node_test.go b/test/benchmarks/network/node_test.go
index 52eb794c46..0f4a205b64 100644
--- a/test/benchmarks/network/node_test.go
+++ b/test/benchmarks/network/node_test.go
@@ -48,14 +48,14 @@ func runNode(b *testing.B, hey *tools.Hey) {
 	// The machine to hold Redis and the Node Server.
 	serverMachine, err := h.GetMachine()
 	if err != nil {
-		b.Fatal("failed to get machine with: %v", err)
+		b.Fatalf("failed to get machine with: %v", err)
 	}
 	defer serverMachine.CleanUp()
 
 	// The machine to run 'hey'.
 	clientMachine, err := h.GetMachine()
 	if err != nil {
-		b.Fatal("failed to get machine with: %v", err)
+		b.Fatalf("failed to get machine with: %v", err)
 	}
 	defer clientMachine.CleanUp()
 
diff --git a/test/benchmarks/network/ruby_test.go b/test/benchmarks/network/ruby_test.go
index 5e0b2b7242..67f63f76a1 100644
--- a/test/benchmarks/network/ruby_test.go
+++ b/test/benchmarks/network/ruby_test.go
@@ -47,14 +47,14 @@ func runRuby(b *testing.B, hey *tools.Hey) {
 	// The machine to hold Redis and the Ruby Server.
 	serverMachine, err := h.GetMachine()
 	if err != nil {
-		b.Fatal("failed to get machine with: %v", err)
+		b.Fatalf("failed to get machine with: %v", err)
 	}
 	defer serverMachine.CleanUp()
 
 	// The machine to run 'hey'.
 	clientMachine, err := h.GetMachine()
 	if err != nil {
-		b.Fatal("failed to get machine with: %v", err)
+		b.Fatalf("failed to get machine with: %v", err)
 	}
 	defer clientMachine.CleanUp()
 	ctx := context.Background()
diff --git a/test/packetimpact/runner/defs.bzl b/test/packetimpact/runner/defs.bzl
index 93a36c6c28..d72c63fe6d 100644
--- a/test/packetimpact/runner/defs.bzl
+++ b/test/packetimpact/runner/defs.bzl
@@ -125,6 +125,7 @@ def packetimpact_go_test(name, size = "small", pure = True, expect_native_failur
         name = testbench_binary,
         size = size,
         pure = pure,
+        nogo = False,  # FIXME(gvisor.dev/issue/3374): Not working with all build systems.
         tags = [
             "local",
             "manual",
diff --git a/test/root/crictl_test.go b/test/root/crictl_test.go
index df91fa0fe1..11ac5cb522 100644
--- a/test/root/crictl_test.go
+++ b/test/root/crictl_test.go
@@ -418,7 +418,7 @@ func setup(t *testing.T, version string) (*criutil.Crictl, func(), error) {
 		// care about the docker runtime name.
 		config = v2Template
 	default:
-		t.Fatalf("unknown version: %d", version)
+		t.Fatalf("unknown version: %s", version)
 	}
 	t.Logf("Using config: %s", config)
 
diff --git a/test/runtimes/proctor/BUILD b/test/runtimes/proctor/BUILD
index f76e2ddc04..d1935cbe8e 100644
--- a/test/runtimes/proctor/BUILD
+++ b/test/runtimes/proctor/BUILD
@@ -21,6 +21,7 @@ go_test(
     size = "small",
     srcs = ["proctor_test.go"],
     library = ":proctor",
+    nogo = False,  # FIXME(gvisor.dev/issue/3374): Not working with all build systems.
     pure = True,
     deps = [
         "//pkg/test/testutil",
diff --git a/tools/bazeldefs/defs.bzl b/tools/bazeldefs/defs.bzl
index db7f379b88..4bbcda054f 100644
--- a/tools/bazeldefs/defs.bzl
+++ b/tools/bazeldefs/defs.bzl
@@ -87,13 +87,14 @@ def cc_binary(name, static = False, **kwargs):
         **kwargs
     )
 
-def go_binary(name, static = False, pure = False, **kwargs):
+def go_binary(name, static = False, pure = False, x_defs = None, **kwargs):
     """Build a go binary.
 
     Args:
         name: name of the target.
         static: build a static binary.
         pure: build without cgo.
+        x_defs: additional definitions.
         **kwargs: rest of the arguments are passed to _go_binary.
     """
     if static:
@@ -102,6 +103,7 @@ def go_binary(name, static = False, pure = False, **kwargs):
         kwargs["pure"] = "on"
     _go_binary(
         name = name,
+        x_defs = x_defs,
         **kwargs
     )
 
@@ -151,6 +153,11 @@ def go_rule(rule, implementation, **kwargs):
     toolchains = kwargs.get("toolchains", []) + ["@io_bazel_rules_go//go:toolchain"]
     return rule(implementation, attrs = attrs, toolchains = toolchains, **kwargs)
 
+def go_test_library(target):
+    if hasattr(target.attr, "embed") and len(target.attr.embed) > 0:
+        return target.attr.embed[0]
+    return None
+
 def go_context(ctx):
     go_ctx = _go_context(ctx)
     return struct(
diff --git a/tools/defs.bzl b/tools/defs.bzl
index e71a26cf48..290d564f26 100644
--- a/tools/defs.bzl
+++ b/tools/defs.bzl
@@ -27,7 +27,6 @@ gbenchmark = _gbenchmark
 gazelle = _gazelle
 go_embed_data = _go_embed_data
 go_path = _go_path
-go_test = _go_test
 gtest = _gtest
 grpcpp = _grpcpp
 loopback = _loopback
@@ -45,17 +44,35 @@ vdso_linker_option = _vdso_linker_option
 default_platform = _default_platform
 platforms = _platforms
 
-def go_binary(name, **kwargs):
+def go_binary(name, nogo = True, pure = False, static = False, x_defs = None, **kwargs):
     """Wraps the standard go_binary.
 
     Args:
       name: the rule name.
+      nogo: enable nogo analysis.
+      pure: build a pure Go (no CGo) binary.
+      static: build a static binary.
+      x_defs: additional linker definitions.
       **kwargs: standard go_binary arguments.
     """
     _go_binary(
         name = name,
+        pure = pure,
+        static = static,
+        x_defs = x_defs,
         **kwargs
     )
+    if nogo:
+        # Note that the nogo rule applies only for go_library and go_test
+        # targets, therefore we construct a library from the binary sources.
+        _go_library(
+            name = name + "_nogo_library",
+            **kwargs
+        )
+        nogo_test(
+            name = name + "_nogo",
+            deps = [":" + name + "_nogo_library"],
+        )
 
 def calculate_sets(srcs):
     """Calculates special Go sets for templates.
@@ -119,6 +136,7 @@ def go_library(name, srcs, deps = [], imports = [], stateify = True, marshal = F
       stateify: whether statify is enabled (default: true).
       marshal: whether marshal is enabled (default: false).
       marshal_debug: whether the gomarshal tools emits debugging output (default: false).
+      nogo: enable nogo analysis.
       **kwargs: standard go_library arguments.
     """
     all_srcs = srcs
@@ -202,6 +220,24 @@ def go_library(name, srcs, deps = [], imports = [], stateify = True, marshal = F
                 **kwargs
             )
 
+def go_test(name, nogo = True, **kwargs):
+    """Wraps the standard go_test.
+
+    Args:
+      name: the rule name.
+      nogo: enable nogo analysis.
+      **kwargs: standard go_test arguments.
+    """
+    _go_test(
+        name = name,
+        **kwargs
+    )
+    if nogo:
+        nogo_test(
+            name = name + "_nogo",
+            deps = [":" + name],
+        )
+
 def proto_library(name, srcs, deps = None, has_services = 0, **kwargs):
     """Wraps the standard proto_library.
 
diff --git a/tools/issue_reviver/BUILD b/tools/issue_reviver/BUILD
index 4ef1a31243..35b0111ca3 100644
--- a/tools/issue_reviver/BUILD
+++ b/tools/issue_reviver/BUILD
@@ -5,6 +5,7 @@ package(licenses = ["notice"])
 go_binary(
     name = "issue_reviver",
     srcs = ["main.go"],
+    nogo = False,
     deps = [
         "//tools/issue_reviver/github",
         "//tools/issue_reviver/reviver",
diff --git a/tools/issue_reviver/github/BUILD b/tools/issue_reviver/github/BUILD
index 0eabc2835e..555abd2969 100644
--- a/tools/issue_reviver/github/BUILD
+++ b/tools/issue_reviver/github/BUILD
@@ -21,4 +21,5 @@ go_test(
     size = "small",
     srcs = ["github_test.go"],
     library = ":github",
+    nogo = False,
 )
diff --git a/tools/nogo/check/BUILD b/tools/nogo/check/BUILD
index e2d76cd5ca..21ba2c3066 100644
--- a/tools/nogo/check/BUILD
+++ b/tools/nogo/check/BUILD
@@ -7,6 +7,7 @@ package(licenses = ["notice"])
 go_binary(
     name = "check",
     srcs = ["main.go"],
+    nogo = False,
     visibility = ["//visibility:public"],
     deps = ["//tools/nogo"],
 )
diff --git a/tools/nogo/defs.bzl b/tools/nogo/defs.bzl
index d399079c55..5377620b0d 100644
--- a/tools/nogo/defs.bzl
+++ b/tools/nogo/defs.bzl
@@ -1,6 +1,6 @@
 """Nogo rules."""
 
-load("//tools/bazeldefs:defs.bzl", "go_context", "go_importpath", "go_rule")
+load("//tools/bazeldefs:defs.bzl", "go_context", "go_importpath", "go_rule", "go_test_library")
 
 # NogoInfo is the serialized set of package facts for a nogo analysis.
 #
@@ -8,10 +8,13 @@ load("//tools/bazeldefs:defs.bzl", "go_context", "go_importpath", "go_rule")
 # with the source files as input. Note however, that the individual nogo rules
 # are simply stubs that enter into the shadow dependency tree (the "aspect").
 NogoInfo = provider(
+    "information for nogo analysis",
     fields = {
         "facts": "serialized package facts",
         "importpath": "package import path",
         "binaries": "package binary files",
+        "srcs": "original source files (for go_test support)",
+        "deps": "original deps (for go_test support)",
     },
 )
 
@@ -21,16 +24,29 @@ def _nogo_aspect_impl(target, ctx):
     # All work is done in the shadow properties for go rules. For a proto
     # library, we simply skip the analysis portion but still need to return a
     # valid NogoInfo to reference the generated binary.
-    if ctx.rule.kind == "go_library":
+    if ctx.rule.kind in ("go_library", "go_binary", "go_test", "go_tool_library"):
         srcs = ctx.rule.files.srcs
-    elif ctx.rule.kind == "go_proto_library" or ctx.rule.kind == "go_wrap_cc":
+        deps = ctx.rule.attr.deps
+    elif ctx.rule.kind in ("go_proto_library", "go_wrap_cc"):
         srcs = []
+        deps = ctx.rule.attr.deps
     else:
         return [NogoInfo()]
 
-    go_ctx = go_context(ctx)
+    # If we're using the "library" attribute, then we need to aggregate the
+    # original library sources and dependencies into this target to perform
+    # proper type analysis.
+    if ctx.rule.kind == "go_test":
+        library = go_test_library(ctx.rule)
+        if library != None:
+            info = library[NogoInfo]
+            if hasattr(info, "srcs"):
+                srcs = srcs + info.srcs
+            if hasattr(info, "deps"):
+                deps = deps + info.deps
 
     # Construct the Go environment from the go_ctx.env dictionary.
+    go_ctx = go_context(ctx)
     env_prefix = " ".join(["%s=%s" % (key, value) for (key, value) in go_ctx.env.items()])
 
     # Start with all target files and srcs as input.
@@ -41,6 +57,13 @@ def _nogo_aspect_impl(target, ctx):
     # to cleanly allow us redirect stdout to the actual output file. Perhaps
     # I'm missing something here, but the intermediate script does work.
     binaries = target.files.to_list()
+    objfiles = [f for f in binaries if f.path.endswith(".a")]
+    if len(objfiles) > 0:
+        # Prefer the .a files for go_library targets.
+        target_objfile = objfiles[0]
+    else:
+        # Use the raw binary for go_binary and go_test targets.
+        target_objfile = binaries[0]
     disasm_file = ctx.actions.declare_file(target.label.name + ".out")
     dumper = ctx.actions.declare_file("%s-dumper" % ctx.label.name)
     ctx.actions.write(dumper, "\n".join([
@@ -48,12 +71,12 @@ def _nogo_aspect_impl(target, ctx):
         "%s %s tool objdump %s > %s\n" % (
             env_prefix,
             go_ctx.go.path,
-            [f.path for f in binaries if f.path.endswith(".a")][0],
+            target_objfile.path,
             disasm_file.path,
         ),
     ]), is_executable = True)
     ctx.actions.run(
-        inputs = binaries,
+        inputs = [target_objfile],
         outputs = [disasm_file],
         tools = go_ctx.runfiles,
         mnemonic = "GoObjdump",
@@ -63,7 +86,15 @@ def _nogo_aspect_impl(target, ctx):
     inputs.append(disasm_file)
 
     # Extract the importpath for this package.
-    importpath = go_importpath(target)
+    if ctx.rule.kind == "go_test":
+        # If this is a test, then it will not be imported by anything else.
+        # We can safely set the importpath to just "test". Note that this
+        # is necessary if the library also imports the core library (in
+        # addition to including the sources directly), which happens in
+        # some complex cases (seccomp_victim).
+        importpath = "test"
+    else:
+        importpath = go_importpath(target)
 
     # The nogo tool requires a configfile serialized in JSON format to do its
     # work. This must line up with the nogo.Config fields.
@@ -84,7 +115,7 @@ def _nogo_aspect_impl(target, ctx):
     )
 
     # Collect all info from shadow dependencies.
-    for dep in ctx.rule.attr.deps:
+    for dep in deps:
         # There will be no file attribute set for all transitive dependencies
         # that are not go_library or go_binary rules, such as a proto rules.
         # This is handled by the ctx.rule.kind check above.
@@ -126,12 +157,18 @@ def _nogo_aspect_impl(target, ctx):
         facts = facts,
         importpath = importpath,
         binaries = binaries,
+        srcs = srcs,
+        deps = deps,
     )]
 
 nogo_aspect = go_rule(
     aspect,
     implementation = _nogo_aspect_impl,
-    attr_aspects = ["deps"],
+    attr_aspects = [
+        "deps",
+        "library",
+        "embed",
+    ],
     attrs = {
         "_nogo": attr.label(
             default = "//tools/nogo/check:check",
@@ -171,6 +208,10 @@ _nogo_test = rule(
     test = True,
 )
 
-def nogo_test(**kwargs):
+def nogo_test(name, **kwargs):
     tags = kwargs.pop("tags", []) + ["nogo"]
-    _nogo_test(tags = tags, **kwargs)
+    _nogo_test(
+        name = name,
+        tags = tags,
+        **kwargs
+    )

From 92b1436011632d88fbe31ddfe434c5a49e917876 Mon Sep 17 00:00:00 2001
From: Adin Scannell <ascannell@google.com>
Date: Tue, 25 Aug 2020 12:21:37 -0700
Subject: [PATCH 076/211] Include shim in individual released binaries.

The debian rules are also moved to the top-level, since they
apply to binaries outside the //runsc directory.

Fixes #3665

PiperOrigin-RevId: 328379709
---
 Makefile                             |  8 ++--
 debian/BUILD                         | 59 ++++++++++++++++++++++++++++
 {runsc/debian => debian}/description |  0
 {runsc/debian => debian}/postinst.sh |  0
 runsc/BUILD                          | 58 +--------------------------
 shim/BUILD                           |  2 +-
 6 files changed, 66 insertions(+), 61 deletions(-)
 create mode 100644 debian/BUILD
 rename {runsc/debian => debian}/description (100%)
 rename {runsc/debian => debian}/postinst.sh (100%)

diff --git a/Makefile b/Makefile
index fdbc6fb419..43a243c906 100644
--- a/Makefile
+++ b/Makefile
@@ -114,7 +114,7 @@ runsc: ## Builds the runsc binary.
 .PHONY: runsc
 
 debian: ## Builds the debian packages.
-	@$(call submake,build OPTIONS="-c opt" TARGETS="//runsc:runsc-debian")
+	@$(call submake,build OPTIONS="-c opt" TARGETS="//debian:debian")
 .PHONY: debian
 
 smoke-tests: ## Runs a simple smoke test after build runsc.
@@ -301,8 +301,10 @@ $(RELEASE_KEY):
 release: $(RELEASE_KEY) ## Builds a release.
 	@mkdir -p $(RELEASE_ROOT)
 	@T=$$(mktemp -d /tmp/release.XXXXXX); \
-	  $(call submake,copy TARGETS="runsc" DESTINATION=$$T) && \
-	  $(call submake,copy TARGETS="runsc:runsc-debian" DESTINATION=$$T) && \
+	  $(call submake,copy TARGETS="//runsc:runsc" DESTINATION=$$T) && \
+	  $(call submake,copy TARGETS="//shim/v1:gvisor-containerd-shim" DESTINATION=$$T) && \
+	  $(call submake,copy TARGETS="//shim/v2:containerd-shim-runsc-v1" DESTINATION=$$T) && \
+	  $(call submake,copy TARGETS="//debian:debian" DESTINATION=$$T) && \
 	  NIGHTLY=$(RELEASE_NIGHTLY) tools/make_release.sh $(RELEASE_KEY) $(RELEASE_ROOT) $$T/*; \
 	rc=$$?; rm -rf $$T; exit $$rc
 .PHONY: release
diff --git a/debian/BUILD b/debian/BUILD
new file mode 100644
index 0000000000..331f44a5cb
--- /dev/null
+++ b/debian/BUILD
@@ -0,0 +1,59 @@
+load("//tools:defs.bzl", "pkg_deb", "pkg_tar")
+
+package(licenses = ["notice"])
+
+pkg_tar(
+    name = "debian-bin",
+    srcs = [
+        "//runsc",
+        "//shim/v1:gvisor-containerd-shim",
+        "//shim/v2:containerd-shim-runsc-v1",
+    ],
+    mode = "0755",
+    package_dir = "/usr/bin",
+)
+
+pkg_tar(
+    name = "debian-data",
+    extension = "tar.gz",
+    deps = [
+        ":debian-bin",
+        "//shim:config",
+    ],
+)
+
+genrule(
+    name = "debian-version",
+    # Note that runsc must appear in the srcs parameter and not the tools
+    # parameter, otherwise it will not be stamped. This is reasonable, as tools
+    # may be encoded differently in the build graph (cached more aggressively
+    # because they are assumed to be hermetic).
+    srcs = ["//runsc"],
+    outs = ["version.txt"],
+    # Note that the little dance here is necessary because files in the $(SRCS)
+    # attribute are not executable by default, and we can't touch in place.
+    cmd = "cp $(location //runsc:runsc) $(@D)/runsc && \
+        chmod a+x $(@D)/runsc && \
+        $(@D)/runsc -version | grep version | sed 's/^[^0-9]*//' > $@ && \
+        rm -f $(@D)/runsc",
+    stamp = 1,
+)
+
+pkg_deb(
+    name = "debian",
+    architecture = "amd64",
+    data = ":debian-data",
+    # Note that the description_file will be flattened (all newlines removed),
+    # and therefore it is kept to a simple one-line description. The expected
+    # format for debian packages is "short summary\nLonger explanation of
+    # tool." and this is impossible with the flattening.
+    description_file = "description",
+    homepage = "https://gvisor.dev/",
+    maintainer = "The gVisor Authors <gvisor-dev@googlegroups.com>",
+    package = "runsc",
+    postinst = "postinst.sh",
+    version_file = ":version.txt",
+    visibility = [
+        "//visibility:public",
+    ],
+)
diff --git a/runsc/debian/description b/debian/description
similarity index 100%
rename from runsc/debian/description
rename to debian/description
diff --git a/runsc/debian/postinst.sh b/debian/postinst.sh
similarity index 100%
rename from runsc/debian/postinst.sh
rename to debian/postinst.sh
diff --git a/runsc/BUILD b/runsc/BUILD
index 267fb2af86..33d8554af1 100644
--- a/runsc/BUILD
+++ b/runsc/BUILD
@@ -1,4 +1,4 @@
-load("//tools:defs.bzl", "go_binary", "pkg_deb", "pkg_tar")
+load("//tools:defs.bzl", "go_binary")
 
 package(licenses = ["notice"])
 
@@ -61,62 +61,6 @@ go_binary(
     ],
 )
 
-pkg_tar(
-    name = "debian-bin",
-    srcs = [
-        ":runsc",
-        "//shim/v1:gvisor-containerd-shim",
-        "//shim/v2:containerd-shim-runsc-v1",
-    ],
-    mode = "0755",
-    package_dir = "/usr/bin",
-)
-
-pkg_tar(
-    name = "debian-data",
-    extension = "tar.gz",
-    deps = [
-        ":debian-bin",
-        "//shim:config",
-    ],
-)
-
-genrule(
-    name = "deb-version",
-    # Note that runsc must appear in the srcs parameter and not the tools
-    # parameter, otherwise it will not be stamped. This is reasonable, as tools
-    # may be encoded differently in the build graph (cached more aggressively
-    # because they are assumes to be hermetic).
-    srcs = [":runsc"],
-    outs = ["version.txt"],
-    # Note that the little dance here is necessary because files in the $(SRCS)
-    # attribute are not executable by default, and we can't touch in place.
-    cmd = "cp $(location :runsc) $(@D)/runsc && \
-        chmod a+x $(@D)/runsc && \
-        $(@D)/runsc -version | grep version | sed 's/^[^0-9]*//' > $@ && \
-        rm -f $(@D)/runsc",
-    stamp = 1,
-)
-
-pkg_deb(
-    name = "runsc-debian",
-    architecture = "amd64",
-    data = ":debian-data",
-    # Note that the description_file will be flatten (all newlines removed),
-    # and therefore it is kept to a simple one-line description. The expected
-    # format for debian packages is "short summary\nLonger explanation of
-    # tool." and this is impossible with the flattening.
-    description_file = "debian/description",
-    homepage = "https://gvisor.dev/",
-    maintainer = "The gVisor Authors <gvisor-dev@googlegroups.com>",
-    package = "runsc",
-    postinst = "debian/postinst.sh",
-    version_file = ":version.txt",
-    visibility = [
-        "//visibility:public",
-    ],
-)
-
 sh_test(
     name = "version_test",
     size = "small",
diff --git a/shim/BUILD b/shim/BUILD
index e581618b25..8d29c459b6 100644
--- a/shim/BUILD
+++ b/shim/BUILD
@@ -10,6 +10,6 @@ pkg_tar(
     mode = "0644",
     package_dir = "/etc/containerd",
     visibility = [
-        "//runsc:__pkg__",
+        "//visibility:public",
     ],
 )

From 9e7a83e0f283965c6fbfa3d596421508dd088c58 Mon Sep 17 00:00:00 2001
From: Kevin Krakauer <krakauer@google.com>
Date: Tue, 25 Aug 2020 13:41:23 -0700
Subject: [PATCH 077/211] remove iptables sockopt special cases

iptables sockopts were kludged into an unnecessary check, this properly
relegates them to the {get,set}SockOptIP functions.

PiperOrigin-RevId: 328395135
---
 pkg/sentry/socket/netstack/netstack.go      | 137 ++++++++++----------
 pkg/sentry/socket/netstack/netstack_vfs2.go |  68 +---------
 pkg/sentry/socket/unix/unix.go              |   2 +-
 pkg/sentry/socket/unix/unix_vfs2.go         |   2 +-
 test/syscalls/linux/iptables.cc             |  37 +++++-
 5 files changed, 104 insertions(+), 142 deletions(-)

diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 626195be24..9e2ebc7d4f 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -983,53 +983,12 @@ func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr us
 		return &val, nil
 	}
 
-	if s.skType == linux.SOCK_RAW && level == linux.IPPROTO_IP {
-		switch name {
-		case linux.IPT_SO_GET_INFO:
-			if outLen < linux.SizeOfIPTGetinfo {
-				return nil, syserr.ErrInvalidArgument
-			}
-			if s.family != linux.AF_INET {
-				return nil, syserr.ErrInvalidArgument
-			}
-
-			stack := inet.StackFromContext(t)
-			if stack == nil {
-				return nil, syserr.ErrNoDevice
-			}
-			info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr)
-			if err != nil {
-				return nil, err
-			}
-			return &info, nil
-
-		case linux.IPT_SO_GET_ENTRIES:
-			if outLen < linux.SizeOfIPTGetEntries {
-				return nil, syserr.ErrInvalidArgument
-			}
-			if s.family != linux.AF_INET {
-				return nil, syserr.ErrInvalidArgument
-			}
-
-			stack := inet.StackFromContext(t)
-			if stack == nil {
-				return nil, syserr.ErrNoDevice
-			}
-			entries, err := netfilter.GetEntries4(t, stack.(*Stack).Stack, outPtr, outLen)
-			if err != nil {
-				return nil, err
-			}
-			return &entries, nil
-
-		}
-	}
-
-	return GetSockOpt(t, s, s.Endpoint, s.family, s.skType, level, name, outLen)
+	return GetSockOpt(t, s, s.Endpoint, s.family, s.skType, level, name, outPtr, outLen)
 }
 
 // GetSockOpt can be used to implement the linux syscall getsockopt(2) for
 // sockets backed by a commonEndpoint.
-func GetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family int, skType linux.SockType, level, name, outLen int) (marshal.Marshallable, *syserr.Error) {
+func GetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family int, skType linux.SockType, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
 	switch level {
 	case linux.SOL_SOCKET:
 		return getSockOptSocket(t, s, ep, family, skType, name, outLen)
@@ -1041,7 +1000,7 @@ func GetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family in
 		return getSockOptIPv6(t, ep, name, outLen)
 
 	case linux.SOL_IP:
-		return getSockOptIP(t, ep, name, outLen, family)
+		return getSockOptIP(t, s, ep, name, outPtr, outLen, family)
 
 	case linux.SOL_UDP,
 		linux.SOL_ICMPV6,
@@ -1560,7 +1519,7 @@ func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (marsha
 }
 
 // getSockOptIP implements GetSockOpt when level is SOL_IP.
-func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family int) (marshal.Marshallable, *syserr.Error) {
+func getSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, outPtr usermem.Addr, outLen int, family int) (marshal.Marshallable, *syserr.Error) {
 	switch name {
 	case linux.IP_TTL:
 		if outLen < sizeOfInt32 {
@@ -1676,6 +1635,46 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family in
 		a, _ := ConvertAddress(linux.AF_INET, tcpip.FullAddress(v))
 		return a.(*linux.SockAddrInet), nil
 
+	case linux.IPT_SO_GET_INFO:
+		if outLen < linux.SizeOfIPTGetinfo {
+			return nil, syserr.ErrInvalidArgument
+		}
+
+		// Only valid for raw IPv4 sockets.
+		if family, skType, _ := s.Type(); family != linux.AF_INET || skType != linux.SOCK_RAW {
+			return nil, syserr.ErrProtocolNotAvailable
+		}
+
+		stack := inet.StackFromContext(t)
+		if stack == nil {
+			return nil, syserr.ErrNoDevice
+		}
+		info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr)
+		if err != nil {
+			return nil, err
+		}
+		return &info, nil
+
+	case linux.IPT_SO_GET_ENTRIES:
+		if outLen < linux.SizeOfIPTGetEntries {
+			return nil, syserr.ErrInvalidArgument
+		}
+
+		// Only valid for raw IPv4 sockets.
+		if family, skType, _ := s.Type(); family != linux.AF_INET || skType != linux.SOCK_RAW {
+			return nil, syserr.ErrProtocolNotAvailable
+		}
+
+		stack := inet.StackFromContext(t)
+		if stack == nil {
+			return nil, syserr.ErrNoDevice
+		}
+		entries, err := netfilter.GetEntries4(t, stack.(*Stack).Stack, outPtr, outLen)
+		if err != nil {
+			return nil, err
+		}
+		return &entries, nil
+
 	default:
 		emitUnimplementedEventIP(t, name)
 	}
@@ -1709,29 +1708,6 @@ func (s *SocketOperations) SetSockOpt(t *kernel.Task, level int, name int, optVa
 		return nil
 	}
 
-	if s.skType == linux.SOCK_RAW && level == linux.SOL_IP {
-		switch name {
-		case linux.IPT_SO_SET_REPLACE:
-			if len(optVal) < linux.SizeOfIPTReplace {
-				return syserr.ErrInvalidArgument
-			}
-			if s.family != linux.AF_INET {
-				return syserr.ErrInvalidArgument
-			}
-
-			stack := inet.StackFromContext(t)
-			if stack == nil {
-				return syserr.ErrNoDevice
-			}
-			// Stack must be a netstack stack.
-			return netfilter.SetEntries(stack.(*Stack).Stack, optVal)
-
-		case linux.IPT_SO_SET_ADD_COUNTERS:
-			// TODO(gvisor.dev/issue/170): Counter support.
-			return nil
-		}
-	}
-
 	return SetSockOpt(t, s, s.Endpoint, level, name, optVal)
 }
 
@@ -1749,7 +1725,7 @@ func SetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, level int
 		return setSockOptIPv6(t, ep, name, optVal)
 
 	case linux.SOL_IP:
-		return setSockOptIP(t, ep, name, optVal)
+		return setSockOptIP(t, s, ep, name, optVal)
 
 	case linux.SOL_UDP,
 		linux.SOL_ICMPV6,
@@ -2160,7 +2136,7 @@ func parseIntOrChar(buf []byte) (int32, *syserr.Error) {
 }
 
 // setSockOptIP implements SetSockOpt when level is SOL_IP.
-func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *syserr.Error {
+func setSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, optVal []byte) *syserr.Error {
 	switch name {
 	case linux.IP_MULTICAST_TTL:
 		v, err := parseIntOrChar(optVal)
@@ -2280,6 +2256,27 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s
 		}
 		return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.IPHdrIncludedOption, v != 0))
 
+	case linux.IPT_SO_SET_REPLACE:
+		if len(optVal) < linux.SizeOfIPTReplace {
+			return syserr.ErrInvalidArgument
+		}
+
+		// Only valid for raw IPv4 sockets.
+		if family, skType, _ := s.Type(); family != linux.AF_INET || skType != linux.SOCK_RAW {
+			return syserr.ErrProtocolNotAvailable
+		}
+
+		stack := inet.StackFromContext(t)
+		if stack == nil {
+			return syserr.ErrNoDevice
+		}
+		// Stack must be a netstack stack.
+		return netfilter.SetEntries(stack.(*Stack).Stack, optVal)
+
+	case linux.IPT_SO_SET_ADD_COUNTERS:
+		// TODO(gvisor.dev/issue/170): Counter support.
+		return nil
+
 	case linux.IP_ADD_SOURCE_MEMBERSHIP,
 		linux.IP_BIND_ADDRESS_NO_PORT,
 		linux.IP_BLOCK_SOURCE,
diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go
index 1db8ae4913..59fa4c58f0 100644
--- a/pkg/sentry/socket/netstack/netstack_vfs2.go
+++ b/pkg/sentry/socket/netstack/netstack_vfs2.go
@@ -21,10 +21,8 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/arch"
 	fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
 	"gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
-	"gvisor.dev/gvisor/pkg/sentry/inet"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/socket"
-	"gvisor.dev/gvisor/pkg/sentry/socket/netfilter"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/syserr"
 	"gvisor.dev/gvisor/pkg/syserror"
@@ -233,48 +231,7 @@ func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.
 		return &val, nil
 	}
 
-	if s.skType == linux.SOCK_RAW && level == linux.IPPROTO_IP {
-		switch name {
-		case linux.IPT_SO_GET_INFO:
-			if outLen < linux.SizeOfIPTGetinfo {
-				return nil, syserr.ErrInvalidArgument
-			}
-			if s.family != linux.AF_INET {
-				return nil, syserr.ErrInvalidArgument
-			}
-
-			stack := inet.StackFromContext(t)
-			if stack == nil {
-				return nil, syserr.ErrNoDevice
-			}
-			info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr)
-			if err != nil {
-				return nil, err
-			}
-			return &info, nil
-
-		case linux.IPT_SO_GET_ENTRIES:
-			if outLen < linux.SizeOfIPTGetEntries {
-				return nil, syserr.ErrInvalidArgument
-			}
-			if s.family != linux.AF_INET {
-				return nil, syserr.ErrInvalidArgument
-			}
-
-			stack := inet.StackFromContext(t)
-			if stack == nil {
-				return nil, syserr.ErrNoDevice
-			}
-			entries, err := netfilter.GetEntries4(t, stack.(*Stack).Stack, outPtr, outLen)
-			if err != nil {
-				return nil, err
-			}
-			return &entries, nil
-
-		}
-	}
-
-	return GetSockOpt(t, s, s.Endpoint, s.family, s.skType, level, name, outLen)
+	return GetSockOpt(t, s, s.Endpoint, s.family, s.skType, level, name, outPtr, outLen)
 }
 
 // SetSockOpt implements the linux syscall setsockopt(2) for sockets backed by
@@ -304,29 +261,6 @@ func (s *SocketVFS2) SetSockOpt(t *kernel.Task, level int, name int, optVal []by
 		return nil
 	}
 
-	if s.skType == linux.SOCK_RAW && level == linux.SOL_IP {
-		switch name {
-		case linux.IPT_SO_SET_REPLACE:
-			if len(optVal) < linux.SizeOfIPTReplace {
-				return syserr.ErrInvalidArgument
-			}
-			if s.family != linux.AF_INET {
-				return syserr.ErrInvalidArgument
-			}
-
-			stack := inet.StackFromContext(t)
-			if stack == nil {
-				return syserr.ErrNoDevice
-			}
-			// Stack must be a netstack stack.
-			return netfilter.SetEntries(stack.(*Stack).Stack, optVal)
-
-		case linux.IPT_SO_SET_ADD_COUNTERS:
-			// TODO(gvisor.dev/issue/170): Counter support.
-			return nil
-		}
-	}
-
 	return SetSockOpt(t, s, s.Endpoint, level, name, optVal)
 }
 
diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go
index b7e8e4325a..0a7a26495b 100644
--- a/pkg/sentry/socket/unix/unix.go
+++ b/pkg/sentry/socket/unix/unix.go
@@ -194,7 +194,7 @@ func (s *SocketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO,
 // GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by
 // a transport.Endpoint.
 func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
-	return netstack.GetSockOpt(t, s, s.ep, linux.AF_UNIX, s.ep.Type(), level, name, outLen)
+	return netstack.GetSockOpt(t, s, s.ep, linux.AF_UNIX, s.ep.Type(), level, name, outPtr, outLen)
 }
 
 // Listen implements the linux syscall listen(2) for sockets backed by
diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go
index d066ef8aba..65a285b8ff 100644
--- a/pkg/sentry/socket/unix/unix_vfs2.go
+++ b/pkg/sentry/socket/unix/unix_vfs2.go
@@ -91,7 +91,7 @@ func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint3
 // GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by
 // a transport.Endpoint.
 func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
-	return netstack.GetSockOpt(t, s, s.ep, linux.AF_UNIX, s.ep.Type(), level, name, outLen)
+	return netstack.GetSockOpt(t, s, s.ep, linux.AF_UNIX, s.ep.Type(), level, name, outPtr, outLen)
 }
 
 // blockingAccept implements a blocking version of accept(2), that is, if no
diff --git a/test/syscalls/linux/iptables.cc b/test/syscalls/linux/iptables.cc
index 9b338d9707..f1af8f097c 100644
--- a/test/syscalls/linux/iptables.cc
+++ b/test/syscalls/linux/iptables.cc
@@ -67,12 +67,43 @@ TEST(IPTablesBasic, FailSockoptNonRaw) {
   struct ipt_getinfo info = {};
   snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename);
   socklen_t info_size = sizeof(info);
-  EXPECT_THAT(getsockopt(sock, IPPROTO_IP, IPT_SO_GET_INFO, &info, &info_size),
+  EXPECT_THAT(getsockopt(sock, SOL_IP, IPT_SO_GET_INFO, &info, &info_size),
               SyscallFailsWithErrno(ENOPROTOOPT));
 
   ASSERT_THAT(close(sock), SyscallSucceeds());
 }
 
+TEST(IPTablesBasic, GetInfoErrorPrecedence) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  int sock;
+  ASSERT_THAT(sock = socket(AF_INET, SOCK_DGRAM, 0), SyscallSucceeds());
+
+  // When using the wrong type of socket and a too-short optlen, we should get
+  // EINVAL.
+  struct ipt_getinfo info = {};
+  snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename);
+  socklen_t info_size = sizeof(info) - 1;
+  ASSERT_THAT(getsockopt(sock, SOL_IP, IPT_SO_GET_INFO, &info, &info_size),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(IPTablesBasic, GetEntriesErrorPrecedence) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  int sock;
+  ASSERT_THAT(sock = socket(AF_INET, SOCK_DGRAM, 0), SyscallSucceeds());
+
+  // When using the wrong type of socket and a too-short optlen, we should get
+  // EINVAL.
+  struct ipt_get_entries entries = {};
+  socklen_t entries_size = sizeof(struct ipt_get_entries) - 1;
+  snprintf(entries.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename);
+  ASSERT_THAT(
+      getsockopt(sock, SOL_IP, IPT_SO_GET_ENTRIES, &entries, &entries_size),
+      SyscallFailsWithErrno(EINVAL));
+}
+
 // Fixture for iptables tests.
 class IPTablesTest : public ::testing::Test {
  protected:
@@ -112,7 +143,7 @@ TEST_F(IPTablesTest, InitialState) {
   struct ipt_getinfo info = {};
   snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename);
   socklen_t info_size = sizeof(info);
-  ASSERT_THAT(getsockopt(s_, IPPROTO_IP, IPT_SO_GET_INFO, &info, &info_size),
+  ASSERT_THAT(getsockopt(s_, SOL_IP, IPT_SO_GET_INFO, &info, &info_size),
               SyscallSucceeds());
 
   // The nat table supports PREROUTING, and OUTPUT.
@@ -148,7 +179,7 @@ TEST_F(IPTablesTest, InitialState) {
   snprintf(entries->name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename);
   entries->size = info.size;
   ASSERT_THAT(
-      getsockopt(s_, IPPROTO_IP, IPT_SO_GET_ENTRIES, entries, &entries_size),
+      getsockopt(s_, SOL_IP, IPT_SO_GET_ENTRIES, entries, &entries_size),
       SyscallSucceeds());
 
   // Verify the name and size.

From e6a1608bc71308fecf951970f17db0bba2611ff0 Mon Sep 17 00:00:00 2001
From: Adin Scannell <ascannell@google.com>
Date: Tue, 25 Aug 2020 14:28:03 -0700
Subject: [PATCH 078/211] Provide --secret-keyring parameter (for newer gpg).

PiperOrigin-RevId: 328403914
---
 tools/make_apt.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/make_apt.sh b/tools/make_apt.sh
index 3fb1066e5c..b47977ed58 100755
--- a/tools/make_apt.sh
+++ b/tools/make_apt.sh
@@ -64,8 +64,8 @@ trap cleanup EXIT
 # is not found. This isn't actually a failure for us, because we don't require
 # the public (this may be stored separately). The second import will succeed
 # because, in reality, the first import succeeded and it's a no-op.
-gpg --no-default-keyring --keyring "${keyring}" --import "${private_key}" || \
-  gpg --no-default-keyring --keyring "${keyring}" --import "${private_key}"
+gpg --no-default-keyring --keyring "${keyring}" --secret-keyring "${keyring}" --import "${private_key}" || \
+  gpg --no-default-keyring --keyring "${keyring}" --secret-keyring "${keyring}" --import "${private_key}"
 
 # Copy the packages into the root.
 for pkg in "$@"; do

From 09bd5a57f3456bb411c34cced923531dc8e0aec7 Mon Sep 17 00:00:00 2001
From: Bhasker Hariharan <bhaskerh@google.com>
Date: Tue, 25 Aug 2020 14:45:03 -0700
Subject: [PATCH 079/211] Clarify comment on NetworkProtocolNumber.

The values used for this field in Netstack are actually the EtherType values
of the protocol in an Ethernet frame. E.g. header.IPv4ProtocolNumber is 0x0800
and not the IPv4 protocol number itself, which is 4. Similarly,
header.IPv6ProtocolNumber is set to 0x86DD whereas the IPv6 protocol number is
41.

See:
  - https://www.iana.org/assignments/ieee-802-numbers/ieee-802-numbers.xhtml (For EtherType)
  - https://www.iana.org/assignments/protocol-numbers/protocol-numbers.xhtml (For ProtocolNumbers)
PiperOrigin-RevId: 328407293
---
 pkg/tcpip/tcpip.go | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index 44f87e0073..609b8af33e 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -1029,7 +1029,10 @@ func (r Route) String() string {
 // TransportProtocolNumber is the number of a transport protocol.
 type TransportProtocolNumber uint32
 
-// NetworkProtocolNumber is the number of a network protocol.
+// NetworkProtocolNumber is the EtherType of a network protocol in an Ethernet
+// frame.
+//
+// See: https://www.iana.org/assignments/ieee-802-numbers/ieee-802-numbers.xhtml
 type NetworkProtocolNumber uint32
 
 // A StatCounter keeps track of a statistic.

From 7483666eca67bf7d7ee814a4c8667af575f15bda Mon Sep 17 00:00:00 2001
From: Nicolas Lacasse <nlacasse@google.com>
Date: Tue, 25 Aug 2020 14:58:28 -0700
Subject: [PATCH 080/211] overlay: clonePrivateMount must pass a Dentry
 reference to MakeVirtualDentry.

PiperOrigin-RevId: 328410065
---
 pkg/sentry/fsimpl/overlay/overlay.go | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pkg/sentry/fsimpl/overlay/overlay.go b/pkg/sentry/fsimpl/overlay/overlay.go
index 4b3dfbc017..00562667fa 100644
--- a/pkg/sentry/fsimpl/overlay/overlay.go
+++ b/pkg/sentry/fsimpl/overlay/overlay.go
@@ -315,7 +315,11 @@ func clonePrivateMount(vfsObj *vfs.VirtualFilesystem, vd vfs.VirtualDentry, forc
 	if err != nil {
 		return vfs.VirtualDentry{}, err
 	}
-	return vfs.MakeVirtualDentry(newmnt, vd.Dentry()), nil
+	// Take a reference on the dentry which will be owned by the returned
+	// VirtualDentry.
+	d := vd.Dentry()
+	d.IncRef()
+	return vfs.MakeVirtualDentry(newmnt, d), nil
 }
 
 // Release implements vfs.FilesystemImpl.Release.

From 38d512f589156949665d06c73ba5bf5a4bd35218 Mon Sep 17 00:00:00 2001
From: Jamie Liu <jamieliu@google.com>
Date: Tue, 25 Aug 2020 14:59:59 -0700
Subject: [PATCH 081/211] Link to PHP bug for disabled disk space tests.

PiperOrigin-RevId: 328410399
---
 test/runtimes/exclude_php7.3.6.csv | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/test/runtimes/exclude_php7.3.6.csv b/test/runtimes/exclude_php7.3.6.csv
index d252383a00..815a137c59 100644
--- a/test/runtimes/exclude_php7.3.6.csv
+++ b/test/runtimes/exclude_php7.3.6.csv
@@ -13,13 +13,13 @@ ext/session/tests/session_set_save_handler_class_018.phpt,,
 ext/session/tests/session_set_save_handler_iface_003.phpt,,
 ext/session/tests/session_set_save_handler_sid_001.phpt,,
 ext/session/tests/session_set_save_handler_variation4.phpt,,
-ext/standard/tests/file/disk.phpt,,Test bug
-ext/standard/tests/file/disk_free_space_basic.phpt,,Test bug
-ext/standard/tests/file/disk_free_space_error.phpt,,Test bug
-ext/standard/tests/file/disk_free_space_variation.phpt,,Test bug
-ext/standard/tests/file/disk_total_space_basic.phpt,,Test bug
-ext/standard/tests/file/disk_total_space_error.phpt,,Test bug
-ext/standard/tests/file/disk_total_space_variation.phpt,,Test bug
+ext/standard/tests/file/disk.phpt,https://bugs.php.net/bug.php?id=80018,
+ext/standard/tests/file/disk_free_space_basic.phpt,https://bugs.php.net/bug.php?id=80018,
+ext/standard/tests/file/disk_free_space_error.phpt,https://bugs.php.net/bug.php?id=80018,
+ext/standard/tests/file/disk_free_space_variation.phpt,https://bugs.php.net/bug.php?id=80018,
+ext/standard/tests/file/disk_total_space_basic.phpt,https://bugs.php.net/bug.php?id=80018,
+ext/standard/tests/file/disk_total_space_error.phpt,https://bugs.php.net/bug.php?id=80018,
+ext/standard/tests/file/disk_total_space_variation.phpt,https://bugs.php.net/bug.php?id=80018,
 ext/standard/tests/file/fopen_variation19.phpt,b/162894964,
 ext/standard/tests/file/lstat_stat_variation14.phpt,,Flaky
 ext/standard/tests/file/php_fd_wrapper_01.phpt,,

From e91164893d6bbaf42639b7e4bb948e9165587130 Mon Sep 17 00:00:00 2001
From: Ayush Ranjan <ayushranjan@google.com>
Date: Tue, 25 Aug 2020 15:26:54 -0700
Subject: [PATCH 082/211] [go-marshal] Enable auto-marshalling for host tty.

PiperOrigin-RevId: 328415633
---
 pkg/abi/linux/tty.go          |  4 ++++
 pkg/sentry/fs/host/BUILD      |  1 +
 pkg/sentry/fs/host/tty.go     | 40 +++++++++++++----------------------
 pkg/sentry/fsimpl/host/BUILD  |  1 +
 pkg/sentry/fsimpl/host/tty.go | 40 +++++++++++++----------------------
 5 files changed, 36 insertions(+), 50 deletions(-)

diff --git a/pkg/abi/linux/tty.go b/pkg/abi/linux/tty.go
index 8ac02aee82..e640969a65 100644
--- a/pkg/abi/linux/tty.go
+++ b/pkg/abi/linux/tty.go
@@ -23,6 +23,8 @@ const (
 )
 
 // Winsize is struct winsize, defined in uapi/asm-generic/termios.h.
+//
+// +marshal
 type Winsize struct {
 	Row    uint16
 	Col    uint16
@@ -31,6 +33,8 @@ type Winsize struct {
 }
 
 // Termios is struct termios, defined in uapi/asm-generic/termbits.h.
+//
+// +marshal
 type Termios struct {
 	InputFlags        uint32
 	OutputFlags       uint32
diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD
index d41d23a437..42a6c41c2b 100644
--- a/pkg/sentry/fs/host/BUILD
+++ b/pkg/sentry/fs/host/BUILD
@@ -55,6 +55,7 @@ go_library(
         "//pkg/unet",
         "//pkg/usermem",
         "//pkg/waiter",
+        "//tools/go_marshal/primitive",
     ],
 )
 
diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go
index e29ae00f24..67a807f9d4 100644
--- a/pkg/sentry/fs/host/tty.go
+++ b/pkg/sentry/fs/host/tty.go
@@ -24,6 +24,7 @@ import (
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
+	"gvisor.dev/gvisor/tools/go_marshal/primitive"
 )
 
 // LINT.IfChange
@@ -123,6 +124,11 @@ func (t *TTYFileOperations) Release(ctx context.Context) {
 
 // Ioctl implements fs.FileOperations.Ioctl.
 func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+	task := kernel.TaskFromContext(ctx)
+	if task == nil {
+		return 0, syserror.ENOTTY
+	}
+
 	// Ignore arg[0].  This is the real FD:
 	fd := t.fileOperations.iops.fileState.FD()
 	ioctl := args[1].Uint64()
@@ -132,9 +138,7 @@ func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO
 		if err != nil {
 			return 0, err
 		}
-		_, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), termios, usermem.IOOpts{
-			AddressSpaceActive: true,
-		})
+		_, err = termios.CopyOut(task, args[2].Pointer())
 		return 0, err
 
 	case linux.TCSETS, linux.TCSETSW, linux.TCSETSF:
@@ -146,9 +150,7 @@ func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO
 		}
 
 		var termios linux.Termios
-		if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &termios, usermem.IOOpts{
-			AddressSpaceActive: true,
-		}); err != nil {
+		if _, err := termios.CopyIn(task, args[2].Pointer()); err != nil {
 			return 0, err
 		}
 		err := ioctlSetTermios(fd, ioctl, &termios)
@@ -173,10 +175,8 @@ func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO
 
 		// Map the ProcessGroup into a ProcessGroupID in the task's PID
 		// namespace.
-		pgID := pidns.IDOfProcessGroup(t.fgProcessGroup)
-		_, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), &pgID, usermem.IOOpts{
-			AddressSpaceActive: true,
-		})
+		pgID := primitive.Int32(pidns.IDOfProcessGroup(t.fgProcessGroup))
+		_, err := pgID.CopyOut(task, args[2].Pointer())
 		return 0, err
 
 	case linux.TIOCSPGRP:
@@ -184,11 +184,6 @@ func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO
 		// Equivalent to tcsetpgrp(fd, *argp).
 		// Set the foreground process group ID of this terminal.
 
-		task := kernel.TaskFromContext(ctx)
-		if task == nil {
-			return 0, syserror.ENOTTY
-		}
-
 		t.mu.Lock()
 		defer t.mu.Unlock()
 
@@ -208,12 +203,11 @@ func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO
 			return 0, syserror.ENOTTY
 		}
 
-		var pgID kernel.ProcessGroupID
-		if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &pgID, usermem.IOOpts{
-			AddressSpaceActive: true,
-		}); err != nil {
+		var pgIDP primitive.Int32
+		if _, err := pgIDP.CopyIn(task, args[2].Pointer()); err != nil {
 			return 0, err
 		}
+		pgID := kernel.ProcessGroupID(pgIDP)
 
 		// pgID must be non-negative.
 		if pgID < 0 {
@@ -242,9 +236,7 @@ func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO
 		if err != nil {
 			return 0, err
 		}
-		_, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), winsize, usermem.IOOpts{
-			AddressSpaceActive: true,
-		})
+		_, err = winsize.CopyOut(task, args[2].Pointer())
 		return 0, err
 
 	case linux.TIOCSWINSZ:
@@ -255,9 +247,7 @@ func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO
 		// background ones) can set the winsize.
 
 		var winsize linux.Winsize
-		if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &winsize, usermem.IOOpts{
-			AddressSpaceActive: true,
-		}); err != nil {
+		if _, err := winsize.CopyIn(task, args[2].Pointer()); err != nil {
 			return 0, err
 		}
 		err := ioctlSetWinsize(fd, &winsize)
diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD
index 090ae08042..be1c88c824 100644
--- a/pkg/sentry/fsimpl/host/BUILD
+++ b/pkg/sentry/fsimpl/host/BUILD
@@ -72,6 +72,7 @@ go_library(
         "//pkg/unet",
         "//pkg/usermem",
         "//pkg/waiter",
+        "//tools/go_marshal/primitive",
         "@org_golang_x_sys//unix:go_default_library",
     ],
 )
diff --git a/pkg/sentry/fsimpl/host/tty.go b/pkg/sentry/fsimpl/host/tty.go
index 27cbd30599..7a9be4b97a 100644
--- a/pkg/sentry/fsimpl/host/tty.go
+++ b/pkg/sentry/fsimpl/host/tty.go
@@ -25,6 +25,7 @@ import (
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
+	"gvisor.dev/gvisor/tools/go_marshal/primitive"
 )
 
 // TTYFileDescription implements vfs.FileDescriptionImpl for a host file
@@ -143,6 +144,11 @@ func (t *TTYFileDescription) Write(ctx context.Context, src usermem.IOSequence,
 
 // Ioctl implements vfs.FileDescriptionImpl.
 func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+	task := kernel.TaskFromContext(ctx)
+	if task == nil {
+		return 0, syserror.ENOTTY
+	}
+
 	// Ignore arg[0]. This is the real FD:
 	fd := t.inode.hostFD
 	ioctl := args[1].Uint64()
@@ -152,9 +158,7 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch
 		if err != nil {
 			return 0, err
 		}
-		_, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), termios, usermem.IOOpts{
-			AddressSpaceActive: true,
-		})
+		_, err = termios.CopyOut(task, args[2].Pointer())
 		return 0, err
 
 	case linux.TCSETS, linux.TCSETSW, linux.TCSETSF:
@@ -166,9 +170,7 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch
 		}
 
 		var termios linux.Termios
-		if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &termios, usermem.IOOpts{
-			AddressSpaceActive: true,
-		}); err != nil {
+		if _, err := termios.CopyIn(task, args[2].Pointer()); err != nil {
 			return 0, err
 		}
 		err := ioctlSetTermios(fd, ioctl, &termios)
@@ -192,10 +194,8 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch
 		defer t.mu.Unlock()
 
 		// Map the ProcessGroup into a ProcessGroupID in the task's PID namespace.
-		pgID := pidns.IDOfProcessGroup(t.fgProcessGroup)
-		_, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), &pgID, usermem.IOOpts{
-			AddressSpaceActive: true,
-		})
+		pgID := primitive.Int32(pidns.IDOfProcessGroup(t.fgProcessGroup))
+		_, err := pgID.CopyOut(task, args[2].Pointer())
 		return 0, err
 
 	case linux.TIOCSPGRP:
@@ -203,11 +203,6 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch
 		// Equivalent to tcsetpgrp(fd, *argp).
 		// Set the foreground process group ID of this terminal.
 
-		task := kernel.TaskFromContext(ctx)
-		if task == nil {
-			return 0, syserror.ENOTTY
-		}
-
 		t.mu.Lock()
 		defer t.mu.Unlock()
 
@@ -226,12 +221,11 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch
 			return 0, syserror.ENOTTY
 		}
 
-		var pgID kernel.ProcessGroupID
-		if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &pgID, usermem.IOOpts{
-			AddressSpaceActive: true,
-		}); err != nil {
+		var pgIDP primitive.Int32
+		if _, err := pgIDP.CopyIn(task, args[2].Pointer()); err != nil {
 			return 0, err
 		}
+		pgID := kernel.ProcessGroupID(pgIDP)
 
 		// pgID must be non-negative.
 		if pgID < 0 {
@@ -260,9 +254,7 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch
 		if err != nil {
 			return 0, err
 		}
-		_, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), winsize, usermem.IOOpts{
-			AddressSpaceActive: true,
-		})
+		_, err = winsize.CopyOut(task, args[2].Pointer())
 		return 0, err
 
 	case linux.TIOCSWINSZ:
@@ -273,9 +265,7 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch
 		// set the winsize.
 
 		var winsize linux.Winsize
-		if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &winsize, usermem.IOOpts{
-			AddressSpaceActive: true,
-		}); err != nil {
+		if _, err := winsize.CopyIn(task, args[2].Pointer()); err != nil {
 			return 0, err
 		}
 		err := ioctlSetWinsize(fd, &winsize)

From c8125fe386f7b835e04a0ea40a2b501ef61598e5 Mon Sep 17 00:00:00 2001
From: Toshi Kikuchi <toshik@google.com>
Date: Tue, 25 Aug 2020 16:13:39 -0700
Subject: [PATCH 083/211] Only send an ICMP error message if UDP checksum is
 valid.

Test:
 - TestV4UnknownDestination
 - TestV6UnknownDestination
PiperOrigin-RevId: 328424137
---
 pkg/tcpip/transport/udp/endpoint.go |  37 ++++---
 pkg/tcpip/transport/udp/protocol.go |   7 +-
 pkg/tcpip/transport/udp/udp_test.go | 162 ++++++++++++++--------------
 3 files changed, 106 insertions(+), 100 deletions(-)

diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index c33434b75c..0a9d3c6cf8 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -1366,6 +1366,22 @@ func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
 	return result
 }
 
+// verifyChecksum verifies the checksum unless RX checksum offload is enabled.
+// On IPv4, UDP checksum is optional, and a zero value means the transmitter
+// omitted the checksum generation (RFC768).
+// On IPv6, UDP checksum is not optional (RFC2460 Section 8.1).
+func verifyChecksum(r *stack.Route, hdr header.UDP, pkt *stack.PacketBuffer) bool {
+	if r.Capabilities()&stack.CapabilityRXChecksumOffload == 0 &&
+		(hdr.Checksum() != 0 || r.NetProto == header.IPv6ProtocolNumber) {
+		xsum := r.PseudoHeaderChecksum(ProtocolNumber, hdr.Length())
+		for _, v := range pkt.Data.Views() {
+			xsum = header.Checksum(v, xsum)
+		}
+		return hdr.CalculateChecksum(xsum) == 0xffff
+	}
+	return true
+}
+
 // HandlePacket is called by the stack when new packets arrive to this transport
 // endpoint.
 func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) {
@@ -1387,22 +1403,11 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk
 		return
 	}
 
-	// Verify checksum unless RX checksum offload is enabled.
-	// On IPv4, UDP checksum is optional, and a zero value means
-	// the transmitter omitted the checksum generation (RFC768).
-	// On IPv6, UDP checksum is not optional (RFC2460 Section 8.1).
-	if r.Capabilities()&stack.CapabilityRXChecksumOffload == 0 &&
-		(hdr.Checksum() != 0 || r.NetProto == header.IPv6ProtocolNumber) {
-		xsum := r.PseudoHeaderChecksum(ProtocolNumber, hdr.Length())
-		for _, v := range pkt.Data.Views() {
-			xsum = header.Checksum(v, xsum)
-		}
-		if hdr.CalculateChecksum(xsum) != 0xffff {
-			// Checksum Error.
-			e.stack.Stats().UDP.ChecksumErrors.Increment()
-			e.stats.ReceiveErrors.ChecksumErrors.Increment()
-			return
-		}
+	if !verifyChecksum(r, hdr, pkt) {
+		// Checksum Error.
+		e.stack.Stats().UDP.ChecksumErrors.Increment()
+		e.stats.ReceiveErrors.ChecksumErrors.Increment()
+		return
 	}
 
 	e.stack.Stats().UDP.PacketsReceived.Increment()
diff --git a/pkg/tcpip/transport/udp/protocol.go b/pkg/tcpip/transport/udp/protocol.go
index 63d4bed7ca..f65751dd45 100644
--- a/pkg/tcpip/transport/udp/protocol.go
+++ b/pkg/tcpip/transport/udp/protocol.go
@@ -88,7 +88,12 @@ func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.Trans
 		r.Stack().Stats().UDP.MalformedPacketsReceived.Increment()
 		return true
 	}
-	// TODO(b/129426613): only send an ICMP message if UDP checksum is valid.
+
+	if !verifyChecksum(r, hdr, pkt) {
+		// Checksum Error.
+		r.Stack().Stats().UDP.ChecksumErrors.Increment()
+		return true
+	}
 
 	// Only send ICMP error if the address is not a multicast/broadcast
 	// v4/v6 address or the source is not the unspecified address.
diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go
index 0a558df6d2..bd1c8ac318 100644
--- a/pkg/tcpip/transport/udp/udp_test.go
+++ b/pkg/tcpip/transport/udp/udp_test.go
@@ -403,18 +403,35 @@ func (c *testContext) getPacketAndVerify(flow testFlow, checkers ...checker.Netw
 }
 
 // injectPacket creates a packet of the given flow and with the given payload,
-// and injects it into the link endpoint.
-func (c *testContext) injectPacket(flow testFlow, payload []byte) {
+// and injects it into the link endpoint. If badChecksum is true, the packet has
+// a bad checksum in the UDP header.
+func (c *testContext) injectPacket(flow testFlow, payload []byte, badChecksum bool) {
 	c.t.Helper()
 
 	h := flow.header4Tuple(incoming)
 	if flow.isV4() {
 		buf := c.buildV4Packet(payload, &h)
+		if badChecksum {
+			// Invalidate the UDP header checksum field, taking care to avoid
+			// overflow to zero, which would disable checksum validation.
+			for u := header.UDP(buf[header.IPv4MinimumSize:]); ; {
+				u.SetChecksum(u.Checksum() + 1)
+				if u.Checksum() != 0 {
+					break
+				}
+			}
+		}
 		c.linkEP.InjectInbound(ipv4.ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
 			Data: buf.ToVectorisedView(),
 		}))
 	} else {
 		buf := c.buildV6Packet(payload, &h)
+		if badChecksum {
+			// Invalidate the UDP header checksum field. Unlike IPv4, a zero
+			// checksum does not disable validation on IPv6, so no need to avoid it.
+			u := header.UDP(buf[header.IPv6MinimumSize:])
+			u.SetChecksum(u.Checksum() + 1)
+		}
 		c.linkEP.InjectInbound(ipv6.ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
 			Data: buf.ToVectorisedView(),
 		}))
@@ -569,7 +586,7 @@ func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expe
 	c.t.Helper()
 
 	payload := newPayload()
-	c.injectPacket(flow, payload)
+	c.injectPacket(flow, payload, false)
 
 	// Try to receive the data.
 	we, ch := waiter.NewChannelEntry(nil)
@@ -925,7 +942,7 @@ func TestReadFromMulticastStats(t *testing.T) {
 			}
 
 			payload := newPayload()
-			c.injectPacket(flow, payload)
+			c.injectPacket(flow, payload, false)
 
 			var want uint64 = 0
 			if flow.isReverseMulticast() {
@@ -1727,21 +1744,33 @@ func TestV4UnknownDestination(t *testing.T) {
 		// so that the final generated IPv4 packet is larger than
 		// header.IPv4MinimumProcessableDatagramSize.
 		largePayload bool
+		// badChecksum if true, will set an invalid checksum in the
+		// header.
+		badChecksum bool
 	}{
-		{unicastV4, true, false},
-		{unicastV4, true, true},
-		{multicastV4, false, false},
-		{multicastV4, false, true},
-		{broadcast, false, false},
-		{broadcast, false, true},
-	}
+		{unicastV4, true, false, false},
+		{unicastV4, true, true, false},
+		{unicastV4, false, false, true},
+		{unicastV4, false, true, true},
+		{multicastV4, false, false, false},
+		{multicastV4, false, true, false},
+		{broadcast, false, false, false},
+		{broadcast, false, true, false},
+	}
+	checksumErrors := uint64(0)
 	for _, tc := range testCases {
-		t.Run(fmt.Sprintf("flow:%s icmpRequired:%t largePayload:%t", tc.flow, tc.icmpRequired, tc.largePayload), func(t *testing.T) {
+		t.Run(fmt.Sprintf("flow:%s icmpRequired:%t largePayload:%t badChecksum:%t", tc.flow, tc.icmpRequired, tc.largePayload, tc.badChecksum), func(t *testing.T) {
 			payload := newPayload()
 			if tc.largePayload {
 				payload = newMinPayload(576)
 			}
-			c.injectPacket(tc.flow, payload)
+			c.injectPacket(tc.flow, payload, tc.badChecksum)
+			if tc.badChecksum {
+				checksumErrors++
+				if got, want := c.s.Stats().UDP.ChecksumErrors.Value(), checksumErrors; got != want {
+					t.Fatalf("got stats.UDP.ChecksumErrors.Value() = %d, want = %d", got, want)
+				}
+			}
 			if !tc.icmpRequired {
 				ctx, cancel := context.WithTimeout(context.Background(), time.Second)
 				defer cancel()
@@ -1806,19 +1835,31 @@ func TestV6UnknownDestination(t *testing.T) {
 		// largePayload if true will result in a payload large enough to
 		// create an IPv6 packet > header.IPv6MinimumMTU bytes.
 		largePayload bool
+		// badChecksum if true, will set an invalid checksum in the
+		// header.
+		badChecksum bool
 	}{
-		{unicastV6, true, false},
-		{unicastV6, true, true},
-		{multicastV6, false, false},
-		{multicastV6, false, true},
-	}
+		{unicastV6, true, false, false},
+		{unicastV6, true, true, false},
+		{unicastV6, false, false, true},
+		{unicastV6, false, true, true},
+		{multicastV6, false, false, false},
+		{multicastV6, false, true, false},
+	}
+	checksumErrors := uint64(0)
 	for _, tc := range testCases {
-		t.Run(fmt.Sprintf("flow:%s icmpRequired:%t largePayload:%t", tc.flow, tc.icmpRequired, tc.largePayload), func(t *testing.T) {
+		t.Run(fmt.Sprintf("flow:%s icmpRequired:%t largePayload:%t badChecksum:%t", tc.flow, tc.icmpRequired, tc.largePayload, tc.badChecksum), func(t *testing.T) {
 			payload := newPayload()
 			if tc.largePayload {
 				payload = newMinPayload(1280)
 			}
-			c.injectPacket(tc.flow, payload)
+			c.injectPacket(tc.flow, payload, tc.badChecksum)
+			if tc.badChecksum {
+				checksumErrors++
+				if got, want := c.s.Stats().UDP.ChecksumErrors.Value(), checksumErrors; got != want {
+					t.Fatalf("got stats.UDP.ChecksumErrors.Value() = %d, want = %d", got, want)
+				}
+			}
 			if !tc.icmpRequired {
 				ctx, cancel := context.WithTimeout(context.Background(), time.Second)
 				defer cancel()
@@ -1953,74 +1994,29 @@ func TestShortHeader(t *testing.T) {
 	}
 }
 
-// TestIncrementChecksumErrorsV4 verifies if a checksum error is detected,
+// TestBadChecksumErrors verifies if a checksum error is detected,
 // global and endpoint stats are incremented.
-func TestIncrementChecksumErrorsV4(t *testing.T) {
-	c := newDualTestContext(t, defaultMTU)
-	defer c.cleanup()
-
-	c.createEndpoint(ipv4.ProtocolNumber)
-	// Bind to wildcard.
-	if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
-		c.t.Fatalf("Bind failed: %s", err)
-	}
-
-	payload := newPayload()
-	h := unicastV4.header4Tuple(incoming)
-	buf := c.buildV4Packet(payload, &h)
+func TestBadChecksumErrors(t *testing.T) {
+	for _, flow := range []testFlow{unicastV4, unicastV6} {
+		c := newDualTestContext(t, defaultMTU)
+		defer c.cleanup()
 
-	// Invalidate the UDP header checksum field, taking care to avoid
-	// overflow to zero, which would disable checksum validation.
-	for u := header.UDP(buf[header.IPv4MinimumSize:]); ; {
-		u.SetChecksum(u.Checksum() + 1)
-		if u.Checksum() != 0 {
-			break
+		c.createEndpoint(flow.sockProto())
+		// Bind to wildcard.
+		if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
+			c.t.Fatalf("Bind failed: %s", err)
 		}
-	}
-
-	c.linkEP.InjectInbound(ipv4.ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
-		Data: buf.ToVectorisedView(),
-	}))
-
-	const want = 1
-	if got := c.s.Stats().UDP.ChecksumErrors.Value(); got != want {
-		t.Errorf("got stats.UDP.ChecksumErrors.Value() = %d, want = %d", got, want)
-	}
-	if got := c.ep.Stats().(*tcpip.TransportEndpointStats).ReceiveErrors.ChecksumErrors.Value(); got != want {
-		t.Errorf("got EP Stats.ReceiveErrors.ChecksumErrors stats = %d, want = %d", got, want)
-	}
-}
-
-// TestIncrementChecksumErrorsV6 verifies if a checksum error is detected,
-// global and endpoint stats are incremented.
-func TestIncrementChecksumErrorsV6(t *testing.T) {
-	c := newDualTestContext(t, defaultMTU)
-	defer c.cleanup()
-
-	c.createEndpoint(ipv6.ProtocolNumber)
-	// Bind to wildcard.
-	if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
-		c.t.Fatalf("Bind failed: %s", err)
-	}
-
-	payload := newPayload()
-	h := unicastV6.header4Tuple(incoming)
-	buf := c.buildV6Packet(payload, &h)
-
-	// Invalidate the UDP header checksum field.
-	u := header.UDP(buf[header.IPv6MinimumSize:])
-	u.SetChecksum(u.Checksum() + 1)
 
-	c.linkEP.InjectInbound(ipv6.ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
-		Data: buf.ToVectorisedView(),
-	}))
+		payload := newPayload()
+		c.injectPacket(flow, payload, true /* badChecksum */)
 
-	const want = 1
-	if got := c.s.Stats().UDP.ChecksumErrors.Value(); got != want {
-		t.Errorf("got stats.UDP.ChecksumErrors.Value() = %d, want = %d", got, want)
-	}
-	if got := c.ep.Stats().(*tcpip.TransportEndpointStats).ReceiveErrors.ChecksumErrors.Value(); got != want {
-		t.Errorf("got EP Stats.ReceiveErrors.ChecksumErrors stats = %d, want = %d", got, want)
+		const want = 1
+		if got := c.s.Stats().UDP.ChecksumErrors.Value(); got != want {
+			t.Errorf("got stats.UDP.ChecksumErrors.Value() = %d, want = %d", got, want)
+		}
+		if got := c.ep.Stats().(*tcpip.TransportEndpointStats).ReceiveErrors.ChecksumErrors.Value(); got != want {
+			t.Errorf("got EP Stats.ReceiveErrors.ChecksumErrors stats = %d, want = %d", got, want)
+		}
 	}
 }
 

From 5683a8568adc9c13c1cf9d360dae105dc60b145d Mon Sep 17 00:00:00 2001
From: Dean Deng <deandeng@google.com>
Date: Tue, 25 Aug 2020 16:26:53 -0700
Subject: [PATCH 084/211] Expose basic coverage information to userspace
 through kcov interface.

In Linux, a kernel configuration is set that compiles the kernel with a
custom function that is called at the beginning of every basic block, which
updates the memory-mapped coverage information. The Go coverage tool does not
allow us to inject arbitrary instructions into basic blocks, but it does
provide data that we can convert to a kcov-like format and transfer them to
userspace through a memory mapping.

Note that this is not a strict implementation of kcov, which is especially
tricky to do because we do not have the same coverage tools available in Go
that are available for the actual Linux kernel. In Linux, a kernel
configuration is set that compiles the kernel with a custom function that is
called at the beginning of every basic block to write program counters to the
kcov memory mapping. In Go, however, coverage tools only give us a count of
basic blocks as they are executed. Every time we return to userspace, we
collect the coverage information and write out PCs for each block that was
executed, providing userspace with the illusion that the kcov data is always
up to date. For convenience, we also generate a unique synthetic PC for each
block instead of using actual PCs. Finally, we do not provide thread-specific
coverage data (each kcov instance only contains PCs executed by the thread
owning it); instead, we will supply data for any file specified by
--instrumentation_filter.

Also, fix issue in nogo that was causing pkg/coverage:coverage_nogo
compilation to fail.

PiperOrigin-RevId: 328426526
---
 pkg/abi/linux/ioctl.go           |  21 ++
 pkg/coverage/BUILD               |  14 ++
 pkg/coverage/coverage.go         | 175 +++++++++++++++++
 pkg/sentry/fsimpl/sys/BUILD      |   5 +
 pkg/sentry/fsimpl/sys/kcov.go    | 116 +++++++++++
 pkg/sentry/fsimpl/sys/sys.go     |  18 +-
 pkg/sentry/kernel/BUILD          |   3 +
 pkg/sentry/kernel/kcov.go        | 321 +++++++++++++++++++++++++++++++
 pkg/sentry/kernel/kcov_unsafe.go |  28 +++
 pkg/sentry/kernel/kernel.go      |   2 +-
 pkg/sentry/kernel/task.go        |  18 ++
 pkg/sentry/kernel/task_exit.go   |   2 +
 runsc/config/config.go           |   6 +-
 test/syscalls/BUILD              |   4 +
 test/syscalls/linux/BUILD        |  14 ++
 test/syscalls/linux/kcov.cc      |  70 +++++++
 16 files changed, 813 insertions(+), 4 deletions(-)
 create mode 100644 pkg/coverage/BUILD
 create mode 100644 pkg/coverage/coverage.go
 create mode 100644 pkg/sentry/fsimpl/sys/kcov.go
 create mode 100644 pkg/sentry/kernel/kcov.go
 create mode 100644 pkg/sentry/kernel/kcov_unsafe.go
 create mode 100644 test/syscalls/linux/kcov.cc

diff --git a/pkg/abi/linux/ioctl.go b/pkg/abi/linux/ioctl.go
index 2c5e56ae56..d6dbedc3e5 100644
--- a/pkg/abi/linux/ioctl.go
+++ b/pkg/abi/linux/ioctl.go
@@ -117,3 +117,24 @@ const (
 func IOC(dir, typ, nr, size uint32) uint32 {
 	return uint32(dir)<<_IOC_DIRSHIFT | typ<<_IOC_TYPESHIFT | nr<<_IOC_NRSHIFT | size<<_IOC_SIZESHIFT
 }
+
+// Kcov ioctls from kernel/kcov.h.
+var (
+	KCOV_INIT_TRACE = IOC(_IOC_READ, 'c', 1, 8)
+	KCOV_ENABLE     = IOC(_IOC_NONE, 'c', 100, 0)
+	KCOV_DISABLE    = IOC(_IOC_NONE, 'c', 101, 0)
+)
+
+// Kcov trace types from kernel/kcov.h.
+const (
+	KCOV_TRACE_PC  = 0
+	KCOV_TRACE_CMP = 1
+)
+
+// Kcov state constants from kernel/kcov.h.
+const (
+	KCOV_MODE_DISABLED  = 0
+	KCOV_MODE_INIT      = 1
+	KCOV_MODE_TRACE_PC  = 2
+	KCOV_MODE_TRACE_CMP = 3
+)
diff --git a/pkg/coverage/BUILD b/pkg/coverage/BUILD
new file mode 100644
index 0000000000..a198e80289
--- /dev/null
+++ b/pkg/coverage/BUILD
@@ -0,0 +1,14 @@
+load("//tools:defs.bzl", "go_library")
+
+package(licenses = ["notice"])
+
+go_library(
+    name = "coverage",
+    srcs = ["coverage.go"],
+    visibility = ["//:sandbox"],
+    deps = [
+        "//pkg/sync",
+        "//pkg/usermem",
+        "@io_bazel_rules_go//go/tools/coverdata",
+    ],
+)
diff --git a/pkg/coverage/coverage.go b/pkg/coverage/coverage.go
new file mode 100644
index 0000000000..6831adcce5
--- /dev/null
+++ b/pkg/coverage/coverage.go
@@ -0,0 +1,175 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package coverage provides an interface through which Go coverage data can
+// be collected, converted to kcov format, and exposed to userspace.
+//
+// Coverage can be enabled by calling bazel {build,test} with
+// --collect_coverage_data and --instrumentation_filter with the desired
+// coverage surface. This causes bazel to use the Go cover tool manually to
+// generate instrumented files. It injects a hook that registers all coverage
+// data with the coverdata package.
+package coverage
+
+import (
+	"fmt"
+	"io"
+	"sort"
+	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/sync"
+	"gvisor.dev/gvisor/pkg/usermem"
+
+	"github.com/bazelbuild/rules_go/go/tools/coverdata"
+)
+
+// KcovAvailable returns whether the kcov coverage interface is available. It is
+// available as long as coverage is enabled for some files.
+func KcovAvailable() bool {
+	return len(coverdata.Cover.Blocks) > 0
+}
+
+// coverageMu must be held while accessing coverdata.Cover. This prevents
+// concurrent reads/writes from multiple threads collecting coverage data.
+var coverageMu sync.RWMutex
+
+// once ensures that globalData is only initialized once.
+var once sync.Once
+
+var globalData struct {
+	// files is the set of covered files sorted by filename. It is calculated at
+	// startup.
+	files []string
+
+	// syntheticPCs are a set of PCs calculated at startup, where the PC
+	// at syntheticPCs[i][j] corresponds to file i, block j.
+	syntheticPCs [][]uint64
+}
+
+// ClearCoverageData clears existing coverage data.
+func ClearCoverageData() {
+	coverageMu.Lock()
+	defer coverageMu.Unlock()
+	for _, counters := range coverdata.Cover.Counters {
+		for index := 0; index < len(counters); index++ {
+			atomic.StoreUint32(&counters[index], 0)
+		}
+	}
+}
+
+var coveragePool = sync.Pool{
+	New: func() interface{} {
+		return make([]byte, 0)
+	},
+}
+
+// ConsumeCoverageData builds and writes the collection of covered PCs. It
+// returns the number of bytes written.
+//
+// In Linux, a kernel configuration is set that compiles the kernel with a
+// custom function that is called at the beginning of every basic block, which
+// updates the memory-mapped coverage information. The Go coverage tool does not
+// allow us to inject arbitrary instructions into basic blocks, but it does
+// provide data that we can convert to a kcov-like format and transfer them to
+// userspace through a memory mapping.
+//
+// Note that this is not a strict implementation of kcov, which is especially
+// tricky to do because we do not have the same coverage tools available in Go
+// that are available for the actual Linux kernel. In Linux, a kernel
+// configuration is set that compiles the kernel with a custom function that is
+// called at the beginning of every basic block to write program counters to the
+// kcov memory mapping. In Go, however, coverage tools only give us a count of
+// basic blocks as they are executed. Every time we return to userspace, we
+// collect the coverage information and write out PCs for each block that was
+// executed, providing userspace with the illusion that the kcov data is always
+// up to date. For convenience, we also generate a unique synthetic PC for each
+// block instead of using actual PCs. Finally, we do not provide thread-specific
+// coverage data (each kcov instance only contains PCs executed by the thread
+// owning it); instead, we will supply data for any file specified by
+// --instrumentation_filter.
+//
+// Note that we "consume", i.e. clear, coverdata when this function is run, to
+// ensure that each event is only reported once.
+//
+// TODO(b/160639712): evaluate whether it is ok to reset the global coverage
+// data every time this function is run. We could technically have each thread
+// store a local snapshot against which we compare the most recent coverdata so
+// that separate threads do not affect each other's view of the data.
+func ConsumeCoverageData(w io.Writer) int {
+	once.Do(initCoverageData)
+
+	coverageMu.Lock()
+	defer coverageMu.Unlock()
+
+	total := 0
+	var pcBuffer [8]byte
+	for fileIndex, file := range globalData.files {
+		counters := coverdata.Cover.Counters[file]
+		for index := 0; index < len(counters); index++ {
+			val := atomic.SwapUint32(&counters[index], 0)
+			if val != 0 {
+				// Calculate the synthetic PC.
+				pc := globalData.syntheticPCs[fileIndex][index]
+
+				usermem.ByteOrder.PutUint64(pcBuffer[:], pc)
+				n, err := w.Write(pcBuffer[:])
+				if err != nil {
+					if err == io.EOF {
+						// Simply stop writing if we encounter EOF; it's ok if we attempted to
+						// write more than we can hold.
+						return total + n
+					}
+					panic(fmt.Sprintf("Internal error writing PCs to kcov area: %v", err))
+				}
+				total += n
+			}
+		}
+	}
+
+	if total == 0 {
+		// An empty profile indicates that coverage is not enabled, in which case
+		// there shouldn't be any task work registered.
+		panic("kcov task work is registered, but no coverage data was found")
+	}
+	return total
+}
+
+// initCoverageData initializes globalData. It should only be called once,
+// before any kcov data is written.
+func initCoverageData() {
+	// First, order all files. Then calculate synthetic PCs for every block
+	// (using the well-defined ordering for files as well).
+	for file := range coverdata.Cover.Blocks {
+		globalData.files = append(globalData.files, file)
+	}
+	sort.Strings(globalData.files)
+
+	// nextSyntheticPC is the first PC that we generate for a block.
+	//
+	// This uses a standard-looking kernel range for simplicity.
+	//
+	// FIXME(b/160639712): This is only necessary because syzkaller requires
+	// addresses in the kernel range. If we can remove this constraint, then we
+	// should be able to use the actual addresses.
+	var nextSyntheticPC uint64 = 0xffffffff80000000
+	for _, file := range globalData.files {
+		blocks := coverdata.Cover.Blocks[file]
+		thisFile := make([]uint64, 0, len(blocks))
+		for range blocks {
+			thisFile = append(thisFile, nextSyntheticPC)
+			nextSyntheticPC++ // Advance.
+		}
+		globalData.syntheticPCs = append(globalData.syntheticPCs, thisFile)
+	}
+}
diff --git a/pkg/sentry/fsimpl/sys/BUILD b/pkg/sentry/fsimpl/sys/BUILD
index 1b548ccd4f..f9b232da6e 100644
--- a/pkg/sentry/fsimpl/sys/BUILD
+++ b/pkg/sentry/fsimpl/sys/BUILD
@@ -5,17 +5,22 @@ licenses(["notice"])
 go_library(
     name = "sys",
     srcs = [
+        "kcov.go",
         "sys.go",
     ],
     visibility = ["//pkg/sentry:internal"],
     deps = [
         "//pkg/abi/linux",
         "//pkg/context",
+        "//pkg/coverage",
+        "//pkg/sentry/arch",
         "//pkg/sentry/fsimpl/kernfs",
         "//pkg/sentry/kernel",
         "//pkg/sentry/kernel/auth",
+        "//pkg/sentry/memmap",
         "//pkg/sentry/vfs",
         "//pkg/syserror",
+        "//pkg/usermem",
     ],
 )
 
diff --git a/pkg/sentry/fsimpl/sys/kcov.go b/pkg/sentry/fsimpl/sys/kcov.go
new file mode 100644
index 0000000000..92710d8775
--- /dev/null
+++ b/pkg/sentry/fsimpl/sys/kcov.go
@@ -0,0 +1,116 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package sys
+
+import (
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/sentry/arch"
+	"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/memmap"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+	"gvisor.dev/gvisor/pkg/syserror"
+	"gvisor.dev/gvisor/pkg/usermem"
+)
+
+func (fs *filesystem) newKcovFile(ctx context.Context, creds *auth.Credentials) *kernfs.Dentry {
+	k := &kcovInode{}
+	k.InodeAttrs.Init(creds, 0, 0, fs.NextIno(), linux.S_IFREG|0600)
+	d := &kernfs.Dentry{}
+	d.Init(k)
+	return d
+}
+
+// kcovInode implements kernfs.Inode.
+type kcovInode struct {
+	kernfs.InodeAttrs
+	kernfs.InodeNoopRefCount
+	kernfs.InodeNotSymlink
+	kernfs.InodeNotDirectory
+}
+
+func (i *kcovInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+	k := kernel.KernelFromContext(ctx)
+	if k == nil {
+		panic("KernelFromContext returned nil")
+	}
+	fd := &kcovFD{
+		inode: i,
+		kcov:  k.NewKcov(),
+	}
+
+	if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{
+		DenyPRead:  true,
+		DenyPWrite: true,
+	}); err != nil {
+		return nil, err
+	}
+	return &fd.vfsfd, nil
+}
+
+type kcovFD struct {
+	vfs.FileDescriptionDefaultImpl
+	vfs.NoLockFD
+
+	vfsfd vfs.FileDescription
+	inode *kcovInode
+	kcov  *kernel.Kcov
+}
+
+// Ioctl implements vfs.FileDescriptionImpl.Ioctl.
+func (fd *kcovFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+	cmd := uint32(args[1].Int())
+	arg := args[2].Uint64()
+	switch uint32(cmd) {
+	case linux.KCOV_INIT_TRACE:
+		return 0, fd.kcov.InitTrace(arg)
+	case linux.KCOV_ENABLE:
+		return 0, fd.kcov.EnableTrace(ctx, uint8(arg))
+	case linux.KCOV_DISABLE:
+		if arg != 0 {
+			// This arg is unused; it should be 0.
+			return 0, syserror.EINVAL
+		}
+		return 0, fd.kcov.DisableTrace(ctx)
+	default:
+		return 0, syserror.ENOTTY
+	}
+}
+
+// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
+func (fd *kcovFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
+	return fd.kcov.ConfigureMMap(ctx, opts)
+}
+
+// Release implements vfs.FileDescriptionImpl.Release.
+func (fd *kcovFD) Release(ctx context.Context) {
+	// kcov instances have reference counts in Linux, but this seems sufficient
+	// for our purposes.
+	fd.kcov.Reset()
+}
+
+// SetStat implements vfs.FileDescriptionImpl.SetStat.
+func (fd *kcovFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
+	creds := auth.CredentialsFromContext(ctx)
+	fs := fd.vfsfd.VirtualDentry().Mount().Filesystem()
+	return fd.inode.SetStat(ctx, fs, creds, opts)
+}
+
+// Stat implements vfs.FileDescriptionImpl.Stat.
+func (fd *kcovFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
+	return fd.inode.Stat(ctx, fd.vfsfd.Mount().Filesystem(), opts)
+}
diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go
index 393feb8022..1f042d9f78 100644
--- a/pkg/sentry/fsimpl/sys/sys.go
+++ b/pkg/sentry/fsimpl/sys/sys.go
@@ -21,6 +21,7 @@ import (
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/coverage"
 	"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -73,7 +74,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
 		}),
 		"firmware": fs.newDir(creds, defaultSysDirMode, nil),
 		"fs":       fs.newDir(creds, defaultSysDirMode, nil),
-		"kernel":   fs.newDir(creds, defaultSysDirMode, nil),
+		"kernel":   kernelDir(ctx, fs, creds),
 		"module":   fs.newDir(creds, defaultSysDirMode, nil),
 		"power":    fs.newDir(creds, defaultSysDirMode, nil),
 	})
@@ -94,6 +95,21 @@ func cpuDir(ctx context.Context, fs *filesystem, creds *auth.Credentials) *kernf
 	return fs.newDir(creds, defaultSysDirMode, children)
 }
 
+func kernelDir(ctx context.Context, fs *filesystem, creds *auth.Credentials) *kernfs.Dentry {
+	// If kcov is available, set up /sys/kernel/debug/kcov. Technically, debugfs
+	// should be mounted at debug/, but for our purposes, it is sufficient to
+	// keep it in sys.
+	var children map[string]*kernfs.Dentry
+	if coverage.KcovAvailable() {
+		children = map[string]*kernfs.Dentry{
+			"debug": fs.newDir(creds, linux.FileMode(0700), map[string]*kernfs.Dentry{
+				"kcov": fs.newKcovFile(ctx, creds),
+			}),
+		}
+	}
+	return fs.newDir(creds, defaultSysDirMode, children)
+}
+
 // Release implements vfs.FilesystemImpl.Release.
 func (fs *filesystem) Release(ctx context.Context) {
 	fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD
index 5416a310d3..d1ecceba3c 100644
--- a/pkg/sentry/kernel/BUILD
+++ b/pkg/sentry/kernel/BUILD
@@ -91,6 +91,8 @@ go_library(
         "fd_table_unsafe.go",
         "fs_context.go",
         "ipc_namespace.go",
+        "kcov.go",
+        "kcov_unsafe.go",
         "kernel.go",
         "kernel_opts.go",
         "kernel_state.go",
@@ -157,6 +159,7 @@ go_library(
         "//pkg/bits",
         "//pkg/bpf",
         "//pkg/context",
+        "//pkg/coverage",
         "//pkg/cpuid",
         "//pkg/eventchannel",
         "//pkg/fspath",
diff --git a/pkg/sentry/kernel/kcov.go b/pkg/sentry/kernel/kcov.go
new file mode 100644
index 0000000000..aad63aa998
--- /dev/null
+++ b/pkg/sentry/kernel/kcov.go
@@ -0,0 +1,321 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kernel
+
+import (
+	"fmt"
+	"io"
+	"sync"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/coverage"
+	"gvisor.dev/gvisor/pkg/safemem"
+	"gvisor.dev/gvisor/pkg/sentry/memmap"
+	"gvisor.dev/gvisor/pkg/sentry/mm"
+	"gvisor.dev/gvisor/pkg/sentry/pgalloc"
+	"gvisor.dev/gvisor/pkg/sentry/usage"
+	"gvisor.dev/gvisor/pkg/syserror"
+	"gvisor.dev/gvisor/pkg/usermem"
+)
+
+// kcovAreaSizeMax is the maximum number of uint64 entries allowed in the kcov
+// area. On Linux, the maximum is INT_MAX / 8.
+const kcovAreaSizeMax = 10 * 1024 * 1024
+
+// Kcov provides kernel coverage data to userspace through a memory-mapped
+// region, as kcov does in Linux.
+//
+// To give the illusion that the data is always up to date, we update the shared
+// memory every time before we return to userspace.
+type Kcov struct {
+	// mfp provides application memory. It is immutable after creation.
+	mfp pgalloc.MemoryFileProvider
+
+	// mu protects all of the fields below.
+	mu sync.RWMutex
+
+	// mode is the current kcov mode.
+	mode uint8
+
+	// size is the size of the mapping through which the kernel conveys coverage
+	// information to userspace.
+	size uint64
+
+	// owningTask is the task that currently owns coverage data on the system. The
+	// interface for kcov essentially requires that coverage is only going to a
+	// single task. Note that kcov should only generate coverage data for the
+	// owning task, but we currently generate global coverage.
+	owningTask *Task
+
+	// count is a locally cached version of the first uint64 in the kcov data,
+	// which is the number of subsequent entries representing PCs.
+	//
+	// It is used with Kcov.countBlock(), to copy in/out the first element of
+	// the actual data in an efficient manner, avoid boilerplate, and prevent
+	// accidental garbage escapes by the temporary counts.
+	count uint64
+
+	mappable *mm.SpecialMappable
+}
+
+// NewKcov creates and returns a Kcov instance.
+func (k *Kernel) NewKcov() *Kcov {
+	return &Kcov{
+		mfp: k,
+	}
+}
+
+var coveragePool = sync.Pool{
+	New: func() interface{} {
+		return make([]byte, 0)
+	},
+}
+
+// TaskWork implements TaskWorker.TaskWork by appending newly covered PCs to the kcov area.
+func (kcov *Kcov) TaskWork(t *Task) {
+	kcov.mu.Lock()
+	defer kcov.mu.Unlock()
+
+	// No mapping (cleared by resetLocked): nothing to write to, so stop re-registering.
+	if kcov.mappable == nil {
+		return
+	}
+	rw := &kcovReadWriter{
+		mf: kcov.mfp.MemoryFile(),
+		fr: kcov.mappable.FileRange(),
+	}
+
+	// Read in the PC count, then position the writer just past the last PC.
+	if _, err := safemem.ReadFullToBlocks(rw, kcov.countBlock()); err != nil {
+		panic(fmt.Sprintf("Internal error reading count from kcov area: %v", err))
+	}
+	rw.off = 8 * (1 + kcov.count)
+	n := coverage.ConsumeCoverageData(&kcovIOWriter{rw})
+
+	// Update the PC count by the number of entries written. Note that if we
+	// reached the end of the kcov area, not everything may have been written.
+	kcov.count += uint64(n / 8)
+	rw.off = 0
+	if _, err := safemem.WriteFullFromBlocks(rw, kcov.countBlock()); err != nil {
+		panic(fmt.Sprintf("Internal error writing count to kcov area: %v", err))
+	}
+	t.RegisterWork(kcov) // Re-register for future work.
+}
+
+// InitTrace performs the KCOV_INIT_TRACE ioctl.
+func (kcov *Kcov) InitTrace(size uint64) error {
+	kcov.mu.Lock()
+	defer kcov.mu.Unlock()
+
+	if kcov.mode != linux.KCOV_MODE_DISABLED {
+		return syserror.EBUSY
+	}
+
+	// To simplify all the logic around mapping, we require that the length of the
+	// shared region is a multiple of the system page size.
+	if (8*size)&(usermem.PageSize-1) != 0 {
+		return syserror.EINVAL
+	}
+
+	// We need space for at least two uint64s to hold current position and a
+	// single PC.
+	if size < 2 || size > kcovAreaSizeMax {
+		return syserror.EINVAL
+	}
+
+	kcov.size = size
+	kcov.mode = linux.KCOV_MODE_INIT
+	return nil
+}
+
+// EnableTrace performs the KCOV_ENABLE ioctl on behalf of the calling task.
+func (kcov *Kcov) EnableTrace(ctx context.Context, traceMode uint8) error {
+	t := TaskFromContext(ctx)
+	if t == nil {
+		panic("kcovInode.EnableTrace() cannot be used outside of a task goroutine")
+	}
+
+	kcov.mu.Lock()
+	defer kcov.mu.Unlock()
+
+	// KCOV_ENABLE must be preceded by KCOV_INIT_TRACE and an mmap call.
+	if kcov.mode != linux.KCOV_MODE_INIT || kcov.mappable == nil {
+		return syserror.EINVAL
+	}
+
+	// Check ownership before touching any state, so a failure leaves kcov as is.
+	if kcov.owningTask != nil && kcov.owningTask != t {
+		return syserror.EBUSY
+	}
+
+	switch traceMode {
+	case linux.KCOV_TRACE_PC:
+		// traceMode is a KCOV_TRACE_* type; record the matching KCOV_MODE_* state.
+		kcov.mode = linux.KCOV_MODE_TRACE_PC
+	case linux.KCOV_TRACE_CMP:
+		// We do not support KCOV_MODE_TRACE_CMP.
+		return syserror.ENOTSUP
+	default:
+		return syserror.EINVAL
+	}
+
+	kcov.owningTask = t
+	t.RegisterWork(kcov)
+	// Clear stale data; the task expects only coverage from activation onward.
+	coverage.ClearCoverageData()
+	return nil
+}
+
+// DisableTrace performs the KCOV_DISABLE ioctl.
+func (kcov *Kcov) DisableTrace(ctx context.Context) error {
+	kcov.mu.Lock()
+	defer kcov.mu.Unlock()
+
+	t := TaskFromContext(ctx)
+	if t == nil {
+		panic("kcovInode.DisableTrace() cannot be used outside of a task goroutine")
+	}
+
+	// Only the task recorded as owner by EnableTrace may disable tracing.
+	if t != kcov.owningTask {
+		return syserror.EINVAL
+	}
+	kcov.mode = linux.KCOV_MODE_INIT
+	kcov.resetLocked() // Clears owningTask and drops the mapping.
+	return nil
+}
+
+// Reset is called when the owning task exits or the kcov file is released.
+func (kcov *Kcov) Reset() {
+	kcov.mu.Lock()
+	kcov.resetLocked()
+	kcov.mu.Unlock()
+}
+
+// The kcov instance is reset when the owning task exits or when tracing is
+// disabled.
+func (kcov *Kcov) resetLocked() {
+	kcov.owningTask = nil
+	if kcov.mappable != nil {
+		kcov.mappable = nil
+	}
+}
+
+// ConfigureMMap is called by the vfs.FileDescription for this kcov instance to
+// implement vfs.FileDescription.ConfigureMMap.
+func (kcov *Kcov) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
+	kcov.mu.Lock()
+	defer kcov.mu.Unlock()
+
+	// mmap requires the instance to be in the initialized state (see InitTrace).
+	if kcov.mode != linux.KCOV_MODE_INIT {
+		return syserror.EINVAL
+	}
+	if kcov.mappable == nil {
+		// Get the thread id for the mmap name before allocating anything.
+		t := TaskFromContext(ctx)
+		if t == nil {
+			panic("TaskFromContext returned nil")
+		}
+
+		// Set up the kcov area: kcov.size uint64 entries of 8 bytes each.
+		fr, err := kcov.mfp.MemoryFile().Allocate(kcov.size*8, usage.Anonymous)
+		if err != nil {
+			return err
+		}
+		// For convenience, a special mappable is used here. Note that these mappings
+		// will look different under /proc/[pid]/maps than they do on Linux.
+		kcov.mappable = mm.NewSpecialMappable(fmt.Sprintf("[kcov:%d]", t.ThreadID()), kcov.mfp, fr)
+	}
+	opts.Mappable = kcov.mappable
+	opts.MappingIdentity = kcov.mappable
+	return nil
+}
+
+// kcovReadWriter implements safemem.Reader and safemem.Writer.
+type kcovReadWriter struct {
+	off uint64
+	mf  *pgalloc.MemoryFile
+	fr  memmap.FileRange
+}
+
+// ReadToBlocks implements safemem.Reader.ReadToBlocks.
+func (rw *kcovReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
+	if dsts.IsEmpty() {
+		return 0, nil
+	}
+
+	// Limit the read to the kcov range and check for overflow.
+	if rw.fr.Length() <= rw.off {
+		return 0, io.EOF
+	}
+	start := rw.fr.Start + rw.off
+	end := rw.fr.Start + rw.fr.Length()
+	if rend := start + dsts.NumBytes(); rend < end {
+		end = rend
+	}
+
+	// Get internal mappings.
+	bs, err := rw.mf.MapInternal(memmap.FileRange{start, end}, usermem.Read)
+	if err != nil {
+		return 0, err
+	}
+
+	// Copy from internal mappings.
+	n, err := safemem.CopySeq(dsts, bs)
+	rw.off += n
+	return n, err
+}
+
+// WriteFromBlocks implements safemem.Writer.WriteFromBlocks.
+func (rw *kcovReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
+	if srcs.IsEmpty() {
+		return 0, nil
+	}
+
+	// Limit the write to the kcov area and check for overflow.
+	if rw.fr.Length() <= rw.off {
+		return 0, io.EOF
+	}
+	start := rw.fr.Start + rw.off
+	end := rw.fr.Start + rw.fr.Length()
+	if wend := start + srcs.NumBytes(); wend < end {
+		end = wend
+	}
+
+	// Get internal mapping.
+	bs, err := rw.mf.MapInternal(memmap.FileRange{start, end}, usermem.Write)
+	if err != nil {
+		return 0, err
+	}
+
+	// Copy to internal mapping.
+	n, err := safemem.CopySeq(bs, srcs)
+	rw.off += n
+	return n, err
+}
+
+// kcovIOWriter implements io.Writer as a basic wrapper over kcovReadWriter.
+type kcovIOWriter struct {
+	rw *kcovReadWriter
+}
+
+// Write implements io.Writer.Write.
+func (w *kcovIOWriter) Write(p []byte) (int, error) {
+	bs := safemem.BlockSeqOf(safemem.BlockFromSafeSlice(p))
+	n, err := safemem.WriteFullFromBlocks(w.rw, bs)
+	return int(n), err
+}
diff --git a/pkg/sentry/kernel/kcov_unsafe.go b/pkg/sentry/kernel/kcov_unsafe.go
new file mode 100644
index 0000000000..6f64022ebb
--- /dev/null
+++ b/pkg/sentry/kernel/kcov_unsafe.go
@@ -0,0 +1,28 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kernel
+
+import (
+	"unsafe"
+
+	"gvisor.dev/gvisor/pkg/safemem"
+)
+
+// countBlock provides a safemem.BlockSeq for k.count.
+//
+// Like k.count, the block returned is protected by k.mu.
+func (k *Kcov) countBlock() safemem.BlockSeq {
+	return safemem.BlockSeqOf(safemem.BlockFromSafePointer(unsafe.Pointer(&k.count), int(unsafe.Sizeof(k.count))))
+}
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index 2e0175e36a..402aa1718f 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -248,7 +248,7 @@ type Kernel struct {
 	// SpecialOpts contains special kernel options.
 	SpecialOpts
 
-	// VFS keeps the filesystem state used across the kernel.
+	// vfs keeps the filesystem state used across the kernel.
 	vfs vfs.VirtualFilesystem
 
 	// hostMount is the Mount used for file descriptors that were imported
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index 5aee699e7c..a436610c9c 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -574,6 +574,11 @@ type Task struct {
 	//
 	// startTime is protected by mu.
 	startTime ktime.Time
+
+	// kcov is the kcov instance providing code coverage owned by this task.
+	//
+	// kcov is exclusive to the task goroutine.
+	kcov *Kcov
 }
 
 func (t *Task) savePtraceTracer() *Task {
@@ -903,3 +908,16 @@ func (t *Task) UID() uint32 {
 func (t *Task) GID() uint32 {
 	return uint32(t.Credentials().EffectiveKGID)
 }
+
+// SetKcov sets the kcov instance associated with t.
+func (t *Task) SetKcov(k *Kcov) {
+	t.kcov = k
+}
+
+// ResetKcov resets and detaches the kcov instance associated with t, if any.
+func (t *Task) ResetKcov() {
+	if t.kcov != nil {
+		t.kcov.Reset()
+		t.kcov = nil
+	}
+}
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go
index c165d6cb12..b76f7f5031 100644
--- a/pkg/sentry/kernel/task_exit.go
+++ b/pkg/sentry/kernel/task_exit.go
@@ -239,6 +239,8 @@ func (*runExitMain) execute(t *Task) taskRunState {
 	t.traceExitEvent()
 	lastExiter := t.exitThreadGroup()
 
+	t.ResetKcov()
+
 	// If the task has a cleartid, and the thread group wasn't killed by a
 	// signal, handle that before releasing the MM.
 	if t.cleartid != 0 {
diff --git a/runsc/config/config.go b/runsc/config/config.go
index ca85cef514..8cf0378d5b 100644
--- a/runsc/config/config.go
+++ b/runsc/config/config.go
@@ -300,10 +300,10 @@ type Config struct {
 	// E.g. 0.2 CPU quota will result in 1, and 1.9 in 2.
 	CPUNumFromQuota bool
 
-	// Enables VFS2 (not plumbled through yet).
+	// Enables VFS2 (not plumbed through yet).
 	VFS2 bool
 
-	// Enables FUSE usage (not plumbled through yet).
+	// Enables FUSE usage (not plumbed through yet).
 	FUSE bool
 
 	// TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in
@@ -353,6 +353,8 @@ func (c *Config) ToFlags() []string {
 		"--tx-checksum-offload=" + strconv.FormatBool(c.TXChecksumOffload),
 		"--overlayfs-stale-read=" + strconv.FormatBool(c.OverlayfsStaleRead),
 		"--qdisc=" + c.QDisc.String(),
+		"--vfs2=" + strconv.FormatBool(c.VFS2),
+		"--fuse=" + strconv.FormatBool(c.FUSE),
 	}
 	if c.CPUNumFromQuota {
 		f = append(f, "--cpu-num-from-quota")
diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD
index eea1401acf..65e8299c3e 100644
--- a/test/syscalls/BUILD
+++ b/test/syscalls/BUILD
@@ -262,6 +262,10 @@ syscall_test(
     test = "//test/syscalls/linux:itimer_test",
 )
 
+syscall_test(
+    test = "//test/syscalls/linux:kcov_test",
+)
+
 syscall_test(
     test = "//test/syscalls/linux:kill_test",
 )
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index ed0b6ecf4a..5a323d3319 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -1067,6 +1067,20 @@ cc_binary(
     ],
 )
 
+cc_binary(
+    name = "kcov_test",
+    testonly = 1,
+    srcs = ["kcov.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
 cc_binary(
     name = "kill_test",
     testonly = 1,
diff --git a/test/syscalls/linux/kcov.cc b/test/syscalls/linux/kcov.cc
new file mode 100644
index 0000000000..f3c30444eb
--- /dev/null
+++ b/test/syscalls/linux/kcov.cc
@@ -0,0 +1,70 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include "gtest/gtest.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// For this test to work properly, it must be run with coverage enabled. On
+// native Linux, this involves compiling the kernel with kcov enabled. For
+// gVisor, we need to enable the Go coverage tool, e.g.
+// bazel test --collect_coverage_data --instrumentation_filter=//pkg/... <test>.
+TEST(KcovTest, Kcov) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability((CAP_DAC_OVERRIDE))));
+
+  constexpr int kSize = 4096;
+  constexpr int KCOV_INIT_TRACE = 0x80086301;
+  constexpr int KCOV_ENABLE = 0x6364;
+
+  int fd;
+  ASSERT_THAT(fd = open("/sys/kernel/debug/kcov", O_RDWR),
+              AnyOf(SyscallSucceeds(), SyscallFailsWithErrno(ENOENT)));
+
+  // Kcov not enabled. Test fd, not errno: errno is stale if open() succeeded.
+  SKIP_IF(fd < 0);
+
+  ASSERT_THAT(ioctl(fd, KCOV_INIT_TRACE, kSize), SyscallSucceeds());
+  uint64_t* area = (uint64_t*)mmap(nullptr, kSize * sizeof(uint64_t),
+                                   PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+  ASSERT_TRUE(area != MAP_FAILED);
+  ASSERT_THAT(ioctl(fd, KCOV_ENABLE, 0), SyscallSucceeds());
+
+  for (int i = 0; i < 10; i++) {
+    // Make some syscalls to generate coverage data.
+    ASSERT_THAT(ioctl(fd, KCOV_ENABLE, 0), SyscallFailsWithErrno(EINVAL));
+  }
+
+  uint64_t num_pcs = *(uint64_t*)(area);
+  EXPECT_GT(num_pcs, 0);
+  for (uint64_t i = 1; i <= num_pcs; i++) {
+    // Verify that PCs are in the standard kernel range.
+    EXPECT_GT(area[i], 0xffffffff7fffffffL);
+  }
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor

From e382f99346f54122276a38561f42556b600a9454 Mon Sep 17 00:00:00 2001
From: Jamie Liu <jamieliu@google.com>
Date: Tue, 25 Aug 2020 16:38:07 -0700
Subject: [PATCH 085/211] Return non-zero size for tmpfs statfs(2).

This does not implement accepting or enforcing any size limit, which will be
more complex and has performance implications; it just returns a fixed non-zero
size.

Updates #1936

PiperOrigin-RevId: 328428588
---
 pkg/abi/linux/fs.go                   | 11 ++++++-----
 pkg/sentry/fs/tmpfs/tmpfs.go          | 12 ++++++++++--
 pkg/sentry/fsimpl/tmpfs/filesystem.go | 12 +-----------
 pkg/sentry/fsimpl/tmpfs/tmpfs.go      | 24 ++++++++++++++++++++++++
 4 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/pkg/abi/linux/fs.go b/pkg/abi/linux/fs.go
index 158d2db5bf..2b1ef0d4e7 100644
--- a/pkg/abi/linux/fs.go
+++ b/pkg/abi/linux/fs.go
@@ -44,17 +44,18 @@ type Statfs struct {
 	// Type is one of the filesystem magic values, defined above.
 	Type uint64
 
-	// BlockSize is the data block size.
+	// BlockSize is the optimal transfer block size in bytes.
 	BlockSize int64
 
-	// Blocks is the number of data blocks in use.
+	// Blocks is the maximum number of data blocks the filesystem may store, in
+	// units of BlockSize.
 	Blocks uint64
 
-	// BlocksFree is the number of free blocks.
+	// BlocksFree is the number of free data blocks, in units of BlockSize.
 	BlocksFree uint64
 
-	// BlocksAvailable is the number of blocks free for use by
-	// unprivileged users.
+	// BlocksAvailable is the number of data blocks free for use by
+	// unprivileged users, in units of BlockSize.
 	BlocksAvailable uint64
 
 	// Files is the number of used file nodes on the filesystem.
diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go
index b095312fe7..998b697ca3 100644
--- a/pkg/sentry/fs/tmpfs/tmpfs.go
+++ b/pkg/sentry/fs/tmpfs/tmpfs.go
@@ -16,6 +16,8 @@
 package tmpfs
 
 import (
+	"math"
+
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -32,9 +34,15 @@ import (
 var fsInfo = fs.Info{
 	Type: linux.TMPFS_MAGIC,
 
+	// tmpfs currently does not support configurable size limits. In Linux,
+	// such a tmpfs mount will return f_blocks == f_bfree == f_bavail == 0 from
+	// statfs(2). However, many applications treat this as having a size limit
+	// of 0. To work around this, claim to have a very large but non-zero size,
+	// chosen to ensure that BlockSize * Blocks does not overflow int64 (which
+	// applications may also handle incorrectly).
 	// TODO(b/29637826): allow configuring a tmpfs size and enforce it.
-	TotalBlocks: 0,
-	FreeBlocks:  0,
+	TotalBlocks: math.MaxInt64 / usermem.PageSize,
+	FreeBlocks:  math.MaxInt64 / usermem.PageSize,
 }
 
 // rename implements fs.InodeOperations.Rename for tmpfs nodes.
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index 7924a09115..eddfeab768 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -25,7 +25,6 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/syserror"
-	"gvisor.dev/gvisor/pkg/usermem"
 )
 
 // Sync implements vfs.FilesystemImpl.Sync.
@@ -706,16 +705,7 @@ func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu
 	if _, err := resolveLocked(ctx, rp); err != nil {
 		return linux.Statfs{}, err
 	}
-	statfs := linux.Statfs{
-		Type:         linux.TMPFS_MAGIC,
-		BlockSize:    usermem.PageSize,
-		FragmentSize: usermem.PageSize,
-		NameLength:   linux.NAME_MAX,
-		// TODO(b/29637826): Allow configuring a tmpfs size and enforce it.
-		Blocks:     0,
-		BlocksFree: 0,
-	}
-	return statfs, nil
+	return globalStatfs, nil
 }
 
 // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index 428f62aaad..a7fdf19caf 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -201,6 +201,25 @@ func (fs *filesystem) Release(ctx context.Context) {
 	fs.vfsfs.VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
 }
 
+// globalStatfs is returned by StatFSAt and fileDescription.StatFS. Immutable.
+var globalStatfs = linux.Statfs{
+	Type:         linux.TMPFS_MAGIC,
+	BlockSize:    usermem.PageSize,
+	FragmentSize: usermem.PageSize,
+	NameLength:   linux.NAME_MAX,
+
+	// tmpfs currently does not support configurable size limits. In Linux,
+	// such a tmpfs mount will return f_blocks == f_bfree == f_bavail == 0 from
+	// statfs(2). However, many applications treat this as having a size limit
+	// of 0. To work around this, claim to have a very large but non-zero size,
+	// chosen to ensure that BlockSize * Blocks does not overflow int64 (which
+	// applications may also handle incorrectly).
+	// TODO(b/29637826): allow configuring a tmpfs size and enforce it.
+	Blocks:          math.MaxInt64 / usermem.PageSize,
+	BlocksFree:      math.MaxInt64 / usermem.PageSize,
+	BlocksAvailable: math.MaxInt64 / usermem.PageSize,
+}
+
 // dentry implements vfs.DentryImpl.
 type dentry struct {
 	vfsd vfs.Dentry
@@ -698,6 +717,11 @@ func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions)
 	return nil
 }
 
+// StatFS implements vfs.FileDescriptionImpl.StatFS.
+func (fd *fileDescription) StatFS(ctx context.Context) (linux.Statfs, error) {
+	return globalStatfs, nil
+}
+
 // Listxattr implements vfs.FileDescriptionImpl.Listxattr.
 func (fd *fileDescription) Listxattr(ctx context.Context, size uint64) ([]string, error) {
 	return fd.inode().listxattr(size)

From 87e03869065f0784bf9ed76855205693128f65a4 Mon Sep 17 00:00:00 2001
From: Dean Deng <deandeng@google.com>
Date: Tue, 25 Aug 2020 21:01:45 -0700
Subject: [PATCH 086/211] Use new reference count utility throughout gvisor.

This uses the refs_vfs2 template in vfs2 as well as objects common to vfs1 and
vfs2. Note that vfs1-only refcounts are not replaced, since vfs1 will be deleted
soon anyway.

The following structs now use the new tool, with leak check enabled:
devpts:rootInode
fuse:inode
kernfs:Dentry
kernfs:dir
kernfs:readonlyDir
kernfs:StaticDirectory
proc:fdDirInode
proc:fdInfoDirInode
proc:subtasksInode
proc:taskInode
proc:tasksInode
vfs:FileDescription
vfs:MountNamespace
vfs:Filesystem
sys:dir
kernel:FSContext
kernel:ProcessGroup
kernel:Session
shm:Shm
mm:aioMappable
mm:SpecialMappable
transport:queue

And the following use the template, but because they currently are not leak
checked, a TODO is left instead of enabling leak check in this patch:
kernel:FDTable
tun:tunEndpoint

Updates #1486.

PiperOrigin-RevId: 328460377
---
 pkg/refs_vfs2/BUILD                           |  2 +-
 pkg/refs_vfs2/refs_template.go                | 17 +++-
 pkg/sentry/fsimpl/devpts/BUILD                | 15 ++++
 pkg/sentry/fsimpl/devpts/devpts.go            |  7 ++
 pkg/sentry/fsimpl/fuse/BUILD                  | 13 +++
 pkg/sentry/fsimpl/fuse/fusefs.go              |  7 ++
 pkg/sentry/fsimpl/kernfs/BUILD                | 54 ++++++++++-
 pkg/sentry/fsimpl/kernfs/inode_impl_util.go   | 27 +++---
 pkg/sentry/fsimpl/kernfs/kernfs.go            | 24 +++--
 pkg/sentry/fsimpl/kernfs/kernfs_test.go       | 12 +++
 pkg/sentry/fsimpl/proc/BUILD                  | 61 +++++++++++++
 pkg/sentry/fsimpl/proc/subtasks.go            |  7 ++
 pkg/sentry/fsimpl/proc/task.go                |  8 ++
 pkg/sentry/fsimpl/proc/task_fds.go            | 16 +++-
 pkg/sentry/fsimpl/proc/task_net.go            |  6 +-
 pkg/sentry/fsimpl/proc/tasks.go               |  7 ++
 pkg/sentry/fsimpl/sys/BUILD                   | 15 ++++
 pkg/sentry/fsimpl/sys/sys.go                  |  9 +-
 pkg/sentry/kernel/BUILD                       | 48 ++++++++++
 pkg/sentry/kernel/fd_table.go                 | 21 +++--
 pkg/sentry/kernel/fd_table_unsafe.go          |  2 +
 pkg/sentry/kernel/fs_context.go               | 89 ++++++++++---------
 pkg/sentry/kernel/sessions.go                 | 29 +++---
 pkg/sentry/kernel/shm/BUILD                   | 13 +++
 pkg/sentry/kernel/shm/shm.go                  | 19 ++--
 pkg/sentry/mm/BUILD                           | 24 +++++
 pkg/sentry/mm/aio_context.go                  |  7 +-
 pkg/sentry/mm/special_mappable.go             |  7 +-
 pkg/sentry/socket/unix/transport/BUILD        | 12 +++
 .../socket/unix/transport/connectioned.go     |  8 +-
 .../socket/unix/transport/connectionless.go   |  2 +-
 pkg/sentry/socket/unix/transport/queue.go     | 13 +--
 pkg/sentry/vfs/BUILD                          | 37 ++++++++
 pkg/sentry/vfs/README.md                      |  9 --
 pkg/sentry/vfs/file_description.go            | 39 +-------
 pkg/sentry/vfs/filesystem.go                  | 37 +-------
 pkg/sentry/vfs/mount.go                       | 21 ++---
 pkg/tcpip/link/tun/BUILD                      | 14 +++
 pkg/tcpip/link/tun/device.go                  |  9 +-
 39 files changed, 531 insertions(+), 236 deletions(-)

diff --git a/pkg/refs_vfs2/BUILD b/pkg/refs_vfs2/BUILD
index 7b3e10683c..577b827a5b 100644
--- a/pkg/refs_vfs2/BUILD
+++ b/pkg/refs_vfs2/BUILD
@@ -11,7 +11,7 @@ go_template(
     types = [
         "T",
     ],
-    visibility = ["//pkg/sentry:internal"],
+    visibility = ["//:sandbox"],
     deps = [
         "//pkg/log",
         "//pkg/refs",
diff --git a/pkg/refs_vfs2/refs_template.go b/pkg/refs_vfs2/refs_template.go
index 99c43c065b..d9b552896a 100644
--- a/pkg/refs_vfs2/refs_template.go
+++ b/pkg/refs_vfs2/refs_template.go
@@ -12,11 +12,15 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// Package refs_template defines a template that can be used by reference counted
-// objects.
+// Package refs_template defines a template that can be used by reference
+// counted objects. The "owner" template parameter is used in log messages to
+// indicate the type of reference-counted object that exhibited a reference
+// leak. As a result, structs that are embedded in other structs should not use
+// this template, since it will make tracking down leaks more difficult.
 package refs_template
 
 import (
+	"fmt"
 	"runtime"
 	"sync/atomic"
 
@@ -38,6 +42,11 @@ var ownerType *T
 // Note that the number of references is actually refCount + 1 so that a default
 // zero-value Refs object contains one reference.
 //
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
 // +stateify savable
 type Refs struct {
 	// refCount is composed of two fields:
@@ -82,7 +91,7 @@ func (r *Refs) ReadRefs() int64 {
 //go:nosplit
 func (r *Refs) IncRef() {
 	if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
-		panic("Incrementing non-positive ref count")
+		panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, ownerType))
 	}
 }
 
@@ -122,7 +131,7 @@ func (r *Refs) TryIncRef() bool {
 func (r *Refs) DecRef(destroy func()) {
 	switch v := atomic.AddInt64(&r.refCount, -1); {
 	case v < -1:
-		panic("Decrementing non-positive ref count")
+		panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, ownerType))
 
 	case v == -1:
 		// Call the destructor.
diff --git a/pkg/sentry/fsimpl/devpts/BUILD b/pkg/sentry/fsimpl/devpts/BUILD
index 93512c9b6d..3f64fab3a7 100644
--- a/pkg/sentry/fsimpl/devpts/BUILD
+++ b/pkg/sentry/fsimpl/devpts/BUILD
@@ -1,7 +1,19 @@
 load("//tools:defs.bzl", "go_library", "go_test")
+load("//tools/go_generics:defs.bzl", "go_template_instance")
 
 licenses(["notice"])
 
+go_template_instance(
+    name = "root_inode_refs",
+    out = "root_inode_refs.go",
+    package = "devpts",
+    prefix = "rootInode",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "rootInode",
+    },
+)
+
 go_library(
     name = "devpts",
     srcs = [
@@ -9,6 +21,7 @@ go_library(
         "line_discipline.go",
         "master.go",
         "queue.go",
+        "root_inode_refs.go",
         "slave.go",
         "terminal.go",
     ],
@@ -16,6 +29,8 @@ go_library(
     deps = [
         "//pkg/abi/linux",
         "//pkg/context",
+        "//pkg/log",
+        "//pkg/refs",
         "//pkg/safemem",
         "//pkg/sentry/arch",
         "//pkg/sentry/fs/lock",
diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go
index 3f3a099bd1..0eaff90873 100644
--- a/pkg/sentry/fsimpl/devpts/devpts.go
+++ b/pkg/sentry/fsimpl/devpts/devpts.go
@@ -83,6 +83,7 @@ func (fstype FilesystemType) newFilesystem(vfsObj *vfs.VirtualFilesystem, creds
 	}
 	root.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, devMinor, 1, linux.ModeDirectory|0555)
 	root.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+	root.EnableLeakCheck()
 	root.dentry.Init(root)
 
 	// Construct the pts master inode and dentry. Linux always uses inode
@@ -110,6 +111,7 @@ func (fs *filesystem) Release(ctx context.Context) {
 
 // rootInode is the root directory inode for the devpts mounts.
 type rootInode struct {
+	rootInodeRefs
 	kernfs.AlwaysValid
 	kernfs.InodeAttrs
 	kernfs.InodeDirectoryNoNewChildren
@@ -233,3 +235,8 @@ func (i *rootInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback,
 	}
 	return offset, nil
 }
+
+// DecRef implements kernfs.Inode.DecRef.
+func (i *rootInode) DecRef(context.Context) {
+	i.rootInodeRefs.DecRef(i.Destroy)
+}
diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD
index 999111deb3..53a4f3012a 100644
--- a/pkg/sentry/fsimpl/fuse/BUILD
+++ b/pkg/sentry/fsimpl/fuse/BUILD
@@ -15,6 +15,17 @@ go_template_instance(
     },
 )
 
+go_template_instance(
+    name = "inode_refs",
+    out = "inode_refs.go",
+    package = "fuse",
+    prefix = "inode",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "inode",
+    },
+)
+
 go_library(
     name = "fuse",
     srcs = [
@@ -22,6 +33,7 @@ go_library(
         "dev.go",
         "fusefs.go",
         "init.go",
+        "inode_refs.go",
         "register.go",
         "request_list.go",
     ],
@@ -30,6 +42,7 @@ go_library(
         "//pkg/abi/linux",
         "//pkg/context",
         "//pkg/log",
+        "//pkg/refs",
         "//pkg/sentry/fsimpl/devtmpfs",
         "//pkg/sentry/fsimpl/kernfs",
         "//pkg/sentry/kernel",
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index 44021ee4bc..9717c0e15b 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -198,6 +198,7 @@ func (fs *filesystem) Release(ctx context.Context) {
 
 // inode implements kernfs.Inode.
 type inode struct {
+	inodeRefs
 	kernfs.InodeAttrs
 	kernfs.InodeNoDynamicLookup
 	kernfs.InodeNotSymlink
@@ -213,6 +214,7 @@ func (fs *filesystem) newInode(creds *auth.Credentials, mode linux.FileMode) *ke
 	i := &inode{}
 	i.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0755)
 	i.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+	i.EnableLeakCheck()
 	i.dentry.Init(i)
 
 	return &i.dentry
@@ -324,3 +326,8 @@ func (i *inode) Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptio
 
 	return statFromFUSEAttr(out.Attr, opts.Mask, fusefs.devMinor), nil
 }
+
+// DecRef implements kernfs.Inode.DecRef.
+func (i *inode) DecRef(context.Context) {
+	i.inodeRefs.DecRef(i.Destroy)
+}
diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD
index 3835557feb..637dca70cf 100644
--- a/pkg/sentry/fsimpl/kernfs/BUILD
+++ b/pkg/sentry/fsimpl/kernfs/BUILD
@@ -26,9 +26,54 @@ go_template_instance(
     },
 )
 
+go_template_instance(
+    name = "dentry_refs",
+    out = "dentry_refs.go",
+    package = "kernfs",
+    prefix = "Dentry",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "Dentry",
+    },
+)
+
+go_template_instance(
+    name = "static_directory_refs",
+    out = "static_directory_refs.go",
+    package = "kernfs",
+    prefix = "StaticDirectory",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "StaticDirectory",
+    },
+)
+
+go_template_instance(
+    name = "dir_refs",
+    out = "dir_refs.go",
+    package = "kernfs_test",
+    prefix = "dir",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "dir",
+    },
+)
+
+go_template_instance(
+    name = "readonly_dir_refs",
+    out = "readonly_dir_refs.go",
+    package = "kernfs_test",
+    prefix = "readonlyDir",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "readonlyDir",
+    },
+)
+
 go_library(
     name = "kernfs",
     srcs = [
+        "dentry_refs.go",
         "dynamic_bytes_file.go",
         "fd_impl_util.go",
         "filesystem.go",
@@ -36,6 +81,7 @@ go_library(
         "inode_impl_util.go",
         "kernfs.go",
         "slot_list.go",
+        "static_directory_refs.go",
         "symlink.go",
     ],
     visibility = ["//pkg/sentry:internal"],
@@ -59,11 +105,17 @@ go_library(
 go_test(
     name = "kernfs_test",
     size = "small",
-    srcs = ["kernfs_test.go"],
+    srcs = [
+        "dir_refs.go",
+        "kernfs_test.go",
+        "readonly_dir_refs.go",
+    ],
     deps = [
         ":kernfs",
         "//pkg/abi/linux",
         "//pkg/context",
+        "//pkg/log",
+        "//pkg/refs",
         "//pkg/sentry/contexttest",
         "//pkg/sentry/fsimpl/testutil",
         "//pkg/sentry/kernel/auth",
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index 885856868f..f442a56060 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -20,7 +20,6 @@ import (
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
-	"gvisor.dev/gvisor/pkg/refs"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/sync"
@@ -344,8 +343,6 @@ type OrderedChildrenOptions struct {
 //
 // Must be initialize with Init before first use.
 type OrderedChildren struct {
-	refs.AtomicRefCount
-
 	// Can children be modified by user syscalls? It set to false, interface
 	// methods that would modify the children return EPERM. Immutable.
 	writable bool
@@ -361,14 +358,14 @@ func (o *OrderedChildren) Init(opts OrderedChildrenOptions) {
 	o.set = make(map[string]*slot)
 }
 
-// DecRef implements Inode.DecRef.
-func (o *OrderedChildren) DecRef(ctx context.Context) {
-	o.AtomicRefCount.DecRefWithDestructor(ctx, func(context.Context) {
-		o.mu.Lock()
-		defer o.mu.Unlock()
-		o.order.Reset()
-		o.set = nil
-	})
+// Destroy clears the children stored in o. It should be called by structs
+// embedding OrderedChildren upon destruction, i.e. when their reference count
+// reaches zero.
+func (o *OrderedChildren) Destroy() {
+	o.mu.Lock()
+	defer o.mu.Unlock()
+	o.order.Reset()
+	o.set = nil
 }
 
 // Populate inserts children into this OrderedChildren, and d's dentry
@@ -549,6 +546,7 @@ func (InodeSymlink) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.D
 //
 // +stateify savable
 type StaticDirectory struct {
+	StaticDirectoryRefs
 	InodeNotSymlink
 	InodeDirectoryNoNewChildren
 	InodeAttrs
@@ -594,11 +592,16 @@ func (s *StaticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd
 	return fd.VFSFileDescription(), nil
 }
 
-// SetStat implements Inode.SetStat not allowing inode attributes to be changed.
+// SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed.
 func (*StaticDirectory) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
 	return syserror.EPERM
 }
 
+// DecRef implements kernfs.Inode.DecRef.
+func (s *StaticDirectory) DecRef(context.Context) {
+	s.StaticDirectoryRefs.DecRef(s.Destroy)
+}
+
 // AlwaysValid partially implements kernfs.inodeDynamicLookup.
 type AlwaysValid struct{}
 
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index 51dbc050c1..ca36858005 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -57,7 +57,6 @@ import (
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
-	"gvisor.dev/gvisor/pkg/refs"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/sync"
@@ -161,9 +160,9 @@ const (
 //
 // Must be initialized by Init prior to first use.
 type Dentry struct {
-	vfsd vfs.Dentry
+	DentryRefs
 
-	refs.AtomicRefCount
+	vfsd vfs.Dentry
 
 	// flags caches useful information about the dentry from the inode. See the
 	// dflags* consts above. Must be accessed by atomic ops.
@@ -194,6 +193,7 @@ func (d *Dentry) Init(inode Inode) {
 	if ftype == linux.ModeSymlink {
 		d.flags |= dflagsIsSymlink
 	}
+	d.EnableLeakCheck()
 }
 
 // VFSDentry returns the generic vfs dentry for this kernfs dentry.
@@ -213,16 +213,14 @@ func (d *Dentry) isSymlink() bool {
 
 // DecRef implements vfs.DentryImpl.DecRef.
 func (d *Dentry) DecRef(ctx context.Context) {
-	d.AtomicRefCount.DecRefWithDestructor(ctx, d.destroy)
-}
-
-// Precondition: Dentry must be removed from VFS' dentry cache.
-func (d *Dentry) destroy(ctx context.Context) {
-	d.inode.DecRef(ctx) // IncRef from Init.
-	d.inode = nil
-	if d.parent != nil {
-		d.parent.DecRef(ctx) // IncRef from Dentry.InsertChild.
-	}
+	// Before the destructor is called, Dentry must be removed from VFS' dentry cache.
+	d.DentryRefs.DecRef(func() {
+		d.inode.DecRef(ctx) // IncRef from Init.
+		d.inode = nil
+		if d.parent != nil {
+			d.parent.DecRef(ctx) // IncRef from Dentry.InsertChild.
+		}
+	})
 }
 
 // InotifyWithParent implements vfs.DentryImpl.InotifyWithParent.
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
index e5c28c0e4c..e376d1736a 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
@@ -96,6 +96,7 @@ func (*attrs) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.S
 }
 
 type readonlyDir struct {
+	readonlyDirRefs
 	attrs
 	kernfs.InodeNotSymlink
 	kernfs.InodeNoDynamicLookup
@@ -111,6 +112,7 @@ func (fs *filesystem) newReadonlyDir(creds *auth.Credentials, mode linux.FileMod
 	dir := &readonlyDir{}
 	dir.attrs.Init(creds, 0 /* devMajor */, 0 /* devMinor */, fs.NextIno(), linux.ModeDirectory|mode)
 	dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+	dir.EnableLeakCheck()
 	dir.dentry.Init(dir)
 
 	dir.IncLinks(dir.OrderedChildren.Populate(&dir.dentry, contents))
@@ -128,7 +130,12 @@ func (d *readonlyDir) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs
 	return fd.VFSFileDescription(), nil
 }
 
+func (d *readonlyDir) DecRef(context.Context) {
+	d.readonlyDirRefs.DecRef(d.Destroy)
+}
+
 type dir struct {
+	dirRefs
 	attrs
 	kernfs.InodeNotSymlink
 	kernfs.InodeNoDynamicLookup
@@ -145,6 +152,7 @@ func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, conte
 	dir.fs = fs
 	dir.attrs.Init(creds, 0 /* devMajor */, 0 /* devMinor */, fs.NextIno(), linux.ModeDirectory|mode)
 	dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{Writable: true})
+	dir.EnableLeakCheck()
 	dir.dentry.Init(dir)
 
 	dir.IncLinks(dir.OrderedChildren.Populate(&dir.dentry, contents))
@@ -162,6 +170,10 @@ func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry,
 	return fd.VFSFileDescription(), nil
 }
 
+func (d *dir) DecRef(context.Context) {
+	d.dirRefs.DecRef(d.Destroy)
+}
+
 func (d *dir) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*vfs.Dentry, error) {
 	creds := auth.CredentialsFromContext(ctx)
 	dir := d.fs.newDir(creds, opts.Mode, nil)
diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD
index 14ecfd3005..a45b44440a 100644
--- a/pkg/sentry/fsimpl/proc/BUILD
+++ b/pkg/sentry/fsimpl/proc/BUILD
@@ -1,18 +1,79 @@
 load("//tools:defs.bzl", "go_library", "go_test")
+load("//tools/go_generics:defs.bzl", "go_template_instance")
 
 licenses(["notice"])
 
+go_template_instance(
+    name = "fd_dir_inode_refs",
+    out = "fd_dir_inode_refs.go",
+    package = "proc",
+    prefix = "fdDirInode",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "fdDirInode",
+    },
+)
+
+go_template_instance(
+    name = "fd_info_dir_inode_refs",
+    out = "fd_info_dir_inode_refs.go",
+    package = "proc",
+    prefix = "fdInfoDirInode",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "fdInfoDirInode",
+    },
+)
+
+go_template_instance(
+    name = "subtasks_inode_refs",
+    out = "subtasks_inode_refs.go",
+    package = "proc",
+    prefix = "subtasksInode",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "subtasksInode",
+    },
+)
+
+go_template_instance(
+    name = "task_inode_refs",
+    out = "task_inode_refs.go",
+    package = "proc",
+    prefix = "taskInode",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "taskInode",
+    },
+)
+
+go_template_instance(
+    name = "tasks_inode_refs",
+    out = "tasks_inode_refs.go",
+    package = "proc",
+    prefix = "tasksInode",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "tasksInode",
+    },
+)
+
 go_library(
     name = "proc",
     srcs = [
+        "fd_dir_inode_refs.go",
+        "fd_info_dir_inode_refs.go",
         "filesystem.go",
         "subtasks.go",
+        "subtasks_inode_refs.go",
         "task.go",
         "task_fds.go",
         "task_files.go",
+        "task_inode_refs.go",
         "task_net.go",
         "tasks.go",
         "tasks_files.go",
+        "tasks_inode_refs.go",
         "tasks_sys.go",
     ],
     visibility = ["//pkg/sentry:internal"],
diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go
index f25747da32..01c0efb3a4 100644
--- a/pkg/sentry/fsimpl/proc/subtasks.go
+++ b/pkg/sentry/fsimpl/proc/subtasks.go
@@ -31,6 +31,7 @@ import (
 //
 // +stateify savable
 type subtasksInode struct {
+	subtasksInodeRefs
 	kernfs.InodeNotSymlink
 	kernfs.InodeDirectoryNoNewChildren
 	kernfs.InodeAttrs
@@ -57,6 +58,7 @@ func (fs *filesystem) newSubtasks(task *kernel.Task, pidns *kernel.PIDNamespace,
 	// Note: credentials are overridden by taskOwnedInode.
 	subInode.InodeAttrs.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
 	subInode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+	subInode.EnableLeakCheck()
 
 	inode := &taskOwnedInode{Inode: subInode, owner: task}
 	dentry := &kernfs.Dentry{}
@@ -182,3 +184,8 @@ func (i *subtasksInode) Stat(ctx context.Context, vsfs *vfs.Filesystem, opts vfs
 func (*subtasksInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
 	return syserror.EPERM
 }
+
+// DecRef implements kernfs.Inode.DecRef.
+func (i *subtasksInode) DecRef(context.Context) {
+	i.subtasksInodeRefs.DecRef(i.Destroy)
+}
diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go
index 109b31b4cd..66b557abd7 100644
--- a/pkg/sentry/fsimpl/proc/task.go
+++ b/pkg/sentry/fsimpl/proc/task.go
@@ -32,6 +32,7 @@ import (
 //
 // +stateify savable
 type taskInode struct {
+	taskInodeRefs
 	kernfs.InodeNotSymlink
 	kernfs.InodeDirectoryNoNewChildren
 	kernfs.InodeNoDynamicLookup
@@ -84,6 +85,7 @@ func (fs *filesystem) newTaskInode(task *kernel.Task, pidns *kernel.PIDNamespace
 	taskInode := &taskInode{task: task}
 	// Note: credentials are overridden by taskOwnedInode.
 	taskInode.InodeAttrs.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
+	taskInode.EnableLeakCheck()
 
 	inode := &taskOwnedInode{Inode: taskInode, owner: task}
 	dentry := &kernfs.Dentry{}
@@ -119,6 +121,11 @@ func (*taskInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, v
 	return syserror.EPERM
 }
 
+// DecRef implements kernfs.Inode.DecRef.
+func (i *taskInode) DecRef(context.Context) {
+	i.taskInodeRefs.DecRef(i.Destroy)
+}
+
 // taskOwnedInode implements kernfs.Inode and overrides inode owner with task
 // effective user and group.
 type taskOwnedInode struct {
@@ -147,6 +154,7 @@ func (fs *filesystem) newTaskOwnedDir(task *kernel.Task, ino uint64, perm linux.
 	dir.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, perm, kernfs.GenericDirectoryFDOptions{
 		SeekEnd: kernfs.SeekEndZero,
 	})
+	dir.EnableLeakCheck()
 
 	inode := &taskOwnedInode{Inode: dir, owner: task}
 	d := &kernfs.Dentry{}
diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go
index e8fcb9aa11..0527b2de8f 100644
--- a/pkg/sentry/fsimpl/proc/task_fds.go
+++ b/pkg/sentry/fsimpl/proc/task_fds.go
@@ -22,7 +22,6 @@ import (
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
-	"gvisor.dev/gvisor/pkg/refs"
 	"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -101,6 +100,7 @@ func (i *fdDir) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, off
 //
 // +stateify savable
 type fdDirInode struct {
+	fdDirInodeRefs
 	kernfs.InodeNotSymlink
 	kernfs.InodeDirectoryNoNewChildren
 	kernfs.InodeAttrs
@@ -120,6 +120,7 @@ func (fs *filesystem) newFDDirInode(task *kernel.Task) *kernfs.Dentry {
 		},
 	}
 	inode.InodeAttrs.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
+	inode.EnableLeakCheck()
 
 	dentry := &kernfs.Dentry{}
 	dentry.Init(inode)
@@ -175,6 +176,11 @@ func (i *fdDirInode) CheckPermissions(ctx context.Context, creds *auth.Credentia
 	return err
 }
 
+// DecRef implements kernfs.Inode.DecRef.
+func (i *fdDirInode) DecRef(context.Context) {
+	i.fdDirInodeRefs.DecRef(i.Destroy)
+}
+
 // fdSymlink is an symlink for the /proc/[pid]/fd/[fd] file.
 //
 // +stateify savable
@@ -227,6 +233,7 @@ func (s *fdSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDen
 //
 // +stateify savable
 type fdInfoDirInode struct {
+	fdInfoDirInodeRefs
 	kernfs.InodeNotSymlink
 	kernfs.InodeDirectoryNoNewChildren
 	kernfs.InodeAttrs
@@ -245,6 +252,7 @@ func (fs *filesystem) newFDInfoDirInode(task *kernel.Task) *kernfs.Dentry {
 		},
 	}
 	inode.InodeAttrs.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
+	inode.EnableLeakCheck()
 
 	dentry := &kernfs.Dentry{}
 	dentry.Init(inode)
@@ -282,12 +290,16 @@ func (i *fdInfoDirInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *
 	return fd.VFSFileDescription(), nil
 }
 
+// DecRef implements kernfs.Inode.DecRef.
+func (i *fdInfoDirInode) DecRef(context.Context) {
+	i.fdInfoDirInodeRefs.DecRef(i.Destroy)
+}
+
 // fdInfoData implements vfs.DynamicBytesSource for /proc/[pid]/fdinfo/[fd].
 //
 // +stateify savable
 type fdInfoData struct {
 	kernfs.DynamicBytesFile
-	refs.AtomicRefCount
 
 	task *kernel.Task
 	fd   int32
diff --git a/pkg/sentry/fsimpl/proc/task_net.go b/pkg/sentry/fsimpl/proc/task_net.go
index a4c884bf98..4e69782c74 100644
--- a/pkg/sentry/fsimpl/proc/task_net.go
+++ b/pkg/sentry/fsimpl/proc/task_net.go
@@ -262,7 +262,7 @@ func (n *netUnixData) Generate(ctx context.Context, buf *bytes.Buffer) error {
 		// For now, we always redact this pointer.
 		fmt.Fprintf(buf, "%#016p: %08X %08X %08X %04X %02X %8d",
 			(*unix.SocketOperations)(nil), // Num, pointer to kernel socket struct.
-			s.Refs()-1,                    // RefCount, don't count our own ref.
+			s.ReadRefs()-1,                // RefCount, don't count our own ref.
 			0,                             // Protocol, always 0 for UDS.
 			sockFlags,                     // Flags.
 			sops.Endpoint().Type(),        // Type.
@@ -430,7 +430,7 @@ func commonGenerateTCP(ctx context.Context, buf *bytes.Buffer, k *kernel.Kernel,
 
 		// Field: refcount. Don't count the ref we obtain while deferencing
 		// the weakref to this socket.
-		fmt.Fprintf(buf, "%d ", s.Refs()-1)
+		fmt.Fprintf(buf, "%d ", s.ReadRefs()-1)
 
 		// Field: Socket struct address. Redacted due to the same reason as
 		// the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData.
@@ -589,7 +589,7 @@ func (d *netUDPData) Generate(ctx context.Context, buf *bytes.Buffer) error {
 
 		// Field: ref; reference count on the socket inode. Don't count the ref
 		// we obtain while deferencing the weakref to this socket.
-		fmt.Fprintf(buf, "%d ", s.Refs()-1)
+		fmt.Fprintf(buf, "%d ", s.ReadRefs()-1)
 
 		// Field: Socket struct address. Redacted due to the same reason as
 		// the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData.
diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go
index 1391992b7e..863c4467ef 100644
--- a/pkg/sentry/fsimpl/proc/tasks.go
+++ b/pkg/sentry/fsimpl/proc/tasks.go
@@ -37,6 +37,7 @@ const (
 //
 // +stateify savable
 type tasksInode struct {
+	tasksInodeRefs
 	kernfs.InodeNotSymlink
 	kernfs.InodeDirectoryNoNewChildren
 	kernfs.InodeAttrs
@@ -84,6 +85,7 @@ func (fs *filesystem) newTasksInode(k *kernel.Kernel, pidns *kernel.PIDNamespace
 		cgroupControllers: cgroupControllers,
 	}
 	inode.InodeAttrs.Init(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
+	inode.EnableLeakCheck()
 
 	dentry := &kernfs.Dentry{}
 	dentry.Init(inode)
@@ -226,6 +228,11 @@ func (i *tasksInode) Stat(ctx context.Context, vsfs *vfs.Filesystem, opts vfs.St
 	return stat, nil
 }
 
+// DecRef implements kernfs.Inode.DecRef.
+func (i *tasksInode) DecRef(context.Context) {
+	i.tasksInodeRefs.DecRef(i.Destroy)
+}
+
 // staticFileSetStat implements a special static file that allows inode
 // attributes to be set. This is to support /proc files that are readonly, but
 // allow attributes to be set.
diff --git a/pkg/sentry/fsimpl/sys/BUILD b/pkg/sentry/fsimpl/sys/BUILD
index f9b232da6e..906cd52cbd 100644
--- a/pkg/sentry/fsimpl/sys/BUILD
+++ b/pkg/sentry/fsimpl/sys/BUILD
@@ -1,10 +1,23 @@
 load("//tools:defs.bzl", "go_library", "go_test")
+load("//tools/go_generics:defs.bzl", "go_template_instance")
 
 licenses(["notice"])
 
+go_template_instance(
+    name = "dir_refs",
+    out = "dir_refs.go",
+    package = "sys",
+    prefix = "dir",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "dir",
+    },
+)
+
 go_library(
     name = "sys",
     srcs = [
+        "dir_refs.go",
         "kcov.go",
         "sys.go",
     ],
@@ -13,6 +26,8 @@ go_library(
         "//pkg/abi/linux",
         "//pkg/context",
         "//pkg/coverage",
+        "//pkg/log",
+        "//pkg/refs",
         "//pkg/sentry/arch",
         "//pkg/sentry/fsimpl/kernfs",
         "//pkg/sentry/kernel",
diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go
index 1f042d9f78..ea30a4ec2f 100644
--- a/pkg/sentry/fsimpl/sys/sys.go
+++ b/pkg/sentry/fsimpl/sys/sys.go
@@ -118,6 +118,7 @@ func (fs *filesystem) Release(ctx context.Context) {
 
 // dir implements kernfs.Inode.
 type dir struct {
+	dirRefs
 	kernfs.InodeAttrs
 	kernfs.InodeNoDynamicLookup
 	kernfs.InodeNotSymlink
@@ -133,6 +134,7 @@ func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, conte
 	d := &dir{}
 	d.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0755)
 	d.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+	d.EnableLeakCheck()
 	d.dentry.Init(d)
 
 	d.IncLinks(d.OrderedChildren.Populate(&d.dentry, contents))
@@ -140,7 +142,7 @@ func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, conte
 	return &d.dentry
 }
 
-// SetStat implements Inode.SetStat not allowing inode attributes to be changed.
+// SetStat implements kernfs.Inode.SetStat, not allowing inode attributes to be changed.
 func (*dir) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
 	return syserror.EPERM
 }
@@ -156,6 +158,11 @@ func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry,
 	return fd.VFSFileDescription(), nil
 }
 
+// DecRef implements kernfs.Inode.DecRef.
+func (d *dir) DecRef(context.Context) {
+	d.dirRefs.DecRef(d.Destroy)
+}
+
 // cpuFile implements kernfs.Inode.
 type cpuFile struct {
 	kernfs.DynamicBytesFile
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD
index d1ecceba3c..d436daab46 100644
--- a/pkg/sentry/kernel/BUILD
+++ b/pkg/sentry/kernel/BUILD
@@ -74,6 +74,50 @@ go_template_instance(
     },
 )
 
+go_template_instance(
+    name = "fd_table_refs",
+    out = "fd_table_refs.go",
+    package = "kernel",
+    prefix = "FDTable",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "FDTable",
+    },
+)
+
+go_template_instance(
+    name = "fs_context_refs",
+    out = "fs_context_refs.go",
+    package = "kernel",
+    prefix = "FSContext",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "FSContext",
+    },
+)
+
+go_template_instance(
+    name = "process_group_refs",
+    out = "process_group_refs.go",
+    package = "kernel",
+    prefix = "ProcessGroup",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "ProcessGroup",
+    },
+)
+
+go_template_instance(
+    name = "session_refs",
+    out = "session_refs.go",
+    package = "kernel",
+    prefix = "Session",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "Session",
+    },
+)
+
 proto_library(
     name = "uncaught_signal",
     srcs = ["uncaught_signal.proto"],
@@ -88,8 +132,10 @@ go_library(
         "aio.go",
         "context.go",
         "fd_table.go",
+        "fd_table_refs.go",
         "fd_table_unsafe.go",
         "fs_context.go",
+        "fs_context_refs.go",
         "ipc_namespace.go",
         "kcov.go",
         "kcov_unsafe.go",
@@ -101,6 +147,7 @@ go_library(
         "pending_signals_state.go",
         "posixtimer.go",
         "process_group_list.go",
+        "process_group_refs.go",
         "ptrace.go",
         "ptrace_amd64.go",
         "ptrace_arm64.go",
@@ -108,6 +155,7 @@ go_library(
         "seccomp.go",
         "seqatomic_taskgoroutineschedinfo_unsafe.go",
         "session_list.go",
+        "session_refs.go",
         "sessions.go",
         "signal.go",
         "signal_handlers.go",
diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go
index ce53af69bd..5773244ac7 100644
--- a/pkg/sentry/kernel/fd_table.go
+++ b/pkg/sentry/kernel/fd_table.go
@@ -23,7 +23,6 @@ import (
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
-	"gvisor.dev/gvisor/pkg/refs"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/fs/lock"
 	"gvisor.dev/gvisor/pkg/sentry/limits"
@@ -78,7 +77,8 @@ type descriptor struct {
 //
 // +stateify savable
 type FDTable struct {
-	refs.AtomicRefCount
+	FDTableRefs
+
 	k *Kernel
 
 	// mu protects below.
@@ -176,16 +176,15 @@ func (k *Kernel) NewFDTable() *FDTable {
 	return f
 }
 
-// destroy removes all of the file descriptors from the map.
-func (f *FDTable) destroy(ctx context.Context) {
-	f.RemoveIf(ctx, func(*fs.File, *vfs.FileDescription, FDFlags) bool {
-		return true
-	})
-}
-
-// DecRef implements RefCounter.DecRef with destructor f.destroy.
+// DecRef implements RefCounter.DecRef.
+//
+// If f reaches zero references, all of its file descriptors are removed.
 func (f *FDTable) DecRef(ctx context.Context) {
-	f.DecRefWithDestructor(ctx, f.destroy)
+	f.FDTableRefs.DecRef(func() {
+		f.RemoveIf(ctx, func(*fs.File, *vfs.FileDescription, FDFlags) bool {
+			return true
+		})
+	})
 }
 
 // Size returns the number of file descriptor slots currently allocated.
diff --git a/pkg/sentry/kernel/fd_table_unsafe.go b/pkg/sentry/kernel/fd_table_unsafe.go
index 7fd97dc53e..6b8feb1074 100644
--- a/pkg/sentry/kernel/fd_table_unsafe.go
+++ b/pkg/sentry/kernel/fd_table_unsafe.go
@@ -31,6 +31,8 @@ type descriptorTable struct {
 }
 
 // init initializes the table.
+//
+// TODO(gvisor.dev/1486): Enable leak check for FDTable.
 func (f *FDTable) init() {
 	var slice []unsafe.Pointer // Empty slice.
 	atomic.StorePointer(&f.slice, unsafe.Pointer(&slice))
diff --git a/pkg/sentry/kernel/fs_context.go b/pkg/sentry/kernel/fs_context.go
index 8f2d36d5a5..d46d1e1c18 100644
--- a/pkg/sentry/kernel/fs_context.go
+++ b/pkg/sentry/kernel/fs_context.go
@@ -18,7 +18,6 @@ import (
 	"fmt"
 
 	"gvisor.dev/gvisor/pkg/context"
-	"gvisor.dev/gvisor/pkg/refs"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/sync"
@@ -30,7 +29,7 @@ import (
 //
 // +stateify savable
 type FSContext struct {
-	refs.AtomicRefCount
+	FSContextRefs
 
 	// mu protects below.
 	mu sync.Mutex `state:"nosave"`
@@ -64,7 +63,7 @@ func newFSContext(root, cwd *fs.Dirent, umask uint) *FSContext {
 		cwd:   cwd,
 		umask: umask,
 	}
-	f.EnableLeakCheck("kernel.FSContext")
+	f.EnableLeakCheck()
 	return &f
 }
 
@@ -77,54 +76,56 @@ func NewFSContextVFS2(root, cwd vfs.VirtualDentry, umask uint) *FSContext {
 		cwdVFS2:  cwd,
 		umask:    umask,
 	}
-	f.EnableLeakCheck("kernel.FSContext")
+	f.EnableLeakCheck()
 	return &f
 }
 
-// destroy is the destructor for an FSContext.
+// DecRef implements RefCounter.DecRef.
 //
-// This will call DecRef on both root and cwd Dirents.  If either call to
-// DecRef returns an error, then it will be propagated.  If both calls to
-// DecRef return an error, then the one from root.DecRef will be propagated.
+// When f reaches zero references, DecRef will be called on both root and cwd
+// Dirents.
 //
 // Note that there may still be calls to WorkingDirectory() or RootDirectory()
 // (that return nil).  This is because valid references may still be held via
 // proc files or other mechanisms.
-func (f *FSContext) destroy(ctx context.Context) {
-	// Hold f.mu so that we don't race with RootDirectory() and
-	// WorkingDirectory().
-	f.mu.Lock()
-	defer f.mu.Unlock()
-
-	if VFS2Enabled {
-		f.rootVFS2.DecRef(ctx)
-		f.rootVFS2 = vfs.VirtualDentry{}
-		f.cwdVFS2.DecRef(ctx)
-		f.cwdVFS2 = vfs.VirtualDentry{}
-	} else {
-		f.root.DecRef(ctx)
-		f.root = nil
-		f.cwd.DecRef(ctx)
-		f.cwd = nil
-	}
-}
-
-// DecRef implements RefCounter.DecRef with destructor f.destroy.
 func (f *FSContext) DecRef(ctx context.Context) {
-	f.DecRefWithDestructor(ctx, f.destroy)
+	f.FSContextRefs.DecRef(func() {
+		// Hold f.mu so that we don't race with RootDirectory() and
+		// WorkingDirectory().
+		f.mu.Lock()
+		defer f.mu.Unlock()
+
+		if VFS2Enabled {
+			f.rootVFS2.DecRef(ctx)
+			f.rootVFS2 = vfs.VirtualDentry{}
+			f.cwdVFS2.DecRef(ctx)
+			f.cwdVFS2 = vfs.VirtualDentry{}
+		} else {
+			f.root.DecRef(ctx)
+			f.root = nil
+			f.cwd.DecRef(ctx)
+			f.cwd = nil
+		}
+	})
 }
 
 // Fork forks this FSContext.
 //
-// This is not a valid call after destroy.
+// This is not a valid call after f is destroyed.
 func (f *FSContext) Fork() *FSContext {
 	f.mu.Lock()
 	defer f.mu.Unlock()
 
 	if VFS2Enabled {
+		if !f.cwdVFS2.Ok() {
+			panic("FSContext.Fork() called after destroy")
+		}
 		f.cwdVFS2.IncRef()
 		f.rootVFS2.IncRef()
 	} else {
+		if f.cwd == nil {
+			panic("FSContext.Fork() called after destroy")
+		}
 		f.cwd.IncRef()
 		f.root.IncRef()
 	}
@@ -140,8 +141,8 @@ func (f *FSContext) Fork() *FSContext {
 
 // WorkingDirectory returns the current working directory.
 //
-// This will return nil if called after destroy(), otherwise it will return a
-// Dirent with a reference taken.
+// This will return nil if called after f is destroyed, otherwise it will return
+// a Dirent with a reference taken.
 func (f *FSContext) WorkingDirectory() *fs.Dirent {
 	f.mu.Lock()
 	defer f.mu.Unlock()
@@ -152,8 +153,8 @@ func (f *FSContext) WorkingDirectory() *fs.Dirent {
 
 // WorkingDirectoryVFS2 returns the current working directory.
 //
-// This will return nil if called after destroy(), otherwise it will return a
-// Dirent with a reference taken.
+// This will return nil if called after f is destroyed, otherwise it will return
+// a VirtualDentry with a reference taken.
 func (f *FSContext) WorkingDirectoryVFS2() vfs.VirtualDentry {
 	f.mu.Lock()
 	defer f.mu.Unlock()
@@ -165,7 +166,7 @@ func (f *FSContext) WorkingDirectoryVFS2() vfs.VirtualDentry {
 // SetWorkingDirectory sets the current working directory.
 // This will take an extra reference on the Dirent.
 //
-// This is not a valid call after destroy.
+// This is not a valid call after f is destroyed.
 func (f *FSContext) SetWorkingDirectory(ctx context.Context, d *fs.Dirent) {
 	if d == nil {
 		panic("FSContext.SetWorkingDirectory called with nil dirent")
@@ -187,11 +188,15 @@ func (f *FSContext) SetWorkingDirectory(ctx context.Context, d *fs.Dirent) {
 // SetWorkingDirectoryVFS2 sets the current working directory.
 // This will take an extra reference on the VirtualDentry.
 //
-// This is not a valid call after destroy.
+// This is not a valid call after f is destroyed.
 func (f *FSContext) SetWorkingDirectoryVFS2(ctx context.Context, d vfs.VirtualDentry) {
 	f.mu.Lock()
 	defer f.mu.Unlock()
 
+	if !f.cwdVFS2.Ok() {
+		panic(fmt.Sprintf("FSContext.SetWorkingDirectoryVFS2(%v)) called after destroy", d))
+	}
+
 	old := f.cwdVFS2
 	f.cwdVFS2 = d
 	d.IncRef()
@@ -200,8 +205,8 @@ func (f *FSContext) SetWorkingDirectoryVFS2(ctx context.Context, d vfs.VirtualDe
 
 // RootDirectory returns the current filesystem root.
 //
-// This will return nil if called after destroy(), otherwise it will return a
-// Dirent with a reference taken.
+// This will return nil if called after f is destroyed, otherwise it will return
+// a Dirent with a reference taken.
 func (f *FSContext) RootDirectory() *fs.Dirent {
 	f.mu.Lock()
 	defer f.mu.Unlock()
@@ -213,8 +218,8 @@ func (f *FSContext) RootDirectory() *fs.Dirent {
 
 // RootDirectoryVFS2 returns the current filesystem root.
 //
-// This will return nil if called after destroy(), otherwise it will return a
-// Dirent with a reference taken.
+// This will return nil if called after f is destroyed, otherwise it will return
+// a VirtualDentry with a reference taken.
 func (f *FSContext) RootDirectoryVFS2() vfs.VirtualDentry {
 	f.mu.Lock()
 	defer f.mu.Unlock()
@@ -226,7 +231,7 @@ func (f *FSContext) RootDirectoryVFS2() vfs.VirtualDentry {
 // SetRootDirectory sets the root directory.
 // This will take an extra reference on the Dirent.
 //
-// This is not a valid call after free.
+// This is not a valid call after f is destroyed.
 func (f *FSContext) SetRootDirectory(ctx context.Context, d *fs.Dirent) {
 	if d == nil {
 		panic("FSContext.SetRootDirectory called with nil dirent")
@@ -247,7 +252,7 @@ func (f *FSContext) SetRootDirectory(ctx context.Context, d *fs.Dirent) {
 
 // SetRootDirectoryVFS2 sets the root directory. It takes a reference on vd.
 //
-// This is not a valid call after free.
+// This is not a valid call after f is destroyed.
 func (f *FSContext) SetRootDirectoryVFS2(ctx context.Context, vd vfs.VirtualDentry) {
 	if !vd.Ok() {
 		panic("FSContext.SetRootDirectoryVFS2 called with zero-value VirtualDentry")
diff --git a/pkg/sentry/kernel/sessions.go b/pkg/sentry/kernel/sessions.go
index 5c4c622c29..df5c8421b1 100644
--- a/pkg/sentry/kernel/sessions.go
+++ b/pkg/sentry/kernel/sessions.go
@@ -16,8 +16,6 @@ package kernel
 
 import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
-	"gvisor.dev/gvisor/pkg/context"
-	"gvisor.dev/gvisor/pkg/refs"
 	"gvisor.dev/gvisor/pkg/sentry/arch"
 	"gvisor.dev/gvisor/pkg/syserror"
 )
@@ -32,7 +30,7 @@ type ProcessGroupID ThreadID
 //
 // +stateify savable
 type Session struct {
-	refs refs.AtomicRefCount
+	SessionRefs
 
 	// leader is the originator of the Session.
 	//
@@ -62,16 +60,11 @@ type Session struct {
 	sessionEntry
 }
 
-// incRef grabs a reference.
-func (s *Session) incRef() {
-	s.refs.IncRef()
-}
-
-// decRef drops a reference.
+// DecRef drops a reference.
 //
 // Precondition: callers must hold TaskSet.mu for writing.
-func (s *Session) decRef() {
-	s.refs.DecRefWithDestructor(nil, func(context.Context) {
+func (s *Session) DecRef() {
+	s.SessionRefs.DecRef(func() {
 		// Remove translations from the leader.
 		for ns := s.leader.pidns; ns != nil; ns = ns.parent {
 			id := ns.sids[s]
@@ -88,7 +81,7 @@ func (s *Session) decRef() {
 //
 // +stateify savable
 type ProcessGroup struct {
-	refs refs.AtomicRefCount // not exported.
+	refs ProcessGroupRefs
 
 	// originator is the originator of the group.
 	//
@@ -163,7 +156,7 @@ func (pg *ProcessGroup) decRefWithParent(parentPG *ProcessGroup) {
 	}
 
 	alive := true
-	pg.refs.DecRefWithDestructor(nil, func(context.Context) {
+	pg.refs.DecRef(func() {
 		alive = false // don't bother with handleOrphan.
 
 		// Remove translations from the originator.
@@ -175,7 +168,7 @@ func (pg *ProcessGroup) decRefWithParent(parentPG *ProcessGroup) {
 
 		// Remove the list of process groups.
 		pg.session.processGroups.Remove(pg)
-		pg.session.decRef()
+		pg.session.DecRef()
 	})
 	if alive {
 		pg.handleOrphan()
@@ -302,7 +295,7 @@ func (tg *ThreadGroup) createSession() error {
 		id:     SessionID(id),
 		leader: tg,
 	}
-	s.refs.EnableLeakCheck("kernel.Session")
+	s.EnableLeakCheck()
 
 	// Create a new ProcessGroup, belonging to that Session.
 	// This also has a single reference (assigned below).
@@ -316,7 +309,7 @@ func (tg *ThreadGroup) createSession() error {
 		session:    s,
 		ancestors:  0,
 	}
-	pg.refs.EnableLeakCheck("kernel.ProcessGroup")
+	pg.refs.EnableLeakCheck()
 
 	// Tie them and return the result.
 	s.processGroups.PushBack(pg)
@@ -396,13 +389,13 @@ func (tg *ThreadGroup) CreateProcessGroup() error {
 	//
 	// We manually adjust the ancestors if the parent is in the same
 	// session.
-	tg.processGroup.session.incRef()
+	tg.processGroup.session.IncRef()
 	pg := ProcessGroup{
 		id:         ProcessGroupID(id),
 		originator: tg,
 		session:    tg.processGroup.session,
 	}
-	pg.refs.EnableLeakCheck("kernel.ProcessGroup")
+	pg.refs.EnableLeakCheck()
 
 	if tg.leader.parent != nil && tg.leader.parent.tg.processGroup.session == pg.session {
 		pg.ancestors++
diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD
index c211fc8d0d..b7e4b480d8 100644
--- a/pkg/sentry/kernel/shm/BUILD
+++ b/pkg/sentry/kernel/shm/BUILD
@@ -1,12 +1,25 @@
 load("//tools:defs.bzl", "go_library")
+load("//tools/go_generics:defs.bzl", "go_template_instance")
 
 package(licenses = ["notice"])
 
+go_template_instance(
+    name = "shm_refs",
+    out = "shm_refs.go",
+    package = "shm",
+    prefix = "Shm",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "Shm",
+    },
+)
+
 go_library(
     name = "shm",
     srcs = [
         "device.go",
         "shm.go",
+        "shm_refs.go",
     ],
     visibility = ["//pkg/sentry:internal"],
     deps = [
diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go
index 13ec7afe0d..00c03585e7 100644
--- a/pkg/sentry/kernel/shm/shm.go
+++ b/pkg/sentry/kernel/shm/shm.go
@@ -39,7 +39,6 @@ import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/log"
-	"gvisor.dev/gvisor/pkg/refs"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
@@ -252,7 +251,7 @@ func (r *Registry) newShm(ctx context.Context, pid int32, key Key, creator fs.Fi
 		creatorPID:    pid,
 		changeTime:    ktime.NowFromContext(ctx),
 	}
-	shm.EnableLeakCheck("kernel.Shm")
+	shm.EnableLeakCheck()
 
 	// Find the next available ID.
 	for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ {
@@ -337,14 +336,14 @@ func (r *Registry) remove(s *Shm) {
 //
 // +stateify savable
 type Shm struct {
-	// AtomicRefCount tracks the number of references to this segment.
+	// ShmRefs tracks the number of references to this segment.
 	//
 	// A segment holds a reference to itself until it is marked for
 	// destruction.
 	//
 	// In addition to direct users, the MemoryManager will hold references
 	// via MappingIdentity.
-	refs.AtomicRefCount
+	ShmRefs
 
 	mfp pgalloc.MemoryFileProvider
 
@@ -428,11 +427,14 @@ func (s *Shm) InodeID() uint64 {
 	return uint64(s.ID)
 }
 
-// DecRef overrides refs.RefCount.DecRef with a destructor.
+// DecRef drops a reference on s.
 //
 // Precondition: Caller must not hold s.mu.
 func (s *Shm) DecRef(ctx context.Context) {
-	s.DecRefWithDestructor(ctx, s.destroy)
+	s.ShmRefs.DecRef(func() {
+		s.mfp.MemoryFile().DecRef(s.fr)
+		s.registry.remove(s)
+	})
 }
 
 // Msync implements memmap.MappingIdentity.Msync. Msync is a no-op for shm
@@ -642,11 +644,6 @@ func (s *Shm) Set(ctx context.Context, ds *linux.ShmidDS) error {
 	return nil
 }
 
-func (s *Shm) destroy(context.Context) {
-	s.mfp.MemoryFile().DecRef(s.fr)
-	s.registry.remove(s)
-}
-
 // MarkDestroyed marks a segment for destruction. The segment is actually
 // destroyed once it has no references. MarkDestroyed may be called multiple
 // times, and is safe to call after a segment has already been destroyed. See
diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD
index f9d0837a1b..b4a47cccaf 100644
--- a/pkg/sentry/mm/BUILD
+++ b/pkg/sentry/mm/BUILD
@@ -73,12 +73,35 @@ go_template_instance(
     },
 )
 
+go_template_instance(
+    name = "aio_mappable_refs",
+    out = "aio_mappable_refs.go",
+    package = "mm",
+    prefix = "aioMappable",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "aioMappable",
+    },
+)
+
+go_template_instance(
+    name = "special_mappable_refs",
+    out = "special_mappable_refs.go",
+    package = "mm",
+    prefix = "SpecialMappable",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "SpecialMappable",
+    },
+)
+
 go_library(
     name = "mm",
     srcs = [
         "address_space.go",
         "aio_context.go",
         "aio_context_state.go",
+        "aio_mappable_refs.go",
         "debug.go",
         "file_refcount_set.go",
         "io.go",
@@ -92,6 +115,7 @@ go_library(
         "save_restore.go",
         "shm.go",
         "special_mappable.go",
+        "special_mappable_refs.go",
         "syscalls.go",
         "vma.go",
         "vma_set.go",
diff --git a/pkg/sentry/mm/aio_context.go b/pkg/sentry/mm/aio_context.go
index 16fea53c43..7bf48cb2c2 100644
--- a/pkg/sentry/mm/aio_context.go
+++ b/pkg/sentry/mm/aio_context.go
@@ -17,7 +17,6 @@ package mm
 import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
-	"gvisor.dev/gvisor/pkg/refs"
 	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/sentry/pgalloc"
 	"gvisor.dev/gvisor/pkg/sentry/usage"
@@ -239,7 +238,7 @@ func (ctx *AIOContext) Drain() {
 //
 // +stateify savable
 type aioMappable struct {
-	refs.AtomicRefCount
+	aioMappableRefs
 
 	mfp pgalloc.MemoryFileProvider
 	fr  memmap.FileRange
@@ -253,13 +252,13 @@ func newAIOMappable(mfp pgalloc.MemoryFileProvider) (*aioMappable, error) {
 		return nil, err
 	}
 	m := aioMappable{mfp: mfp, fr: fr}
-	m.EnableLeakCheck("mm.aioMappable")
+	m.EnableLeakCheck()
 	return &m, nil
 }
 
 // DecRef implements refs.RefCounter.DecRef.
 func (m *aioMappable) DecRef(ctx context.Context) {
-	m.AtomicRefCount.DecRefWithDestructor(ctx, func(context.Context) {
+	m.aioMappableRefs.DecRef(func() {
 		m.mfp.MemoryFile().DecRef(m.fr)
 	})
 }
diff --git a/pkg/sentry/mm/special_mappable.go b/pkg/sentry/mm/special_mappable.go
index 4cdb52eb6f..f4c93baeba 100644
--- a/pkg/sentry/mm/special_mappable.go
+++ b/pkg/sentry/mm/special_mappable.go
@@ -16,7 +16,6 @@ package mm
 
 import (
 	"gvisor.dev/gvisor/pkg/context"
-	"gvisor.dev/gvisor/pkg/refs"
 	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/sentry/pgalloc"
 	"gvisor.dev/gvisor/pkg/sentry/usage"
@@ -31,7 +30,7 @@ import (
 //
 // +stateify savable
 type SpecialMappable struct {
-	refs.AtomicRefCount
+	SpecialMappableRefs
 
 	mfp  pgalloc.MemoryFileProvider
 	fr   memmap.FileRange
@@ -45,13 +44,13 @@ type SpecialMappable struct {
 // Preconditions: fr.Length() != 0.
 func NewSpecialMappable(name string, mfp pgalloc.MemoryFileProvider, fr memmap.FileRange) *SpecialMappable {
 	m := SpecialMappable{mfp: mfp, fr: fr, name: name}
-	m.EnableLeakCheck("mm.SpecialMappable")
+	m.EnableLeakCheck()
 	return &m
 }
 
 // DecRef implements refs.RefCounter.DecRef.
 func (m *SpecialMappable) DecRef(ctx context.Context) {
-	m.AtomicRefCount.DecRefWithDestructor(ctx, func(context.Context) {
+	m.SpecialMappableRefs.DecRef(func() {
 		m.mfp.MemoryFile().DecRef(m.fr)
 	})
 }
diff --git a/pkg/sentry/socket/unix/transport/BUILD b/pkg/sentry/socket/unix/transport/BUILD
index c708b60305..26c3a51b98 100644
--- a/pkg/sentry/socket/unix/transport/BUILD
+++ b/pkg/sentry/socket/unix/transport/BUILD
@@ -15,6 +15,17 @@ go_template_instance(
     },
 )
 
+go_template_instance(
+    name = "queue_refs",
+    out = "queue_refs.go",
+    package = "transport",
+    prefix = "queue",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "queue",
+    },
+)
+
 go_library(
     name = "transport",
     srcs = [
@@ -22,6 +33,7 @@ go_library(
         "connectioned_state.go",
         "connectionless.go",
         "queue.go",
+        "queue_refs.go",
         "transport_message_list.go",
         "unix.go",
     ],
diff --git a/pkg/sentry/socket/unix/transport/connectioned.go b/pkg/sentry/socket/unix/transport/connectioned.go
index c67b602f04..e3a75b519a 100644
--- a/pkg/sentry/socket/unix/transport/connectioned.go
+++ b/pkg/sentry/socket/unix/transport/connectioned.go
@@ -142,9 +142,9 @@ func NewPair(ctx context.Context, stype linux.SockType, uid UniqueIDProvider) (E
 	}
 
 	q1 := &queue{ReaderQueue: a.Queue, WriterQueue: b.Queue, limit: initialLimit}
-	q1.EnableLeakCheck("transport.queue")
+	q1.EnableLeakCheck()
 	q2 := &queue{ReaderQueue: b.Queue, WriterQueue: a.Queue, limit: initialLimit}
-	q2.EnableLeakCheck("transport.queue")
+	q2.EnableLeakCheck()
 
 	if stype == linux.SOCK_STREAM {
 		a.receiver = &streamQueueReceiver{queueReceiver: queueReceiver{q1}}
@@ -300,14 +300,14 @@ func (e *connectionedEndpoint) BidirectionalConnect(ctx context.Context, ce Conn
 	}
 
 	readQueue := &queue{ReaderQueue: ce.WaiterQueue(), WriterQueue: ne.Queue, limit: initialLimit}
-	readQueue.EnableLeakCheck("transport.queue")
+	readQueue.EnableLeakCheck()
 	ne.connected = &connectedEndpoint{
 		endpoint:   ce,
 		writeQueue: readQueue,
 	}
 
 	writeQueue := &queue{ReaderQueue: ne.Queue, WriterQueue: ce.WaiterQueue(), limit: initialLimit}
-	writeQueue.EnableLeakCheck("transport.queue")
+	writeQueue.EnableLeakCheck()
 	if e.stype == linux.SOCK_STREAM {
 		ne.receiver = &streamQueueReceiver{queueReceiver: queueReceiver{readQueue: writeQueue}}
 	} else {
diff --git a/pkg/sentry/socket/unix/transport/connectionless.go b/pkg/sentry/socket/unix/transport/connectionless.go
index 70ee8f9b8a..4751b2fd87 100644
--- a/pkg/sentry/socket/unix/transport/connectionless.go
+++ b/pkg/sentry/socket/unix/transport/connectionless.go
@@ -42,7 +42,7 @@ var (
 func NewConnectionless(ctx context.Context) Endpoint {
 	ep := &connectionlessEndpoint{baseEndpoint{Queue: &waiter.Queue{}}}
 	q := queue{ReaderQueue: ep.Queue, WriterQueue: &waiter.Queue{}, limit: initialLimit}
-	q.EnableLeakCheck("transport.queue")
+	q.EnableLeakCheck()
 	ep.receiver = &queueReceiver{readQueue: &q}
 	return ep
 }
diff --git a/pkg/sentry/socket/unix/transport/queue.go b/pkg/sentry/socket/unix/transport/queue.go
index ef6043e199..342def28f7 100644
--- a/pkg/sentry/socket/unix/transport/queue.go
+++ b/pkg/sentry/socket/unix/transport/queue.go
@@ -16,7 +16,6 @@ package transport
 
 import (
 	"gvisor.dev/gvisor/pkg/context"
-	"gvisor.dev/gvisor/pkg/refs"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/syserr"
 	"gvisor.dev/gvisor/pkg/tcpip"
@@ -28,7 +27,7 @@ import (
 //
 // +stateify savable
 type queue struct {
-	refs.AtomicRefCount
+	queueRefs
 
 	ReaderQueue *waiter.Queue
 	WriterQueue *waiter.Queue
@@ -68,11 +67,13 @@ func (q *queue) Reset(ctx context.Context) {
 	q.mu.Unlock()
 }
 
-// DecRef implements RefCounter.DecRef with destructor q.Reset.
+// DecRef implements RefCounter.DecRef.
 func (q *queue) DecRef(ctx context.Context) {
-	q.DecRefWithDestructor(ctx, q.Reset)
-	// We don't need to notify after resetting because no one cares about
-	// this queue after all references have been dropped.
+	q.queueRefs.DecRef(func() {
+		// We don't need to notify after resetting because no one cares about
+		// this queue after all references have been dropped.
+		q.Reset(ctx)
+	})
 }
 
 // IsReadable determines if q is currently readable.
diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD
index 642769e7cf..8093ca55c0 100644
--- a/pkg/sentry/vfs/BUILD
+++ b/pkg/sentry/vfs/BUILD
@@ -27,6 +27,39 @@ go_template_instance(
     },
 )
 
+go_template_instance(
+    name = "file_description_refs",
+    out = "file_description_refs.go",
+    package = "vfs",
+    prefix = "FileDescription",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "FileDescription",
+    },
+)
+
+go_template_instance(
+    name = "mount_namespace_refs",
+    out = "mount_namespace_refs.go",
+    package = "vfs",
+    prefix = "MountNamespace",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "MountNamespace",
+    },
+)
+
+go_template_instance(
+    name = "filesystem_refs",
+    out = "filesystem_refs.go",
+    package = "vfs",
+    prefix = "Filesystem",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "Filesystem",
+    },
+)
+
 go_library(
     name = "vfs",
     srcs = [
@@ -40,12 +73,15 @@ go_library(
         "event_list.go",
         "file_description.go",
         "file_description_impl_util.go",
+        "file_description_refs.go",
         "filesystem.go",
         "filesystem_impl_util.go",
+        "filesystem_refs.go",
         "filesystem_type.go",
         "inotify.go",
         "lock.go",
         "mount.go",
+        "mount_namespace_refs.go",
         "mount_unsafe.go",
         "options.go",
         "pathname.go",
@@ -63,6 +99,7 @@ go_library(
         "//pkg/fspath",
         "//pkg/gohacks",
         "//pkg/log",
+        "//pkg/refs",
         "//pkg/safemem",
         "//pkg/sentry/arch",
         "//pkg/sentry/fs",
diff --git a/pkg/sentry/vfs/README.md b/pkg/sentry/vfs/README.md
index 4b9faf2ea7..5aad31b782 100644
--- a/pkg/sentry/vfs/README.md
+++ b/pkg/sentry/vfs/README.md
@@ -184,12 +184,3 @@ This construction, which is essentially a type-safe analogue to Linux's
     -   File locking
 
     -   `O_ASYNC`
-
--   Reference counts in the `vfs` package do not use the `refs` package since
-    `refs.AtomicRefCount` adds 64 bytes of overhead to each 8-byte reference
-    count, resulting in considerable cache bloat. 24 bytes of this overhead is
-    for weak reference support, which have poor performance and will not be used
-    by VFS2. The remaining 40 bytes is to store a descriptive string and stack
-    trace for reference leak checking; we can support reference leak checking
-    without incurring this space overhead by including the applicable
-    information directly in finalizers for applicable types.
diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go
index 3219a9e137..22a54fa487 100644
--- a/pkg/sentry/vfs/file_description.go
+++ b/pkg/sentry/vfs/file_description.go
@@ -38,9 +38,7 @@ import (
 //
 // FileDescription is analogous to Linux's struct file.
 type FileDescription struct {
-	// refs is the reference count. refs is accessed using atomic memory
-	// operations.
-	refs int64
+	FileDescriptionRefs
 
 	// flagsMu protects statusFlags and asyncHandler below.
 	flagsMu sync.Mutex
@@ -131,7 +129,7 @@ func (fd *FileDescription) Init(impl FileDescriptionImpl, flags uint32, mnt *Mou
 		}
 	}
 
-	fd.refs = 1
+	fd.EnableLeakCheck()
 
 	// Remove "file creation flags" to mirror the behavior from file.f_flags in
 	// fs/open.c:do_dentry_open.
@@ -149,30 +147,9 @@ func (fd *FileDescription) Init(impl FileDescriptionImpl, flags uint32, mnt *Mou
 	return nil
 }
 
-// IncRef increments fd's reference count.
-func (fd *FileDescription) IncRef() {
-	atomic.AddInt64(&fd.refs, 1)
-}
-
-// TryIncRef increments fd's reference count and returns true. If fd's
-// reference count is already zero, TryIncRef does nothing and returns false.
-//
-// TryIncRef does not require that a reference is held on fd.
-func (fd *FileDescription) TryIncRef() bool {
-	for {
-		refs := atomic.LoadInt64(&fd.refs)
-		if refs <= 0 {
-			return false
-		}
-		if atomic.CompareAndSwapInt64(&fd.refs, refs, refs+1) {
-			return true
-		}
-	}
-}
-
 // DecRef decrements fd's reference count.
 func (fd *FileDescription) DecRef(ctx context.Context) {
-	if refs := atomic.AddInt64(&fd.refs, -1); refs == 0 {
+	fd.FileDescriptionRefs.DecRef(func() {
 		// Unregister fd from all epoll instances.
 		fd.epollMu.Lock()
 		epolls := fd.epolls
@@ -208,15 +185,7 @@ func (fd *FileDescription) DecRef(ctx context.Context) {
 		}
 		fd.asyncHandler = nil
 		fd.flagsMu.Unlock()
-	} else if refs < 0 {
-		panic("FileDescription.DecRef() called without holding a reference")
-	}
-}
-
-// Refs returns the current number of references. The returned count
-// is inherently racy and is unsafe to use without external synchronization.
-func (fd *FileDescription) Refs() int64 {
-	return atomic.LoadInt64(&fd.refs)
+	})
 }
 
 // Mount returns the mount on which fd was opened. It does not take a reference
diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go
index 2c60cfab28..46851f638e 100644
--- a/pkg/sentry/vfs/filesystem.go
+++ b/pkg/sentry/vfs/filesystem.go
@@ -15,8 +15,6 @@
 package vfs
 
 import (
-	"sync/atomic"
-
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/fspath"
@@ -34,9 +32,7 @@ import (
 //
 // +stateify savable
 type Filesystem struct {
-	// refs is the reference count. refs is accessed using atomic memory
-	// operations.
-	refs int64
+	FilesystemRefs
 
 	// vfs is the VirtualFilesystem that uses this Filesystem. vfs is
 	// immutable.
@@ -52,7 +48,7 @@ type Filesystem struct {
 
 // Init must be called before first use of fs.
 func (fs *Filesystem) Init(vfsObj *VirtualFilesystem, fsType FilesystemType, impl FilesystemImpl) {
-	fs.refs = 1
+	fs.EnableLeakCheck()
 	fs.vfs = vfsObj
 	fs.fsType = fsType
 	fs.impl = impl
@@ -76,39 +72,14 @@ func (fs *Filesystem) Impl() FilesystemImpl {
 	return fs.impl
 }
 
-// IncRef increments fs' reference count.
-func (fs *Filesystem) IncRef() {
-	if atomic.AddInt64(&fs.refs, 1) <= 1 {
-		panic("Filesystem.IncRef() called without holding a reference")
-	}
-}
-
-// TryIncRef increments fs' reference count and returns true. If fs' reference
-// count is zero, TryIncRef does nothing and returns false.
-//
-// TryIncRef does not require that a reference is held on fs.
-func (fs *Filesystem) TryIncRef() bool {
-	for {
-		refs := atomic.LoadInt64(&fs.refs)
-		if refs <= 0 {
-			return false
-		}
-		if atomic.CompareAndSwapInt64(&fs.refs, refs, refs+1) {
-			return true
-		}
-	}
-}
-
 // DecRef decrements fs' reference count.
 func (fs *Filesystem) DecRef(ctx context.Context) {
-	if refs := atomic.AddInt64(&fs.refs, -1); refs == 0 {
+	fs.FilesystemRefs.DecRef(func() {
 		fs.vfs.filesystemsMu.Lock()
 		delete(fs.vfs.filesystems, fs)
 		fs.vfs.filesystemsMu.Unlock()
 		fs.impl.Release(ctx)
-	} else if refs < 0 {
-		panic("Filesystem.decRef() called without holding a reference")
-	}
+	})
 }
 
 // FilesystemImpl contains implementation details for a Filesystem.
diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go
index cd5456eefa..db5fb3bb12 100644
--- a/pkg/sentry/vfs/mount.go
+++ b/pkg/sentry/vfs/mount.go
@@ -128,16 +128,14 @@ func (mnt *Mount) Options() MountOptions {
 //
 // +stateify savable
 type MountNamespace struct {
+	MountNamespaceRefs
+
 	// Owner is the usernamespace that owns this mount namespace.
 	Owner *auth.UserNamespace
 
 	// root is the MountNamespace's root mount. root is immutable.
 	root *Mount
 
-	// refs is the reference count. refs is accessed using atomic memory
-	// operations.
-	refs int64
-
 	// mountpoints maps all Dentries which are mount points in this namespace
 	// to the number of Mounts for which they are mount points. mountpoints is
 	// protected by VirtualFilesystem.mountMu.
@@ -168,9 +166,9 @@ func (vfs *VirtualFilesystem) NewMountNamespace(ctx context.Context, creds *auth
 	}
 	mntns := &MountNamespace{
 		Owner:       creds.UserNamespace,
-		refs:        1,
 		mountpoints: make(map[*Dentry]uint32),
 	}
+	mntns.EnableLeakCheck()
 	mntns.root = newMount(vfs, fs, root, mntns, &MountOptions{})
 	return mntns, nil
 }
@@ -509,17 +507,10 @@ func (mnt *Mount) DecRef(ctx context.Context) {
 	}
 }
 
-// IncRef increments mntns' reference count.
-func (mntns *MountNamespace) IncRef() {
-	if atomic.AddInt64(&mntns.refs, 1) <= 1 {
-		panic("MountNamespace.IncRef() called without holding a reference")
-	}
-}
-
 // DecRef decrements mntns' reference count.
 func (mntns *MountNamespace) DecRef(ctx context.Context) {
 	vfs := mntns.root.fs.VirtualFilesystem()
-	if refs := atomic.AddInt64(&mntns.refs, -1); refs == 0 {
+	mntns.MountNamespaceRefs.DecRef(func() {
 		vfs.mountMu.Lock()
 		vfs.mounts.seq.BeginWrite()
 		vdsToDecRef, mountsToDecRef := vfs.umountRecursiveLocked(mntns.root, &umountRecursiveOptions{
@@ -533,9 +524,7 @@ func (mntns *MountNamespace) DecRef(ctx context.Context) {
 		for _, mnt := range mountsToDecRef {
 			mnt.DecRef(ctx)
 		}
-	} else if refs < 0 {
-		panic("MountNamespace.DecRef() called without holding a reference")
-	}
+	})
 }
 
 // getMountAt returns the last Mount in the stack mounted at (mnt, d). It takes
diff --git a/pkg/tcpip/link/tun/BUILD b/pkg/tcpip/link/tun/BUILD
index 6c137f693c..0243424f65 100644
--- a/pkg/tcpip/link/tun/BUILD
+++ b/pkg/tcpip/link/tun/BUILD
@@ -1,18 +1,32 @@
 load("//tools:defs.bzl", "go_library")
+load("//tools/go_generics:defs.bzl", "go_template_instance")
 
 package(licenses = ["notice"])
 
+go_template_instance(
+    name = "tun_endpoint_refs",
+    out = "tun_endpoint_refs.go",
+    package = "tun",
+    prefix = "tunEndpoint",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "tunEndpoint",
+    },
+)
+
 go_library(
     name = "tun",
     srcs = [
         "device.go",
         "protocol.go",
+        "tun_endpoint_refs.go",
         "tun_unsafe.go",
     ],
     visibility = ["//visibility:public"],
     deps = [
         "//pkg/abi/linux",
         "//pkg/context",
+        "//pkg/log",
         "//pkg/refs",
         "//pkg/sync",
         "//pkg/syserror",
diff --git a/pkg/tcpip/link/tun/device.go b/pkg/tcpip/link/tun/device.go
index 3b1510a339..b6ddbe81eb 100644
--- a/pkg/tcpip/link/tun/device.go
+++ b/pkg/tcpip/link/tun/device.go
@@ -19,7 +19,6 @@ import (
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
-	"gvisor.dev/gvisor/pkg/refs"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/tcpip"
@@ -135,6 +134,7 @@ func attachOrCreateNIC(s *stack.Stack, name, prefix string, linkCaps stack.LinkE
 
 		// 2. Creating a new NIC.
 		id := tcpip.NICID(s.UniqueID())
+		// TODO(gvisor.dev/1486): enable leak check for tunEndpoint.
 		endpoint := &tunEndpoint{
 			Endpoint: channel.New(defaultDevOutQueueLen, defaultDevMtu, ""),
 			stack:    s,
@@ -331,19 +331,18 @@ func (d *Device) WriteNotify() {
 // It is ref-counted as multiple opening files can attach to the same NIC.
 // The last owner is responsible for deleting the NIC.
 type tunEndpoint struct {
+	tunEndpointRefs
 	*channel.Endpoint
 
-	refs.AtomicRefCount
-
 	stack *stack.Stack
 	nicID tcpip.NICID
 	name  string
 	isTap bool
 }
 
-// DecRef decrements refcount of e, removes NIC if refcount goes to 0.
+// DecRef decrements refcount of e, removing NIC if it reaches 0.
 func (e *tunEndpoint) DecRef(ctx context.Context) {
-	e.DecRefWithDestructor(ctx, func(context.Context) {
+	e.tunEndpointRefs.DecRef(func() {
 		e.stack.RemoveNIC(e.nicID)
 	})
 }

From 5116c7be8d64934980937113870a8fbc82b14ea0 Mon Sep 17 00:00:00 2001
From: Zach Koopmans <zkoopmans@google.com>
Date: Tue, 25 Aug 2020 22:01:00 -0700
Subject: [PATCH 087/211] Fix SocketPairTest and BadSocketPairTest in
 opensource.

PiperOrigin-RevId: 328467152
---
 test/syscalls/linux/socket_inet_loopback.cc  | 11 +++++++----
 test/syscalls/linux/socket_ip_udp_generic.cc |  6 ++++--
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc
index bd30fb86b8..7c1d6a4148 100644
--- a/test/syscalls/linux/socket_inet_loopback.cc
+++ b/test/syscalls/linux/socket_inet_loopback.cc
@@ -97,11 +97,13 @@ TEST(BadSocketPairArgs, ValidateErrForBadCallsToSocketPair) {
   ASSERT_THAT(socketpair(AF_INET6, 0, 0, fd),
               SyscallFailsWithErrno(ESOCKTNOSUPPORT));
 
-  // Invalid AF will return ENOAFSUPPORT.
+  // Invalid AF will return EAFNOSUPPORT or EPERM.
   ASSERT_THAT(socketpair(AF_MAX, 0, 0, fd),
-              SyscallFailsWithErrno(EAFNOSUPPORT));
+              ::testing::AnyOf(SyscallFailsWithErrno(EAFNOSUPPORT),
+                               SyscallFailsWithErrno(EPERM)));
   ASSERT_THAT(socketpair(8675309, 0, 0, fd),
-              SyscallFailsWithErrno(EAFNOSUPPORT));
+              ::testing::AnyOf(SyscallFailsWithErrno(EAFNOSUPPORT),
+                               SyscallFailsWithErrno(EPERM)));
 }
 
 enum class Operation {
@@ -116,7 +118,8 @@ std::string OperationToString(Operation operation) {
       return "Bind";
     case Operation::Connect:
       return "Connect";
-    case Operation::SendTo:
+    // Operation::SendTo is the default.
+    default:
       return "SendTo";
   }
 }
diff --git a/test/syscalls/linux/socket_ip_udp_generic.cc b/test/syscalls/linux/socket_ip_udp_generic.cc
index 5cad6f0174..6e4ecd6806 100644
--- a/test/syscalls/linux/socket_ip_udp_generic.cc
+++ b/test/syscalls/linux/socket_ip_udp_generic.cc
@@ -435,8 +435,10 @@ TEST_P(UDPSocketPairTest, TOSRecvMismatch) {
 
 // Test that an IPv4 socket does not support the IPv6 TClass option.
 TEST_P(UDPSocketPairTest, TClassRecvMismatch) {
-  // This should only test AF_INET sockets for the mismatch behavior.
-  SKIP_IF(GetParam().domain != AF_INET);
+  // This should only test AF_INET6 sockets for the mismatch behavior.
+  SKIP_IF(GetParam().domain != AF_INET6);
+  // IPV6_RECVTCLASS is only valid for SOCK_DGRAM and SOCK_RAW.
+  SKIP_IF(GetParam().type != SOCK_DGRAM | GetParam().type != SOCK_RAW);
 
   auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
 

From 2a322c451e0a04df55d8fa4ea6e055da39231efa Mon Sep 17 00:00:00 2001
From: Nicolas Lacasse <nlacasse@google.com>
Date: Wed, 26 Aug 2020 10:03:46 -0700
Subject: [PATCH 088/211] tmpfs: Allow xattrs in the trusted namespace if creds
 has CAP_SYS_ADMIN.

This is needed to support the overlay opaque attribute.

PiperOrigin-RevId: 328552985
---
 pkg/abi/linux/xattr.go                  |  3 +
 pkg/sentry/fsimpl/overlay/filesystem.go |  2 +-
 pkg/sentry/fsimpl/tmpfs/tmpfs.go        | 68 ++++++++++++++---------
 test/syscalls/linux/memfd.cc            | 17 +-----
 test/syscalls/linux/xattr.cc            | 74 +++++++++++++++++++++++++
 test/util/fs_util.cc                    | 20 +++++++
 test/util/fs_util.h                     |  6 ++
 7 files changed, 147 insertions(+), 43 deletions(-)

diff --git a/pkg/abi/linux/xattr.go b/pkg/abi/linux/xattr.go
index 99180b208b..8ef837f27a 100644
--- a/pkg/abi/linux/xattr.go
+++ b/pkg/abi/linux/xattr.go
@@ -23,6 +23,9 @@ const (
 	XATTR_CREATE  = 1
 	XATTR_REPLACE = 2
 
+	XATTR_TRUSTED_PREFIX     = "trusted."
+	XATTR_TRUSTED_PREFIX_LEN = len(XATTR_TRUSTED_PREFIX)
+
 	XATTR_USER_PREFIX     = "user."
 	XATTR_USER_PREFIX_LEN = len(XATTR_USER_PREFIX)
 )
diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go
index a3cee4047d..e720bfb0be 100644
--- a/pkg/sentry/fsimpl/overlay/filesystem.go
+++ b/pkg/sentry/fsimpl/overlay/filesystem.go
@@ -30,7 +30,7 @@ import (
 // _OVL_XATTR_OPAQUE is an extended attribute key whose value is set to "y" for
 // opaque directories.
 // Linux: fs/overlayfs/overlayfs.h:OVL_XATTR_OPAQUE
-const _OVL_XATTR_OPAQUE = "trusted.overlay.opaque"
+const _OVL_XATTR_OPAQUE = linux.XATTR_TRUSTED_PREFIX + "overlay.opaque"
 
 func isWhiteout(stat *linux.Statx) bool {
 	return stat.Mode&linux.S_IFMT == linux.S_IFCHR && stat.RdevMajor == 0 && stat.RdevMinor == 0
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index a7fdf19caf..c4cec41308 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -631,49 +631,65 @@ func (i *inode) listxattr(size uint64) ([]string, error) {
 }
 
 func (i *inode) getxattr(creds *auth.Credentials, opts *vfs.GetxattrOptions) (string, error) {
-	if err := i.checkPermissions(creds, vfs.MayRead); err != nil {
+	if err := i.checkXattrPermissions(creds, opts.Name, vfs.MayRead); err != nil {
 		return "", err
 	}
-	if !strings.HasPrefix(opts.Name, linux.XATTR_USER_PREFIX) {
-		return "", syserror.EOPNOTSUPP
-	}
-	if !i.userXattrSupported() {
-		return "", syserror.ENODATA
-	}
 	return i.xattrs.Getxattr(opts)
 }
 
 func (i *inode) setxattr(creds *auth.Credentials, opts *vfs.SetxattrOptions) error {
-	if err := i.checkPermissions(creds, vfs.MayWrite); err != nil {
+	if err := i.checkXattrPermissions(creds, opts.Name, vfs.MayWrite); err != nil {
 		return err
 	}
-	if !strings.HasPrefix(opts.Name, linux.XATTR_USER_PREFIX) {
-		return syserror.EOPNOTSUPP
-	}
-	if !i.userXattrSupported() {
-		return syserror.EPERM
-	}
 	return i.xattrs.Setxattr(opts)
 }
 
 func (i *inode) removexattr(creds *auth.Credentials, name string) error {
-	if err := i.checkPermissions(creds, vfs.MayWrite); err != nil {
+	if err := i.checkXattrPermissions(creds, name, vfs.MayWrite); err != nil {
 		return err
 	}
-	if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) {
-		return syserror.EOPNOTSUPP
-	}
-	if !i.userXattrSupported() {
-		return syserror.EPERM
-	}
 	return i.xattrs.Removexattr(name)
 }
 
-// Extended attributes in the user.* namespace are only supported for regular
-// files and directories.
-func (i *inode) userXattrSupported() bool {
-	filetype := linux.S_IFMT & atomic.LoadUint32(&i.mode)
-	return filetype == linux.S_IFREG || filetype == linux.S_IFDIR
+func (i *inode) checkXattrPermissions(creds *auth.Credentials, name string, ats vfs.AccessTypes) error {
+	switch {
+	case ats&vfs.MayRead == vfs.MayRead:
+		if err := i.checkPermissions(creds, vfs.MayRead); err != nil {
+			return err
+		}
+	case ats&vfs.MayWrite == vfs.MayWrite:
+		if err := i.checkPermissions(creds, vfs.MayWrite); err != nil {
+			return err
+		}
+	default:
+		panic(fmt.Sprintf("checkXattrPermissions called with impossible AccessTypes: %v", ats))
+	}
+
+	switch {
+	case strings.HasPrefix(name, linux.XATTR_TRUSTED_PREFIX):
+		// The trusted.* namespace can only be accessed by privileged
+		// users.
+		if creds.HasCapability(linux.CAP_SYS_ADMIN) {
+			return nil
+		}
+		if ats&vfs.MayWrite == vfs.MayWrite {
+			return syserror.EPERM
+		}
+		return syserror.ENODATA
+	case strings.HasPrefix(name, linux.XATTR_USER_PREFIX):
+		// Extended attributes in the user.* namespace are only
+		// supported for regular files and directories.
+		filetype := linux.S_IFMT & atomic.LoadUint32(&i.mode)
+		if filetype == linux.S_IFREG || filetype == linux.S_IFDIR {
+			return nil
+		}
+		if ats&vfs.MayWrite == vfs.MayWrite {
+			return syserror.EPERM
+		}
+		return syserror.ENODATA
+
+	}
+	return syserror.EOPNOTSUPP
 }
 
 // fileDescription is embedded by tmpfs implementations of
diff --git a/test/syscalls/linux/memfd.cc b/test/syscalls/linux/memfd.cc
index f8b7f79380..4a450742b6 100644
--- a/test/syscalls/linux/memfd.cc
+++ b/test/syscalls/linux/memfd.cc
@@ -14,12 +14,10 @@
 
 #include <errno.h>
 #include <fcntl.h>
-#include <linux/magic.h>
 #include <linux/memfd.h>
 #include <linux/unistd.h>
 #include <string.h>
 #include <sys/mman.h>
-#include <sys/statfs.h>
 #include <sys/syscall.h>
 
 #include <vector>
@@ -53,6 +51,7 @@ namespace {
 #define F_SEAL_GROW 0x0004
 #define F_SEAL_WRITE 0x0008
 
+using ::gvisor::testing::IsTmpfs;
 using ::testing::StartsWith;
 
 const std::string kMemfdName = "some-memfd";
@@ -444,20 +443,6 @@ TEST(MemfdTest, SealsAreInodeLevelProperties) {
   EXPECT_THAT(ftruncate(memfd3.get(), kPageSize), SyscallFailsWithErrno(EPERM));
 }
 
-PosixErrorOr<bool> IsTmpfs(const std::string& path) {
-  struct statfs stat;
-  if (statfs(path.c_str(), &stat)) {
-    if (errno == ENOENT) {
-      // Nothing at path, don't raise this as an error. Instead, just report no
-      // tmpfs at path.
-      return false;
-    }
-    return PosixError(errno,
-                      absl::StrFormat("statfs(\"%s\", %#p)", path, &stat));
-  }
-  return stat.f_type == TMPFS_MAGIC;
-}
-
 // Tmpfs files also support seals, but are created with F_SEAL_SEAL.
 TEST(MemfdTest, TmpfsFilesHaveSealSeal) {
   SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsTmpfs("/tmp")));
diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc
index cbcf08451c..5510a87a00 100644
--- a/test/syscalls/linux/xattr.cc
+++ b/test/syscalls/linux/xattr.cc
@@ -28,6 +28,7 @@
 #include "test/syscalls/linux/file_base.h"
 #include "test/util/capability_util.h"
 #include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
 #include "test/util/posix_error.h"
 #include "test/util/temp_path.h"
 #include "test/util/test_util.h"
@@ -37,6 +38,8 @@ namespace testing {
 
 namespace {
 
+using ::gvisor::testing::IsTmpfs;
+
 class XattrTest : public FileTest {};
 
 TEST_F(XattrTest, XattrNonexistentFile) {
@@ -604,6 +607,77 @@ TEST_F(XattrTest, XattrWithFD) {
   EXPECT_THAT(fremovexattr(fd.get(), name), SyscallSucceeds());
 }
 
+TEST_F(XattrTest, TrustedNamespaceWithCapSysAdmin) {
+  // Trusted namespace not supported in VFS1.
+  SKIP_IF(IsRunningWithVFS1());
+
+  // TODO(b/66162845): Only gVisor tmpfs currently supports trusted namespace.
+  SKIP_IF(IsRunningOnGvisor() &&
+          !ASSERT_NO_ERRNO_AND_VALUE(IsTmpfs(test_file_name_)));
+
+  // Setting/Getting in the trusted namespace requires CAP_SYS_ADMIN.
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+  const char* path = test_file_name_.c_str();
+  const char name[] = "trusted.test";
+
+  // Set.
+  char val = 'a';
+  size_t size = sizeof(val);
+  EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds());
+
+  // Get.
+  char got = '\0';
+  EXPECT_THAT(getxattr(path, name, &got, size), SyscallSucceedsWithValue(size));
+  EXPECT_EQ(val, got);
+
+  // List.
+  char list[sizeof(name)];
+  EXPECT_THAT(listxattr(path, list, sizeof(list)),
+              SyscallSucceedsWithValue(sizeof(name)));
+  EXPECT_STREQ(list, name);
+
+  // Remove.
+  EXPECT_THAT(removexattr(path, name), SyscallSucceeds());
+
+  // Get should now return ENODATA.
+  EXPECT_THAT(getxattr(path, name, &got, size), SyscallFailsWithErrno(ENODATA));
+}
+
+TEST_F(XattrTest, TrustedNamespaceWithoutCapSysAdmin) {
+  // Trusted namespace not supported in VFS1.
+  SKIP_IF(IsRunningWithVFS1());
+
+  // TODO(b/66162845): Only gVisor tmpfs currently supports trusted namespace.
+  SKIP_IF(IsRunningOnGvisor() &&
+          !ASSERT_NO_ERRNO_AND_VALUE(IsTmpfs(test_file_name_)));
+
+  // Drop CAP_SYS_ADMIN if we have it.
+  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))) {
+    EXPECT_NO_ERRNO(SetCapability(CAP_SYS_ADMIN, false));
+  }
+
+  const char* path = test_file_name_.c_str();
+  const char name[] = "trusted.test";
+
+  // Set fails.
+  char val = 'a';
+  size_t size = sizeof(val);
+  EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0),
+              SyscallFailsWithErrno(EPERM));
+
+  // Get fails.
+  char got = '\0';
+  EXPECT_THAT(getxattr(path, name, &got, size), SyscallFailsWithErrno(ENODATA));
+
+  // List still works, but returns no items.
+  char list[sizeof(name)];
+  EXPECT_THAT(listxattr(path, list, sizeof(list)), SyscallSucceedsWithValue(0));
+
+  // Remove fails.
+  EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(EPERM));
+}
+
 }  // namespace
 
 }  // namespace testing
diff --git a/test/util/fs_util.cc b/test/util/fs_util.cc
index 5418948fe8..dffa16183d 100644
--- a/test/util/fs_util.cc
+++ b/test/util/fs_util.cc
@@ -15,7 +15,11 @@
 #include "test/util/fs_util.h"
 
 #include <dirent.h>
+#ifndef __fuchsia__
+#include <linux/magic.h>
+#endif  // __fuchsia__
 #include <sys/stat.h>
+#include <sys/statfs.h>
 #include <sys/types.h>
 #include <unistd.h>
 
@@ -629,5 +633,21 @@ PosixErrorOr<std::string> ProcessExePath(int pid) {
   return ReadLink(absl::StrCat("/proc/", pid, "/exe"));
 }
 
+#ifndef __fuchsia__
+PosixErrorOr<bool> IsTmpfs(const std::string& path) {
+  struct statfs stat;
+  if (statfs(path.c_str(), &stat)) {
+    if (errno == ENOENT) {
+      // Nothing at path, don't raise this as an error. Instead, just report no
+      // tmpfs at path.
+      return false;
+    }
+    return PosixError(errno,
+                      absl::StrFormat("statfs(\"%s\", %#p)", path, &stat));
+  }
+  return stat.f_type == TMPFS_MAGIC;
+}
+#endif  // __fuchsia__
+
 }  // namespace testing
 }  // namespace gvisor
diff --git a/test/util/fs_util.h b/test/util/fs_util.h
index 8cdac23a18..0441906579 100644
--- a/test/util/fs_util.h
+++ b/test/util/fs_util.h
@@ -17,6 +17,7 @@
 
 #include <dirent.h>
 #include <sys/stat.h>
+#include <sys/statfs.h>
 #include <sys/types.h>
 #include <unistd.h>
 
@@ -178,6 +179,11 @@ std::string CleanPath(absl::string_view path);
 // Returns the full path to the executable of the given pid or a PosixError.
 PosixErrorOr<std::string> ProcessExePath(int pid);
 
+#ifndef __fuchsia__
+// IsTmpfs returns true if the file at path is backed by tmpfs.
+PosixErrorOr<bool> IsTmpfs(const std::string& path);
+#endif  // __fuchsia__
+
 namespace internal {
 // Not part of the public API.
 std::string JoinPathImpl(std::initializer_list<absl::string_view> paths);

From 0473edd47ca9722076012988e6a39907ea024e24 Mon Sep 17 00:00:00 2001
From: Ayush Ranjan <ayushranjan@google.com>
Date: Wed, 26 Aug 2020 12:09:47 -0700
Subject: [PATCH 089/211] [runtime-tests] Exclude flaky nodejs test.

PiperOrigin-RevId: 328579755
---
 test/runtimes/exclude_nodejs12.4.0.csv | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/runtimes/exclude_nodejs12.4.0.csv b/test/runtimes/exclude_nodejs12.4.0.csv
index 1d8e65fd02..5866ee56dd 100644
--- a/test/runtimes/exclude_nodejs12.4.0.csv
+++ b/test/runtimes/exclude_nodejs12.4.0.csv
@@ -49,6 +49,7 @@ pseudo-tty/test-tty-wrap.js,b/162801321,
 pummel/test-heapdump-http2.js,,Flaky
 pummel/test-net-pingpong.js,,
 pummel/test-vm-memleak.js,b/162799436,
+pummel/test-watch-file.js,,Flaky - Timeout
 sequential/test-child-process-pass-fd.js,b/63926391,Flaky
 sequential/test-https-connect-localport.js,,Flaky - EADDRINUSE
 sequential/test-net-bytes-per-incoming-chunk-overhead.js,,flaky - timeout

From d872b342b2c2291420a9570edcf340040754bb44 Mon Sep 17 00:00:00 2001
From: Nicolas Lacasse <nlacasse@google.com>
Date: Wed, 26 Aug 2020 12:28:58 -0700
Subject: [PATCH 090/211] Remove spurious fd.IncRef().

PiperOrigin-RevId: 328583461
---
 pkg/sentry/fsimpl/overlay/non_directory.go | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pkg/sentry/fsimpl/overlay/non_directory.go b/pkg/sentry/fsimpl/overlay/non_directory.go
index d3060a481c..268b325375 100644
--- a/pkg/sentry/fsimpl/overlay/non_directory.go
+++ b/pkg/sentry/fsimpl/overlay/non_directory.go
@@ -121,7 +121,6 @@ func (fd *nonDirectoryFD) OnClose(ctx context.Context) error {
 		fd.cachedFlags = statusFlags
 	}
 	wrappedFD := fd.cachedFD
-	defer wrappedFD.IncRef()
 	fd.mu.Unlock()
 	return wrappedFD.OnClose(ctx)
 }

From f63cddc6b4826007ca2a755d30b2df65ea21c518 Mon Sep 17 00:00:00 2001
From: Adin Scannell <ascannell@google.com>
Date: Wed, 26 Aug 2020 14:40:30 -0700
Subject: [PATCH 091/211] Support stdlib analyzers with nogo.

This immediately revealed an escape analysis violation (!), where
the sync.Map was being used in a context where escapes were not
allowed. This is a relatively minor fix and is included.

PiperOrigin-RevId: 328611237
---
 pkg/sentry/platform/kvm/bluepill_fault.go     |   4 +
 pkg/sentry/platform/kvm/kvm_const.go          |   2 +
 pkg/sentry/platform/kvm/machine.go            |  40 ++-
 tools/bazeldefs/defs.bzl                      |  11 +-
 tools/checkescape/BUILD                       |   2 +-
 tools/checkescape/checkescape.go              |  19 +-
 tools/checkescape/test1/test1.go              |  15 -
 tools/checkescape/test2/test2.go              |   6 -
 .../gomarshal/generator_interfaces.go         |   2 +-
 tools/nogo/BUILD                              |  13 +-
 tools/nogo/build.go                           |   4 +-
 tools/nogo/config.go                          |   8 +
 tools/nogo/data/data.go                       |  21 --
 tools/nogo/defs.bzl                           | 185 +++++++---
 tools/nogo/{data => dump}/BUILD               |   4 +-
 tools/nogo/dump/dump.go                       |  78 +++++
 tools/nogo/nogo.go                            | 323 ++++++++++++++----
 17 files changed, 560 insertions(+), 177 deletions(-)
 delete mode 100644 tools/nogo/data/data.go
 rename tools/nogo/{data => dump}/BUILD (77%)
 create mode 100644 tools/nogo/dump/dump.go

diff --git a/pkg/sentry/platform/kvm/bluepill_fault.go b/pkg/sentry/platform/kvm/bluepill_fault.go
index e34f46aeb7..a182e4f222 100644
--- a/pkg/sentry/platform/kvm/bluepill_fault.go
+++ b/pkg/sentry/platform/kvm/bluepill_fault.go
@@ -98,6 +98,10 @@ func handleBluepillFault(m *machine, physical uintptr, phyRegions []physicalRegi
 	}
 	errno := m.setMemoryRegion(int(slot), physicalStart, length, virtualStart, flags)
 	if errno == 0 {
+		// Store the physical address in the slot. This is used to
+		// avoid calls to handleBluepillFault in the future (see
+		// machine.mapPhysical).
+		atomic.StoreUintptr(&m.usedSlots[slot], physical)
 		// Successfully added region; we can increment nextSlot and
 		// allow another set to proceed here.
 		atomic.StoreUint32(&m.nextSlot, slot+1)
diff --git a/pkg/sentry/platform/kvm/kvm_const.go b/pkg/sentry/platform/kvm/kvm_const.go
index 3bf918446f..5c4b18899f 100644
--- a/pkg/sentry/platform/kvm/kvm_const.go
+++ b/pkg/sentry/platform/kvm/kvm_const.go
@@ -56,6 +56,7 @@ const (
 
 // KVM capability options.
 const (
+	_KVM_CAP_MAX_MEMSLOTS          = 0x0a
 	_KVM_CAP_MAX_VCPUS             = 0x42
 	_KVM_CAP_ARM_VM_IPA_SIZE       = 0xa5
 	_KVM_CAP_VCPU_EVENTS           = 0x29
@@ -64,6 +65,7 @@ const (
 
 // KVM limits.
 const (
+	_KVM_NR_MEMSLOTS      = 0x100
 	_KVM_NR_VCPUS         = 0xff
 	_KVM_NR_INTERRUPTS    = 0x100
 	_KVM_NR_CPUID_ENTRIES = 0x100
diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go
index 6c54712d1f..372a4cbd73 100644
--- a/pkg/sentry/platform/kvm/machine.go
+++ b/pkg/sentry/platform/kvm/machine.go
@@ -43,9 +43,6 @@ type machine struct {
 	// kernel is the set of global structures.
 	kernel ring0.Kernel
 
-	// mappingCache is used for mapPhysical.
-	mappingCache sync.Map
-
 	// mu protects vCPUs.
 	mu sync.RWMutex
 
@@ -63,6 +60,12 @@ type machine struct {
 	// maxVCPUs is the maximum number of vCPUs supported by the machine.
 	maxVCPUs int
 
+	// maxSlots is the maximum number of memory slots supported by the machine.
+	maxSlots int
+
+	// usedSlots is the set of used physical addresses (sorted).
+	usedSlots []uintptr
+
 	// nextID is the next vCPU ID.
 	nextID uint32
 }
@@ -184,6 +187,7 @@ func newMachine(vm int) (*machine, error) {
 		PageTables: pagetables.New(newAllocator()),
 	})
 
+	// Pull the maximum vCPUs.
 	maxVCPUs, _, errno := syscall.RawSyscall(syscall.SYS_IOCTL, uintptr(m.fd), _KVM_CHECK_EXTENSION, _KVM_CAP_MAX_VCPUS)
 	if errno != 0 {
 		m.maxVCPUs = _KVM_NR_VCPUS
@@ -191,11 +195,19 @@ func newMachine(vm int) (*machine, error) {
 		m.maxVCPUs = int(maxVCPUs)
 	}
 	log.Debugf("The maximum number of vCPUs is %d.", m.maxVCPUs)
-
-	// Create the vCPUs map/slices.
 	m.vCPUsByTID = make(map[uint64]*vCPU)
 	m.vCPUsByID = make([]*vCPU, m.maxVCPUs)
 
+	// Pull the maximum slots.
+	maxSlots, _, errno := syscall.RawSyscall(syscall.SYS_IOCTL, uintptr(m.fd), _KVM_CHECK_EXTENSION, _KVM_CAP_MAX_MEMSLOTS)
+	if errno != 0 {
+		m.maxSlots = _KVM_NR_MEMSLOTS
+	} else {
+		m.maxSlots = int(maxSlots)
+	}
+	log.Debugf("The maximum number of slots is %d.", m.maxSlots)
+	m.usedSlots = make([]uintptr, m.maxSlots)
+
 	// Apply the physical mappings. Note that these mappings may point to
 	// guest physical addresses that are not actually available. These
 	// physical pages are mapped on demand, see kernel_unsafe.go.
@@ -272,6 +284,20 @@ func newMachine(vm int) (*machine, error) {
 	return m, nil
 }
 
+// hasSlot returns true iff the given address is mapped.
+//
+// This must be done via a linear scan.
+//
+//go:nosplit
+func (m *machine) hasSlot(physical uintptr) bool {
+	for i := 0; i < len(m.usedSlots); i++ {
+		if p := atomic.LoadUintptr(&m.usedSlots[i]); p == physical {
+			return true
+		}
+	}
+	return false
+}
+
 // mapPhysical checks for the mapping of a physical range, and installs one if
 // not available. This attempts to be efficient for calls in the hot path.
 //
@@ -286,8 +312,8 @@ func (m *machine) mapPhysical(physical, length uintptr, phyRegions []physicalReg
 			panic("mapPhysical on unknown physical address")
 		}
 
-		if _, ok := m.mappingCache.LoadOrStore(physicalStart, true); !ok {
-			// Not present in the cache; requires setting the slot.
+		// Is this already mapped? Check the usedSlots.
+		if !m.hasSlot(physicalStart) {
 			if _, ok := handleBluepillFault(m, physical, phyRegions, flags); !ok {
 				panic("handleBluepillFault failed")
 			}
diff --git a/tools/bazeldefs/defs.bzl b/tools/bazeldefs/defs.bzl
index 4bbcda054f..dad5fc3b2c 100644
--- a/tools/bazeldefs/defs.bzl
+++ b/tools/bazeldefs/defs.bzl
@@ -147,7 +147,7 @@ def go_rule(rule, implementation, **kwargs):
     Returns:
         The result of invoking the rule.
     """
-    attrs = kwargs.pop("attrs", [])
+    attrs = kwargs.pop("attrs", dict())
     attrs["_go_context_data"] = attr.label(default = "@io_bazel_rules_go//:go_context_data")
     attrs["_stdlib"] = attr.label(default = "@io_bazel_rules_go//:stdlib")
     toolchains = kwargs.get("toolchains", []) + ["@io_bazel_rules_go//go:toolchain"]
@@ -158,12 +158,17 @@ def go_test_library(target):
         return target.attr.embed[0]
     return None
 
-def go_context(ctx):
+def go_context(ctx, std = False):
+    # We don't change anything for the standard library analysis. All Go files
+    # are available in all instances. Note that this includes the standard
+    # library sources, which are analyzed by nogo.
     go_ctx = _go_context(ctx)
     return struct(
         go = go_ctx.go,
         env = go_ctx.env,
-        runfiles = depset([go_ctx.go] + go_ctx.sdk.tools + go_ctx.stdlib.libs),
+        nogo_args = [],
+        stdlib_srcs = go_ctx.sdk.srcs,
+        runfiles = depset([go_ctx.go] + go_ctx.sdk.srcs + go_ctx.sdk.tools + go_ctx.stdlib.libs),
         goos = go_ctx.sdk.goos,
         goarch = go_ctx.sdk.goarch,
         tags = go_ctx.tags,
diff --git a/tools/checkescape/BUILD b/tools/checkescape/BUILD
index b8c3ddf44e..6273aa779c 100644
--- a/tools/checkescape/BUILD
+++ b/tools/checkescape/BUILD
@@ -8,7 +8,7 @@ go_library(
     nogo = False,
     visibility = ["//tools/nogo:__subpackages__"],
     deps = [
-        "//tools/nogo/data",
+        "//tools/nogo/dump",
         "@org_golang_x_tools//go/analysis:go_tool_library",
         "@org_golang_x_tools//go/analysis/passes/buildssa:go_tool_library",
         "@org_golang_x_tools//go/ssa:go_tool_library",
diff --git a/tools/checkescape/checkescape.go b/tools/checkescape/checkescape.go
index f8def48234..aab3c36a11 100644
--- a/tools/checkescape/checkescape.go
+++ b/tools/checkescape/checkescape.go
@@ -66,7 +66,6 @@ import (
 	"go/token"
 	"go/types"
 	"io"
-	"os"
 	"path/filepath"
 	"strconv"
 	"strings"
@@ -74,7 +73,7 @@ import (
 	"golang.org/x/tools/go/analysis"
 	"golang.org/x/tools/go/analysis/passes/buildssa"
 	"golang.org/x/tools/go/ssa"
-	"gvisor.dev/gvisor/tools/nogo/data"
+	"gvisor.dev/gvisor/tools/nogo/dump"
 )
 
 const (
@@ -256,15 +255,14 @@ func (ec *EscapeCount) Record(reason EscapeReason) bool {
 // used only to remove false positives for escape analysis. The call will be
 // elided if escape analysis is able to put the object on the heap exclusively.
 func loadObjdump() (map[LinePosition]string, error) {
-	f, err := os.Open(data.Objdump)
+	cmd, out, err := dump.Command()
 	if err != nil {
 		return nil, err
 	}
-	defer f.Close()
 
 	// Build the map.
 	m := make(map[LinePosition]string)
-	r := bufio.NewReader(f)
+	r := bufio.NewReader(out)
 	var (
 		lastField string
 		lastPos   LinePosition
@@ -329,6 +327,11 @@ func loadObjdump() (map[LinePosition]string, error) {
 		}
 	}
 
+	// Wait for the dump to finish.
+	if err := cmd.Wait(); err != nil {
+		return nil, err
+	}
+
 	return m, nil
 }
 
@@ -413,12 +416,6 @@ func run(pass *analysis.Pass) (interface{}, error) {
 					return escapes(unknownPackage, "no package", inst, ec)
 				}
 
-				// Atomic functions are instrinics. We can
-				// assume that they don't escape.
-				if x.Pkg.Pkg.Name() == "atomic" {
-					return nil
-				}
-
 				// Is this a local function? If yes, call the
 				// function to load the local function. The
 				// local escapes are the escapes found in the
diff --git a/tools/checkescape/test1/test1.go b/tools/checkescape/test1/test1.go
index 68d3f72ccb..a1d36459fc 100644
--- a/tools/checkescape/test1/test1.go
+++ b/tools/checkescape/test1/test1.go
@@ -17,7 +17,6 @@ package test1
 
 import (
 	"fmt"
-	"reflect"
 )
 
 // Interface is a generic interface.
@@ -163,20 +162,6 @@ func dynamicRec(f func()) {
 	Dynamic(f)
 }
 
-// +mustescape:local,unknown
-//go:noinline
-//go:nosplit
-func Unknown() {
-	_ = reflect.TypeOf((*Type)(nil)) // Does not actually escape.
-}
-
-// +mustescape:unknown
-//go:noinline
-//go:nosplit
-func unknownRec() {
-	Unknown()
-}
-
 //go:noinline
 //go:nosplit
 func internalFunc() {
diff --git a/tools/checkescape/test2/test2.go b/tools/checkescape/test2/test2.go
index 7fce3e3bec..2d5865f474 100644
--- a/tools/checkescape/test2/test2.go
+++ b/tools/checkescape/test2/test2.go
@@ -81,12 +81,6 @@ func dynamicCrossPkg(f func()) {
 	test1.Dynamic(f)
 }
 
-// +mustescape:unknown
-//go:noinline
-func unknownCrossPkg() {
-	test1.Unknown()
-}
-
 // +mustescape:stack
 //go:noinline
 func splitCrosssPkt() {
diff --git a/tools/go_marshal/gomarshal/generator_interfaces.go b/tools/go_marshal/gomarshal/generator_interfaces.go
index e3c3dac63e..cf76b5241b 100644
--- a/tools/go_marshal/gomarshal/generator_interfaces.go
+++ b/tools/go_marshal/gomarshal/generator_interfaces.go
@@ -224,7 +224,7 @@ func (g *interfaceGenerator) emitNoEscapeSliceDataPointer(srcPtr, dstVar string)
 func (g *interfaceGenerator) emitKeepAlive(ptrVar string) {
 	g.emit("// Since we bypassed the compiler's escape analysis, indicate that %s\n", ptrVar)
 	g.emit("// must live until the use above.\n")
-	g.emit("runtime.KeepAlive(%s)\n", ptrVar)
+	g.emit("runtime.KeepAlive(%s) // escapes: replaced by intrinsic.\n", ptrVar)
 }
 
 func (g *interfaceGenerator) expandBinaryExpr(b *strings.Builder, e *ast.BinaryExpr) {
diff --git a/tools/nogo/BUILD b/tools/nogo/BUILD
index e1bfb9a2cd..fb35c5ffd3 100644
--- a/tools/nogo/BUILD
+++ b/tools/nogo/BUILD
@@ -1,7 +1,18 @@
 load("//tools:defs.bzl", "bzl_library", "go_library")
+load("//tools/nogo:defs.bzl", "nogo_dump_tool", "nogo_stdlib")
 
 package(licenses = ["notice"])
 
+nogo_dump_tool(
+    name = "dump_tool",
+    visibility = ["//visibility:public"],
+)
+
+nogo_stdlib(
+    name = "stdlib",
+    visibility = ["//visibility:public"],
+)
+
 go_library(
     name = "nogo",
     srcs = [
@@ -16,7 +27,7 @@ go_library(
     deps = [
         "//tools/checkescape",
         "//tools/checkunsafe",
-        "//tools/nogo/data",
+        "//tools/nogo/dump",
         "@org_golang_x_tools//go/analysis:go_tool_library",
         "@org_golang_x_tools//go/analysis/internal/facts:go_tool_library",
         "@org_golang_x_tools//go/analysis/passes/asmdecl:go_tool_library",
diff --git a/tools/nogo/build.go b/tools/nogo/build.go
index 433d13738b..37947b5c32 100644
--- a/tools/nogo/build.go
+++ b/tools/nogo/build.go
@@ -31,10 +31,10 @@ var (
 )
 
 // findStdPkg needs to find the bundled standard library packages.
-func (i *importer) findStdPkg(path string) (io.ReadCloser, error) {
+func findStdPkg(GOOS, GOARCH, path string) (io.ReadCloser, error) {
 	if path == "C" {
 		// Cgo builds cannot be analyzed. Skip.
 		return nil, ErrSkip
 	}
-	return os.Open(fmt.Sprintf("external/go_sdk/pkg/%s_%s/%s.a", i.GOOS, i.GOARCH, path))
+	return os.Open(fmt.Sprintf("external/go_sdk/pkg/%s_%s/%s.a", GOOS, GOARCH, path))
 }
diff --git a/tools/nogo/config.go b/tools/nogo/config.go
index 6958fca692..451cd4a4ca 100644
--- a/tools/nogo/config.go
+++ b/tools/nogo/config.go
@@ -84,6 +84,14 @@ var analyzerConfig = map[*analysis.Analyzer]matcher{
 		externalExcluded(
 			".*protobuf/.*.go",              // Bad conversions.
 			".*flate/huffman_bit_writer.go", // Bad conversion.
+
+			// Runtime internal violations.
+			".*reflect/value.go",
+			".*encoding/xml/xml.go",
+			".*runtime/pprof/internal/profile/proto.go",
+			".*fmt/scan.go",
+			".*go/types/conversions.go",
+			".*golang.org/x/net/dns/dnsmessage/message.go",
 		),
 	),
 	shadow.Analyzer:      disableMatches(),  // Disabled for now.
diff --git a/tools/nogo/data/data.go b/tools/nogo/data/data.go
deleted file mode 100644
index eb84d0d279..0000000000
--- a/tools/nogo/data/data.go
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package data contains shared data for nogo analysis.
-//
-// This is used to break a dependency cycle.
-package data
-
-// Objdump is the dumped binary under analysis.
-var Objdump string
diff --git a/tools/nogo/defs.bzl b/tools/nogo/defs.bzl
index 5377620b0d..963084d532 100644
--- a/tools/nogo/defs.bzl
+++ b/tools/nogo/defs.bzl
@@ -2,6 +2,103 @@
 
 load("//tools/bazeldefs:defs.bzl", "go_context", "go_importpath", "go_rule", "go_test_library")
 
+def _nogo_dump_tool_impl(ctx):
+    # Extract the Go context.
+    go_ctx = go_context(ctx)
+
+    # Construct the magic dump command.
+    #
+    # Note that in some cases, the input is being fed into the tool via stdin.
+    # Unfortunately, the Go objdump tool expects to see a seekable file [1], so
+    # we need the tool to handle this case by creating a temporary file.
+    #
+    # [1] https://github.com/golang/go/issues/41051
+    env_prefix = " ".join(["%s=%s" % (key, value) for (key, value) in go_ctx.env.items()])
+    dumper = ctx.actions.declare_file(ctx.label.name)
+    ctx.actions.write(dumper, "\n".join([
+        "#!/bin/bash",
+        "set -euo pipefail",
+        "if [[ $# -eq 0 ]]; then",
+        " T=$(mktemp -u -t libXXXXXX.a)",
+        " cat /dev/stdin > ${T}",
+        "else",
+        " T=$1;",
+        "fi",
+        "%s %s tool objdump ${T}" % (
+            env_prefix,
+            go_ctx.go.path,
+        ),
+        "if [[ $# -eq 0 ]]; then",
+        " rm -rf ${T}",
+        "fi",
+        "",
+    ]), is_executable = True)
+
+    # Include the full runfiles.
+    return [DefaultInfo(
+        runfiles = ctx.runfiles(files = go_ctx.runfiles.to_list()),
+        executable = dumper,
+    )]
+
+nogo_dump_tool = go_rule(
+    rule,
+    implementation = _nogo_dump_tool_impl,
+)
+
+# NogoStdlibInfo is the set of standard library facts.
+NogoStdlibInfo = provider(
+    "information for nogo analysis (standard library facts)",
+    fields = {
+        "facts": "serialized standard library facts",
+    },
+)
+
+def _nogo_stdlib_impl(ctx):
+    # Extract the Go context.
+    go_ctx = go_context(ctx)
+
+    # Build the standard library facts.
+    facts = ctx.actions.declare_file(ctx.label.name + ".facts")
+    config = struct(
+        Srcs = [f.path for f in go_ctx.stdlib_srcs],
+        GOOS = go_ctx.goos,
+        GOARCH = go_ctx.goarch,
+        Tags = go_ctx.tags,
+        FactOutput = facts.path,
+    )
+    config_file = ctx.actions.declare_file(ctx.label.name + ".cfg")
+    ctx.actions.write(config_file, config.to_json())
+    ctx.actions.run(
+        inputs = [config_file] + go_ctx.stdlib_srcs,
+        outputs = [facts],
+        tools = depset(go_ctx.runfiles.to_list() + ctx.files._dump_tool),
+        executable = ctx.files._nogo[0],
+        mnemonic = "GoStandardLibraryAnalysis",
+        progress_message = "Analyzing Go Standard Library",
+        arguments = go_ctx.nogo_args + [
+            "-dump_tool=%s" % ctx.files._dump_tool[0].path,
+            "-stdlib=%s" % config_file.path,
+        ],
+    )
+
+    # Return the stdlib facts as output.
+    return [NogoStdlibInfo(
+        facts = facts,
+    )]
+
+nogo_stdlib = go_rule(
+    rule,
+    implementation = _nogo_stdlib_impl,
+    attrs = {
+        "_nogo": attr.label(
+            default = "//tools/nogo/check:check",
+        ),
+        "_dump_tool": attr.label(
+            default = "//tools/nogo:dump_tool",
+        ),
+    },
+)
+
 # NogoInfo is the serialized set of package facts for a nogo analysis.
 #
 # Each go_library rule will generate a corresponding nogo rule, which will run
@@ -33,6 +130,9 @@ def _nogo_aspect_impl(target, ctx):
     else:
         return [NogoInfo()]
 
+    # Extract the Go context.
+    go_ctx = go_context(ctx)
+
     # If we're using the "library" attribute, then we need to aggregate the
     # original library sources and dependencies into this target to perform
     # proper type analysis.
@@ -45,10 +145,6 @@ def _nogo_aspect_impl(target, ctx):
             if hasattr(info, "deps"):
                 deps = deps + info.deps
 
-    # Construct the Go environment from the go_ctx.env dictionary.
-    go_ctx = go_context(ctx)
-    env_prefix = " ".join(["%s=%s" % (key, value) for (key, value) in go_ctx.env.items()])
-
     # Start with all target files and srcs as input.
     inputs = target.files.to_list() + srcs
 
@@ -64,26 +160,7 @@ def _nogo_aspect_impl(target, ctx):
     else:
         # Use the raw binary for go_binary and go_test targets.
         target_objfile = binaries[0]
-    disasm_file = ctx.actions.declare_file(target.label.name + ".out")
-    dumper = ctx.actions.declare_file("%s-dumper" % ctx.label.name)
-    ctx.actions.write(dumper, "\n".join([
-        "#!/bin/bash",
-        "%s %s tool objdump %s > %s\n" % (
-            env_prefix,
-            go_ctx.go.path,
-            target_objfile.path,
-            disasm_file.path,
-        ),
-    ]), is_executable = True)
-    ctx.actions.run(
-        inputs = [target_objfile],
-        outputs = [disasm_file],
-        tools = go_ctx.runfiles,
-        mnemonic = "GoObjdump",
-        progress_message = "Objdump %s" % target.label,
-        executable = dumper,
-    )
-    inputs.append(disasm_file)
+    inputs.append(target_objfile)
 
     # Extract the importpath for this package.
     if ctx.rule.kind == "go_test":
@@ -96,25 +173,9 @@ def _nogo_aspect_impl(target, ctx):
     else:
         importpath = go_importpath(target)
 
-    # The nogo tool requires a configfile serialized in JSON format to do its
-    # work. This must line up with the nogo.Config fields.
-    facts = ctx.actions.declare_file(target.label.name + ".facts")
-    config = struct(
-        ImportPath = importpath,
-        GoFiles = [src.path for src in srcs if src.path.endswith(".go")],
-        NonGoFiles = [src.path for src in srcs if not src.path.endswith(".go")],
-        # Google's internal build system needs a bit more help to find std.
-        StdZip = go_ctx.std_zip.short_path if hasattr(go_ctx, "std_zip") else "",
-        GOOS = go_ctx.goos,
-        GOARCH = go_ctx.goarch,
-        Tags = go_ctx.tags,
-        FactMap = {},  # Constructed below.
-        ImportMap = {},  # Constructed below.
-        FactOutput = facts.path,
-        Objdump = disasm_file.path,
-    )
-
     # Collect all info from shadow dependencies.
+    fact_map = dict()
+    import_map = dict()
     for dep in deps:
         # There will be no file attribute set for all transitive dependencies
         # that are not go_library or go_binary rules, such as a proto rules.
@@ -129,27 +190,46 @@ def _nogo_aspect_impl(target, ctx):
         x_files = [f.path for f in info.binaries if f.path.endswith(".x")]
         if not len(x_files):
             x_files = [f.path for f in info.binaries if f.path.endswith(".a")]
-        config.ImportMap[info.importpath] = x_files[0]
-        config.FactMap[info.importpath] = info.facts.path
+        import_map[info.importpath] = x_files[0]
+        fact_map[info.importpath] = info.facts.path
 
         # Ensure the above are available as inputs.
         inputs.append(info.facts)
         inputs += info.binaries
 
-    # Write the configuration and run the tool.
+    # Add the standard library facts.
+    stdlib_facts = ctx.attr._nogo_stdlib[NogoStdlibInfo].facts
+    inputs.append(stdlib_facts)
+
+    # The nogo tool operates on a configuration serialized in JSON format.
+    facts = ctx.actions.declare_file(target.label.name + ".facts")
+    config = struct(
+        ImportPath = importpath,
+        GoFiles = [src.path for src in srcs if src.path.endswith(".go")],
+        NonGoFiles = [src.path for src in srcs if not src.path.endswith(".go")],
+        GOOS = go_ctx.goos,
+        GOARCH = go_ctx.goarch,
+        Tags = go_ctx.tags,
+        FactMap = fact_map,
+        ImportMap = import_map,
+        StdlibFacts = stdlib_facts.path,
+        FactOutput = facts.path,
+    )
     config_file = ctx.actions.declare_file(target.label.name + ".cfg")
     ctx.actions.write(config_file, config.to_json())
     inputs.append(config_file)
-
-    # Run the nogo tool itself.
     ctx.actions.run(
         inputs = inputs,
         outputs = [facts],
-        tools = go_ctx.runfiles,
+        tools = depset(go_ctx.runfiles.to_list() + ctx.files._dump_tool),
         executable = ctx.files._nogo[0],
         mnemonic = "GoStaticAnalysis",
         progress_message = "Analyzing %s" % target.label,
-        arguments = ["-config=%s" % config_file.path],
+        arguments = go_ctx.nogo_args + [
+            "-binary=%s" % target_objfile.path,
+            "-dump_tool=%s" % ctx.files._dump_tool[0].path,
+            "-package=%s" % config_file.path,
+        ],
     )
 
     # Return the package facts as output.
@@ -172,7 +252,12 @@ nogo_aspect = go_rule(
     attrs = {
         "_nogo": attr.label(
             default = "//tools/nogo/check:check",
-            allow_single_file = True,
+        ),
+        "_nogo_stdlib": attr.label(
+            default = "//tools/nogo:stdlib",
+        ),
+        "_dump_tool": attr.label(
+            default = "//tools/nogo:dump_tool",
         ),
     },
 )
diff --git a/tools/nogo/data/BUILD b/tools/nogo/dump/BUILD
similarity index 77%
rename from tools/nogo/data/BUILD
rename to tools/nogo/dump/BUILD
index b7564cc440..dfa29d6519 100644
--- a/tools/nogo/data/BUILD
+++ b/tools/nogo/dump/BUILD
@@ -3,8 +3,8 @@ load("//tools:defs.bzl", "go_library")
 package(licenses = ["notice"])
 
 go_library(
-    name = "data",
-    srcs = ["data.go"],
+    name = "dump",
+    srcs = ["dump.go"],
     nogo = False,
     visibility = ["//tools:__subpackages__"],
 )
diff --git a/tools/nogo/dump/dump.go b/tools/nogo/dump/dump.go
new file mode 100644
index 0000000000..f06567e0fd
--- /dev/null
+++ b/tools/nogo/dump/dump.go
@@ -0,0 +1,78 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package dump contains data dump tools.
+//
+// The interface used by the package corresponds to the tool generated by the
+// nogo_dump_tool rule.
+//
+// This package is separate in order to avoid a dependency cycle.
+package dump
+
+import (
+	"flag"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+)
+
+var (
+	// Binary is the binary under analysis.
+	//
+	// See Reader, below.
+	binary = flag.String("binary", "", "binary under analysis")
+
+	// Reader is the input stream.
+	//
+	// This may be set instead of Binary.
+	Reader io.Reader
+
+	// Tool is the tool used to dump a binary.
+	tool = flag.String("dump_tool", "", "tool used to dump a binary")
+)
+
+// Command returns a command that will emit the dumped object on stdout.
+//
+// You must call Wait on the resulting command.
+func Command() (*exec.Cmd, io.Reader, error) {
+	var (
+		args  []string
+		stdin io.Reader
+	)
+	if *binary != "" {
+		args = append(args, *binary)
+		*binary = "" // Clear.
+	} else if Reader != nil {
+		stdin = Reader
+		Reader = nil // Clear.
+	} else {
+		// We have no input stream or binary.
+		return nil, nil, fmt.Errorf("no binary or reader provided!")
+	}
+
+	// Construct our command.
+	cmd := exec.Command(*tool, args...)
+	cmd.Stdin = stdin
+	cmd.Stderr = os.Stderr
+	out, err := cmd.StdoutPipe()
+	if err != nil {
+		return nil, nil, err
+	}
+	if err := cmd.Start(); err != nil {
+		return nil, nil, err
+	}
+
+	return cmd, out, err
+}
diff --git a/tools/nogo/nogo.go b/tools/nogo/nogo.go
index ea1e970767..e44f32d4cf 100644
--- a/tools/nogo/nogo.go
+++ b/tools/nogo/nogo.go
@@ -32,51 +32,97 @@ import (
 	"io/ioutil"
 	"log"
 	"os"
+	"path"
 	"path/filepath"
 	"reflect"
+	"strings"
 
 	"golang.org/x/tools/go/analysis"
 	"golang.org/x/tools/go/analysis/internal/facts"
 	"golang.org/x/tools/go/gcexportdata"
-	"gvisor.dev/gvisor/tools/nogo/data"
+	"gvisor.dev/gvisor/tools/nogo/dump"
 )
 
-// pkgConfig is serialized as the configuration.
+// stdlibConfig is serialized as the configuration.
 //
-// This contains everything required for the analysis.
-type pkgConfig struct {
-	ImportPath string
-	GoFiles    []string
-	NonGoFiles []string
-	Tags       []string
+// This contains everything required for stdlib analysis.
+type stdlibConfig struct {
+	Srcs       []string
 	GOOS       string
 	GOARCH     string
-	ImportMap  map[string]string
-	FactMap    map[string]string
+	Tags       []string
 	FactOutput string
-	Objdump    string
-	StdZip     string
 }
 
-// loadFacts finds and loads facts per FactMap.
-func (c *pkgConfig) loadFacts(path string) ([]byte, error) {
-	realPath, ok := c.FactMap[path]
-	if !ok {
-		return nil, nil // No facts available.
+// packageConfig is serialized as the configuration.
+//
+// This contains everything required for single package analysis.
+type packageConfig struct {
+	ImportPath  string
+	GoFiles     []string
+	NonGoFiles  []string
+	Tags        []string
+	GOOS        string
+	GOARCH      string
+	ImportMap   map[string]string
+	FactMap     map[string]string
+	FactOutput  string
+	StdlibFacts string
+}
+
+// loader is a fact-loader function.
+type loader func(string) ([]byte, error)
+
+// saver is a fact-saver function.
+type saver func([]byte) error
+
+// factLoader returns a function that loads facts.
+//
+// This resolves all standard library facts and imported package facts up
+// front. The returned loader function will never return an error, only
+// empty facts.
+//
+// This is done because all stdlib data is stored together, and we don't want
+// to load this data many times over.
+func (c *packageConfig) factLoader() (loader, error) {
+	allFacts := make(map[string][]byte)
+	if c.StdlibFacts != "" {
+		data, err := ioutil.ReadFile(c.StdlibFacts)
+		if err != nil {
+			return nil, fmt.Errorf("error loading stdlib facts from %q: %w", c.StdlibFacts, err)
+		}
+		var stdlibFacts map[string][]byte
+		if err := json.Unmarshal(data, &stdlibFacts); err != nil {
+			return nil, fmt.Errorf("error loading stdlib facts: %w", err)
+		}
+		for pkg, data := range stdlibFacts {
+			allFacts[pkg] = data
+		}
+	}
+	for pkg, file := range c.FactMap {
+		data, err := ioutil.ReadFile(file)
+		if err != nil {
+			return nil, fmt.Errorf("error loading %q: %w", file, err)
+		}
+		allFacts[pkg] = data
 	}
+	return func(path string) ([]byte, error) {
+		return allFacts[path], nil
+	}, nil
+}
 
-	// Read the files file.
-	data, err := ioutil.ReadFile(realPath)
-	if err != nil {
-		return nil, err
+// factSaver may be used directly as a saver.
+func (c *packageConfig) factSaver(factData []byte) error {
+	if c.FactOutput == "" {
+		return nil // Nothing to save.
 	}
-	return data, nil
+	return ioutil.WriteFile(c.FactOutput, factData, 0644)
 }
 
 // shouldInclude indicates whether the file should be included.
 //
 // NOTE: This does only basic parsing of tags.
-func (c *pkgConfig) shouldInclude(path string) (bool, error) {
+func (c *packageConfig) shouldInclude(path string) (bool, error) {
 	ctx := build.Default
 	ctx.GOOS = c.GOOS
 	ctx.GOARCH = c.GOARCH
@@ -90,10 +136,11 @@ func (c *pkgConfig) shouldInclude(path string) (bool, error) {
 // files, and the facts. Note that this importer implementation will always
 // pass when a given package is not available.
 type importer struct {
-	pkgConfig
-	fset    *token.FileSet
-	cache   map[string]*types.Package
-	lastErr error
+	*packageConfig
+	fset     *token.FileSet
+	cache    map[string]*types.Package
+	lastErr  error
+	callback func(string) error
 }
 
 // Import implements types.Importer.Import.
@@ -104,6 +151,17 @@ func (i *importer) Import(path string) (*types.Package, error) {
 		// analyzers are specifically looking for this.
 		return types.Unsafe, nil
 	}
+
+	// Call the internal callback. This is used to resolve loading order
+	// for the standard library. See checkStdlib.
+	if i.callback != nil {
+		if err := i.callback(path); err != nil {
+			i.lastErr = err
+			return nil, err
+		}
+	}
+
+	// Actually load the data.
 	realPath, ok := i.ImportMap[path]
 	var (
 		rc  io.ReadCloser
@@ -112,7 +170,7 @@ func (i *importer) Import(path string) (*types.Package, error) {
 	if !ok {
 		// Not found in the import path. Attempt to find the package
 		// via the standard library.
-		rc, err = i.findStdPkg(path)
+		rc, err = findStdPkg(i.GOOS, i.GOARCH, path)
 	} else {
 		// Open the file.
 		rc, err = os.Open(realPath)
@@ -135,6 +193,139 @@ func (i *importer) Import(path string) (*types.Package, error) {
 // ErrSkip indicates the package should be skipped.
 var ErrSkip = errors.New("skipped")
 
+// checkStdlib checks the standard library.
+//
+// This constructs a synthetic package configuration for each library in the
+// standard library sources, and call checkPackage repeatedly.
+//
+// Note that not all parts of the source are expected to build. We skip obvious
+// test files, and cmd files, which should not be dependencies.
+func checkStdlib(config *stdlibConfig) ([]string, error) {
+	if len(config.Srcs) == 0 {
+		return nil, nil
+	}
+
+	// Ensure all paths are normalized.
+	for i := 0; i < len(config.Srcs); i++ {
+		config.Srcs[i] = path.Clean(config.Srcs[i])
+	}
+
+	// Calculate the root directory.
+	longestPrefix := path.Dir(config.Srcs[0])
+	for _, file := range config.Srcs[1:] {
+		for i := 0; i < len(file) && i < len(longestPrefix); i++ {
+			if file[i] != longestPrefix[i] {
+				// Truncate here; will stop the loop.
+				longestPrefix = longestPrefix[:i]
+				break
+			}
+		}
+	}
+	if len(longestPrefix) > 0 && longestPrefix[len(longestPrefix)-1] != '/' {
+		longestPrefix += "/"
+	}
+
+	// Aggregate all files by directory.
+	packages := make(map[string]*packageConfig)
+	for _, file := range config.Srcs {
+		d := path.Dir(file)
+		if len(longestPrefix) >= len(d) {
+			continue // Not a file.
+		}
+		pkg := path.Dir(file)[len(longestPrefix):]
+		// Skip cmd packages and obvious test files: see above.
+		if strings.HasPrefix(pkg, "cmd/") || strings.HasSuffix(file, "_test.go") {
+			continue
+		}
+		c, ok := packages[pkg]
+		if !ok {
+			c = &packageConfig{
+				ImportPath: pkg,
+				GOOS:       config.GOOS,
+				GOARCH:     config.GOARCH,
+				Tags:       config.Tags,
+			}
+			packages[pkg] = c
+		}
+		// Add the files appropriately. Note that they will be further
+		// filtered by architecture and build tags below, so this need
+		// not be done immediately.
+		if strings.HasSuffix(file, ".go") {
+			c.GoFiles = append(c.GoFiles, file)
+		} else {
+			c.NonGoFiles = append(c.NonGoFiles, file)
+		}
+	}
+
+	// Closure to check a single package.
+	allFindings := make([]string, 0)
+	stdlibFacts := make(map[string][]byte)
+	var checkOne func(pkg string) error // Recursive.
+	checkOne = func(pkg string) error {
+		// Is this already done?
+		if _, ok := stdlibFacts[pkg]; ok {
+			return nil
+		}
+
+		// Lookup the configuration.
+		config, ok := packages[pkg]
+		if !ok {
+			return nil // Not known.
+		}
+
+		// Find the binary package, and provide to objdump.
+		rc, err := findStdPkg(config.GOOS, config.GOARCH, pkg)
+		if err != nil {
+			// If there's no binary for this package, it is likely
+			// not built with the distribution. That's fine, we can
+			// just skip analysis.
+			return nil
+		}
+
+		// Provide the input.
+		oldReader := dump.Reader
+		dump.Reader = rc // For analysis.
+		defer func() {
+			rc.Close()
+			dump.Reader = oldReader // Restore.
+		}()
+
+		// Run the analysis.
+		findings, err := checkPackage(config, func(factData []byte) error {
+			stdlibFacts[pkg] = factData
+			return nil
+		}, checkOne)
+		if err != nil {
+			// If we can't analyze a package from the standard library,
+			// then we skip it. It will simply not have any findings.
+			return nil
+		}
+		allFindings = append(allFindings, findings...)
+		return nil
+	}
+
+	// Check all packages.
+	//
+	// Note that this may call checkOne recursively, so it's not guaranteed
+	// to evaluate in the order provided here. We do ensure however, that
+	// all packages are evaluated.
+	for pkg := range packages {
+		checkOne(pkg)
+	}
+
+	// Write out all findings.
+	factData, err := json.Marshal(stdlibFacts)
+	if err != nil {
+		return nil, fmt.Errorf("error saving stdlib facts: %w", err)
+	}
+	if err := ioutil.WriteFile(config.FactOutput, factData, 0644); err != nil {
+		return nil, fmt.Errorf("error saving findings to %q: %v", config.FactOutput, err)
+	}
+
+	// Return all findings.
+	return allFindings, nil
+}
+
 // checkPackage runs all analyzers.
 //
 // The implementation was adapted from [1], which was in turn adpated from [2].
@@ -143,11 +334,12 @@ var ErrSkip = errors.New("skipped")
 //
 // [1] bazelbuid/rules_go/tools/builders/nogo_main.go
 // [2] golang.org/x/tools/go/checker/internal/checker
-func checkPackage(config pkgConfig) ([]string, error) {
+func checkPackage(config *packageConfig, factSaver saver, importCallback func(string) error) ([]string, error) {
 	imp := &importer{
-		pkgConfig: config,
-		fset:      token.NewFileSet(),
-		cache:     make(map[string]*types.Package),
+		packageConfig: config,
+		fset:          token.NewFileSet(),
+		cache:         make(map[string]*types.Package),
+		callback:      importCallback,
 	}
 
 	// Load all source files.
@@ -184,14 +376,15 @@ func checkPackage(config pkgConfig) ([]string, error) {
 	}
 
 	// Load all package facts.
-	facts, err := facts.Decode(types, config.loadFacts)
+	loader, err := config.factLoader()
+	if err != nil {
+		return nil, fmt.Errorf("error loading facts: %w", err)
+	}
+	facts, err := facts.Decode(types, loader)
 	if err != nil {
 		return nil, fmt.Errorf("error decoding facts: %w", err)
 	}
 
-	// Set the binary global for use.
-	data.Objdump = config.Objdump
-
 	// Register fact types and establish dependencies between analyzers.
 	// The visit closure will execute recursively, and populate results
 	// will all required analysis results.
@@ -263,11 +456,9 @@ func checkPackage(config pkgConfig) ([]string, error) {
 	}
 
 	// Write the output file.
-	if config.FactOutput != "" {
-		factData := facts.Encode()
-		if err := ioutil.WriteFile(config.FactOutput, factData, 0644); err != nil {
-			return nil, fmt.Errorf("error: unable to open facts output %q: %v", config.FactOutput, err)
-		}
+	factData := facts.Encode()
+	if err := factSaver(factData); err != nil {
+		return nil, fmt.Errorf("error: unable to save facts: %v", err)
 	}
 
 	// Convert all diagnostics to strings.
@@ -284,38 +475,56 @@ func checkPackage(config pkgConfig) ([]string, error) {
 }
 
 var (
-	configFile = flag.String("config", "", "configuration file (in JSON format)")
+	packageFile = flag.String("package", "", "package configuration file (in JSON format)")
+	stdlibFile  = flag.String("stdlib", "", "stdlib configuration file (in JSON format)")
 )
 
-// Main is the entrypoint; it should be called directly from main.
-//
-// N.B. This package registers it's own flags.
-func Main() {
-	// Parse all flags.
-	flag.Parse()
-
+func loadConfig(file string, config interface{}) interface{} {
 	// Load the configuration.
-	f, err := os.Open(*configFile)
+	f, err := os.Open(file)
 	if err != nil {
-		log.Fatalf("unable to open configuration %q: %v", *configFile, err)
+		log.Fatalf("unable to open configuration %q: %v", file, err)
 	}
 	defer f.Close()
-	config := new(pkgConfig)
 	dec := json.NewDecoder(f)
 	dec.DisallowUnknownFields()
 	if err := dec.Decode(config); err != nil {
 		log.Fatalf("unable to decode configuration: %v", err)
 	}
+	return config
+}
 
-	// Process the package.
-	findings, err := checkPackage(*config)
+// Main is the entrypoint; it should be called directly from main.
+//
+// N.B. This package registers its own flags.
+func Main() {
+	// Parse all flags.
+	flag.Parse()
+
+	var (
+		findings []string
+		err      error
+	)
+
+	// Check the configuration.
+	if *packageFile != "" && *stdlibFile != "" {
+		log.Fatalf("unable to perform stdlib and package analysis; provide only one!")
+	} else if *stdlibFile != "" {
+		c := loadConfig(*stdlibFile, new(stdlibConfig)).(*stdlibConfig)
+		findings, err = checkStdlib(c)
+	} else if *packageFile != "" {
+		c := loadConfig(*packageFile, new(packageConfig)).(*packageConfig)
+		findings, err = checkPackage(c, c.factSaver, nil)
+	} else {
+		log.Fatalf("please provide at least one of package or stdlib!")
+	}
+
+	// Handle findings & errors.
 	if err != nil {
 		log.Fatalf("error checking package: %v", err)
 	}
-
-	// No findings?
 	if len(findings) == 0 {
-		os.Exit(0)
+		return
 	}
 
 	// Print findings and exit with non-zero code.

From 81d6499848783d79989f2a0280accfcfc9753378 Mon Sep 17 00:00:00 2001
From: makocchi-git <makocchi@gmail.com>
Date: Fri, 21 Aug 2020 13:48:49 +0900
Subject: [PATCH 092/211] use is-active instead of status

---
 debian/postinst.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/debian/postinst.sh b/debian/postinst.sh
index d1e28e17bf..6a326f823a 100755
--- a/debian/postinst.sh
+++ b/debian/postinst.sh
@@ -21,7 +21,7 @@ fi
 # Update docker configuration.
 if [ -f /etc/docker/daemon.json ]; then
   runsc install
-  if systemctl status docker 2>/dev/null; then
+  if systemctl is-active -q docker; then
     systemctl restart docker || echo "unable to restart docker; you must do so manually." >&2
   fi
 fi

From dd8b3ffcb8eb7f7867dbea2c721f7fb7d0ec0342 Mon Sep 17 00:00:00 2001
From: Bin Lu <bin.lu@arm.com>
Date: Mon, 24 Aug 2020 22:40:20 -0400
Subject: [PATCH 093/211] Device major number greater than 2 digits in
 /proc/self/maps on arm64 N1 machine

Signed-off-by: Bin Lu <bin.lu@arm.com>
---
 pkg/sentry/platform/kvm/virtual_map.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/sentry/platform/kvm/virtual_map.go b/pkg/sentry/platform/kvm/virtual_map.go
index c8897d34f7..4dcdbf8a7d 100644
--- a/pkg/sentry/platform/kvm/virtual_map.go
+++ b/pkg/sentry/platform/kvm/virtual_map.go
@@ -34,7 +34,7 @@ type virtualRegion struct {
 }
 
 // mapsLine matches a single line from /proc/PID/maps.
-var mapsLine = regexp.MustCompile("([0-9a-f]+)-([0-9a-f]+) ([r-][w-][x-][sp]) ([0-9a-f]+) [0-9a-f]{2}:[0-9a-f]{2,} [0-9]+\\s+(.*)")
+var mapsLine = regexp.MustCompile("([0-9a-f]+)-([0-9a-f]+) ([r-][w-][x-][sp]) ([0-9a-f]+) [0-9a-f]{2,3}:[0-9a-f]{2,} [0-9]+\\s+(.*)")
 
 // excludeRegion returns true if these regions should be excluded from the
 // physical map. Virtual regions need to be excluded if get_user_pages will

From 4705782bf39e7202a5fd66a966fac94baf36492b Mon Sep 17 00:00:00 2001
From: Fabricio Voznika <fvoznika@google.com>
Date: Wed, 26 Aug 2020 20:22:39 -0700
Subject: [PATCH 094/211] Make flag propagation automatic

Use reflection and tags to provide automatic conversion from
Config to flags. This makes adding new flags less error-prone,
skips flags using default values (easier to read), and makes
tests correctly use default flag values for test Configs.

Updates #3494

PiperOrigin-RevId: 328662070
---
 pkg/refs/refcounter.go          |  33 +++
 pkg/sentry/watchdog/watchdog.go |  28 +-
 pkg/test/testutil/testutil.go   |  31 ++-
 runsc/boot/loader_test.go       |  20 +-
 runsc/boot/strace.go            |   4 +-
 runsc/config/BUILD              |  15 +-
 runsc/config/config.go          | 443 ++++++++++++++------------------
 runsc/config/config_test.go     | 185 +++++++++++++
 runsc/config/flags.go           | 168 ++++++++++++
 runsc/flag/flag.go              |  14 +-
 runsc/main.go                   | 170 ++----------
 runsc/sandbox/network.go        |   2 +-
 12 files changed, 691 insertions(+), 422 deletions(-)
 create mode 100644 runsc/config/config_test.go
 create mode 100644 runsc/config/flags.go

diff --git a/pkg/refs/refcounter.go b/pkg/refs/refcounter.go
index d9d5e6bcb9..57d8542b96 100644
--- a/pkg/refs/refcounter.go
+++ b/pkg/refs/refcounter.go
@@ -234,6 +234,39 @@ const (
 	LeaksLogTraces
 )
 
+// Set implements flag.Value.
+func (l *LeakMode) Set(v string) error {
+	switch v {
+	case "disabled":
+		*l = NoLeakChecking
+	case "log-names":
+		*l = LeaksLogWarning
+	case "log-traces":
+		*l = LeaksLogTraces
+	default:
+		return fmt.Errorf("invalid ref leak mode %q", v)
+	}
+	return nil
+}
+
+// Get implements flag.Value.
+func (l *LeakMode) Get() interface{} {
+	return *l
+}
+
+// String implements flag.Value.
+func (l *LeakMode) String() string {
+	switch *l {
+	case NoLeakChecking:
+		return "disabled"
+	case LeaksLogWarning:
+		return "log-names"
+	case LeaksLogTraces:
+		return "log-traces"
+	}
+	panic(fmt.Sprintf("invalid ref leak mode %q", *l))
+}
+
 // leakMode stores the current mode for the reference leak checker.
 //
 // Values must be one of the LeakMode values.
diff --git a/pkg/sentry/watchdog/watchdog.go b/pkg/sentry/watchdog/watchdog.go
index 7482733662..bbafb8b7fd 100644
--- a/pkg/sentry/watchdog/watchdog.go
+++ b/pkg/sentry/watchdog/watchdog.go
@@ -96,15 +96,33 @@ const (
 	Panic
 )
 
+// Set implements flag.Value.
+func (a *Action) Set(v string) error {
+	switch v {
+	case "log", "logwarning":
+		*a = LogWarning
+	case "panic":
+		*a = Panic
+	default:
+		return fmt.Errorf("invalid watchdog action %q", v)
+	}
+	return nil
+}
+
+// Get implements flag.Value.
+func (a *Action) Get() interface{} {
+	return *a
+}
+
 // String returns Action's string representation.
-func (a Action) String() string {
-	switch a {
+func (a *Action) String() string {
+	switch *a {
 	case LogWarning:
-		return "LogWarning"
+		return "logWarning"
 	case Panic:
-		return "Panic"
+		return "panic"
 	default:
-		panic(fmt.Sprintf("Invalid action: %d", a))
+		panic(fmt.Sprintf("Invalid watchdog action: %d", *a))
 	}
 }
 
diff --git a/pkg/test/testutil/testutil.go b/pkg/test/testutil/testutil.go
index 42d79f5c2a..b7f873392d 100644
--- a/pkg/test/testutil/testutil.go
+++ b/pkg/test/testutil/testutil.go
@@ -138,20 +138,23 @@ func TestConfig(t *testing.T) *config.Config {
 	if dir, ok := os.LookupEnv("TEST_UNDECLARED_OUTPUTS_DIR"); ok {
 		logDir = dir + "/"
 	}
-	return &config.Config{
-		Debug:              true,
-		DebugLog:           path.Join(logDir, "runsc.log."+t.Name()+".%TIMESTAMP%.%COMMAND%"),
-		LogFormat:          "text",
-		DebugLogFormat:     "text",
-		LogPackets:         true,
-		Network:            config.NetworkNone,
-		Strace:             true,
-		Platform:           "ptrace",
-		FileAccess:         config.FileAccessExclusive,
-		NumNetworkChannels: 1,
-
-		TestOnlyAllowRunAsCurrentUserWithoutChroot: true,
-	}
+
+	// Only register flags if config is being used. Otherwise anyone that uses
+	// testutil will get flags registered and they may conflict.
+	config.RegisterFlags()
+
+	conf, err := config.NewFromFlags()
+	if err != nil {
+		panic(err)
+	}
+	// Change test defaults.
+	conf.Debug = true
+	conf.DebugLog = path.Join(logDir, "runsc.log."+t.Name()+".%TIMESTAMP%.%COMMAND%")
+	conf.LogPackets = true
+	conf.Network = config.NetworkNone
+	conf.Strace = true
+	conf.TestOnlyAllowRunAsCurrentUserWithoutChroot = true
+	return conf
 }
 
 // NewSpecWithArgs creates a simple spec with the given args suitable for use
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index 03cbaec333..2343ce76cc 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -44,15 +44,19 @@ func init() {
 	if err := fsgofer.OpenProcSelfFD(); err != nil {
 		panic(err)
 	}
+	config.RegisterFlags()
 }
 
 func testConfig() *config.Config {
-	return &config.Config{
-		RootDir:        "unused_root_dir",
-		Network:        config.NetworkNone,
-		DisableSeccomp: true,
-		Platform:       "ptrace",
+	conf, err := config.NewFromFlags()
+	if err != nil {
+		panic(err)
 	}
+	// Change test defaults.
+	conf.RootDir = "unused_root_dir"
+	conf.Network = config.NetworkNone
+	conf.DisableSeccomp = true
+	return conf
 }
 
 // testSpec returns a simple spec that can be used in tests.
@@ -546,7 +550,7 @@ func TestRestoreEnvironment(t *testing.T) {
 						{
 							Dev:        "9pfs-/",
 							Flags:      fs.MountSourceFlags{ReadOnly: true},
-							DataString: "trans=fd,rfdno=0,wfdno=0,privateunixsocket=true,cache=remote_revalidating",
+							DataString: "trans=fd,rfdno=0,wfdno=0,privateunixsocket=true",
 						},
 					},
 					"tmpfs": {
@@ -600,7 +604,7 @@ func TestRestoreEnvironment(t *testing.T) {
 						{
 							Dev:        "9pfs-/",
 							Flags:      fs.MountSourceFlags{ReadOnly: true},
-							DataString: "trans=fd,rfdno=0,wfdno=0,privateunixsocket=true,cache=remote_revalidating",
+							DataString: "trans=fd,rfdno=0,wfdno=0,privateunixsocket=true",
 						},
 						{
 							Dev:        "9pfs-/dev/fd-foo",
@@ -658,7 +662,7 @@ func TestRestoreEnvironment(t *testing.T) {
 						{
 							Dev:        "9pfs-/",
 							Flags:      fs.MountSourceFlags{ReadOnly: true},
-							DataString: "trans=fd,rfdno=0,wfdno=0,privateunixsocket=true,cache=remote_revalidating",
+							DataString: "trans=fd,rfdno=0,wfdno=0,privateunixsocket=true",
 						},
 					},
 					"tmpfs": {
diff --git a/runsc/boot/strace.go b/runsc/boot/strace.go
index 176981f741..c21648a32d 100644
--- a/runsc/boot/strace.go
+++ b/runsc/boot/strace.go
@@ -15,6 +15,8 @@
 package boot
 
 import (
+	"strings"
+
 	"gvisor.dev/gvisor/pkg/sentry/strace"
 	"gvisor.dev/gvisor/runsc/config"
 )
@@ -37,5 +39,5 @@ func enableStrace(conf *config.Config) error {
 		strace.EnableAll(strace.SinkTypeLog)
 		return nil
 	}
-	return strace.Enable(conf.StraceSyscalls, strace.SinkTypeLog)
+	return strace.Enable(strings.Split(conf.StraceSyscalls, ","), strace.SinkTypeLog)
 }
diff --git a/runsc/config/BUILD b/runsc/config/BUILD
index 3c8713d535..b1672bb9d6 100644
--- a/runsc/config/BUILD
+++ b/runsc/config/BUILD
@@ -1,4 +1,4 @@
-load("//tools:defs.bzl", "go_library")
+load("//tools:defs.bzl", "go_library", "go_test")
 
 package(licenses = ["notice"])
 
@@ -6,10 +6,23 @@ go_library(
     name = "config",
     srcs = [
         "config.go",
+        "flags.go",
     ],
     visibility = ["//:sandbox"],
     deps = [
         "//pkg/refs",
         "//pkg/sentry/watchdog",
+        "//pkg/sync",
+        "//runsc/flag",
     ],
 )
+
+go_test(
+    name = "config_test",
+    size = "small",
+    srcs = [
+        "config_test.go",
+    ],
+    library = ":config",
+    deps = ["//runsc/flag"],
+)
diff --git a/runsc/config/config.go b/runsc/config/config.go
index 8cf0378d5b..bca27ebf1a 100644
--- a/runsc/config/config.go
+++ b/runsc/config/config.go
@@ -19,254 +19,105 @@ package config
 
 import (
 	"fmt"
-	"strconv"
-	"strings"
 
 	"gvisor.dev/gvisor/pkg/refs"
 	"gvisor.dev/gvisor/pkg/sentry/watchdog"
 )
 
-// FileAccessType tells how the filesystem is accessed.
-type FileAccessType int
-
-const (
-	// FileAccessShared sends IO requests to a Gofer process that validates the
-	// requests and forwards them to the host.
-	FileAccessShared FileAccessType = iota
-
-	// FileAccessExclusive is the same as FileAccessShared, but enables
-	// extra caching for improved performance. It should only be used if
-	// the sandbox has exclusive access to the filesystem.
-	FileAccessExclusive
-)
-
-// MakeFileAccessType converts type from string.
-func MakeFileAccessType(s string) (FileAccessType, error) {
-	switch s {
-	case "shared":
-		return FileAccessShared, nil
-	case "exclusive":
-		return FileAccessExclusive, nil
-	default:
-		return 0, fmt.Errorf("invalid file access type %q", s)
-	}
-}
-
-func (f FileAccessType) String() string {
-	switch f {
-	case FileAccessShared:
-		return "shared"
-	case FileAccessExclusive:
-		return "exclusive"
-	default:
-		return fmt.Sprintf("unknown(%d)", f)
-	}
-}
-
-// NetworkType tells which network stack to use.
-type NetworkType int
-
-const (
-	// NetworkSandbox uses internal network stack, isolated from the host.
-	NetworkSandbox NetworkType = iota
-
-	// NetworkHost redirects network related syscalls to the host network.
-	NetworkHost
-
-	// NetworkNone sets up just loopback using netstack.
-	NetworkNone
-)
-
-// MakeNetworkType converts type from string.
-func MakeNetworkType(s string) (NetworkType, error) {
-	switch s {
-	case "sandbox":
-		return NetworkSandbox, nil
-	case "host":
-		return NetworkHost, nil
-	case "none":
-		return NetworkNone, nil
-	default:
-		return 0, fmt.Errorf("invalid network type %q", s)
-	}
-}
-
-func (n NetworkType) String() string {
-	switch n {
-	case NetworkSandbox:
-		return "sandbox"
-	case NetworkHost:
-		return "host"
-	case NetworkNone:
-		return "none"
-	default:
-		return fmt.Sprintf("unknown(%d)", n)
-	}
-}
-
-// MakeWatchdogAction converts type from string.
-func MakeWatchdogAction(s string) (watchdog.Action, error) {
-	switch strings.ToLower(s) {
-	case "log", "logwarning":
-		return watchdog.LogWarning, nil
-	case "panic":
-		return watchdog.Panic, nil
-	default:
-		return 0, fmt.Errorf("invalid watchdog action %q", s)
-	}
-}
-
-// MakeRefsLeakMode converts type from string.
-func MakeRefsLeakMode(s string) (refs.LeakMode, error) {
-	switch strings.ToLower(s) {
-	case "disabled":
-		return refs.NoLeakChecking, nil
-	case "log-names":
-		return refs.LeaksLogWarning, nil
-	case "log-traces":
-		return refs.LeaksLogTraces, nil
-	default:
-		return 0, fmt.Errorf("invalid refs leakmode %q", s)
-	}
-}
-
-func refsLeakModeToString(mode refs.LeakMode) string {
-	switch mode {
-	// If not set, default it to disabled.
-	case refs.UninitializedLeakChecking, refs.NoLeakChecking:
-		return "disabled"
-	case refs.LeaksLogWarning:
-		return "log-names"
-	case refs.LeaksLogTraces:
-		return "log-traces"
-	default:
-		panic(fmt.Sprintf("Invalid leakmode: %d", mode))
-	}
-}
-
-// QueueingDiscipline is used to specify the kind of Queueing Discipline to
-// apply for a give FDBasedLink.
-type QueueingDiscipline int
-
-const (
-	// QDiscNone disables any queueing for the underlying FD.
-	QDiscNone QueueingDiscipline = iota
-
-	// QDiscFIFO applies a simple fifo based queue to the underlying
-	// FD.
-	QDiscFIFO
-)
-
-// MakeQueueingDiscipline if possible the equivalent QueuingDiscipline for s
-// else returns an error.
-func MakeQueueingDiscipline(s string) (QueueingDiscipline, error) {
-	switch s {
-	case "none":
-		return QDiscNone, nil
-	case "fifo":
-		return QDiscFIFO, nil
-	default:
-		return 0, fmt.Errorf("unsupported qdisc specified: %q", s)
-	}
-}
-
-// String implements fmt.Stringer.
-func (q QueueingDiscipline) String() string {
-	switch q {
-	case QDiscNone:
-		return "none"
-	case QDiscFIFO:
-		return "fifo"
-	default:
-		panic(fmt.Sprintf("Invalid queueing discipline: %d", q))
-	}
-}
-
 // Config holds configuration that is not part of the runtime spec.
+//
+// Follow these steps to add a new flag:
+//   1. Create a new field in Config.
+//   2. Add a field tag with the flag name
+//   3. Register a new flag in flags.go, with name and description
+//   4. Add any necessary validation into validate()
+//   5. If adding an enum, follow the same pattern as FileAccessType
+//
 type Config struct {
 	// RootDir is the runtime root directory.
-	RootDir string
+	RootDir string `flag:"root"`
 
 	// Debug indicates that debug logging should be enabled.
-	Debug bool
+	Debug bool `flag:"debug"`
 
 	// LogFilename is the filename to log to, if not empty.
-	LogFilename string
+	LogFilename string `flag:"log"`
 
 	// LogFormat is the log format.
-	LogFormat string
+	LogFormat string `flag:"log-format"`
 
 	// DebugLog is the path to log debug information to, if not empty.
-	DebugLog string
+	DebugLog string `flag:"debug-log"`
 
 	// PanicLog is the path to log GO's runtime messages, if not empty.
-	PanicLog string
+	PanicLog string `flag:"panic-log"`
 
 	// DebugLogFormat is the log format for debug.
-	DebugLogFormat string
+	DebugLogFormat string `flag:"debug-log-format"`
 
 	// FileAccess indicates how the filesystem is accessed.
-	FileAccess FileAccessType
+	FileAccess FileAccessType `flag:"file-access"`
 
 	// Overlay is whether to wrap the root filesystem in an overlay.
-	Overlay bool
+	Overlay bool `flag:"overlay"`
 
 	// FSGoferHostUDS enables the gofer to mount a host UDS.
-	FSGoferHostUDS bool
+	FSGoferHostUDS bool `flag:"fsgofer-host-uds"`
 
 	// Network indicates what type of network to use.
-	Network NetworkType
+	Network NetworkType `flag:"network"`
 
 	// EnableRaw indicates whether raw sockets should be enabled. Raw
 	// sockets are disabled by stripping CAP_NET_RAW from the list of
 	// capabilities.
-	EnableRaw bool
+	EnableRaw bool `flag:"net-raw"`
 
 	// HardwareGSO indicates that hardware segmentation offload is enabled.
-	HardwareGSO bool
+	HardwareGSO bool `flag:"gso"`
 
 	// SoftwareGSO indicates that software segmentation offload is enabled.
-	SoftwareGSO bool
+	SoftwareGSO bool `flag:"software-gso"`
 
 	// TXChecksumOffload indicates that TX Checksum Offload is enabled.
-	TXChecksumOffload bool
+	TXChecksumOffload bool `flag:"tx-checksum-offload"`
 
 	// RXChecksumOffload indicates that RX Checksum Offload is enabled.
-	RXChecksumOffload bool
+	RXChecksumOffload bool `flag:"rx-checksum-offload"`
 
 	// QDisc indicates the type of queuening discipline to use by default
 	// for non-loopback interfaces.
-	QDisc QueueingDiscipline
+	QDisc QueueingDiscipline `flag:"qdisc"`
 
 	// LogPackets indicates that all network packets should be logged.
-	LogPackets bool
+	LogPackets bool `flag:"log-packets"`
 
 	// Platform is the platform to run on.
-	Platform string
+	Platform string `flag:"platform"`
 
 	// Strace indicates that strace should be enabled.
-	Strace bool
+	Strace bool `flag:"strace"`
 
-	// StraceSyscalls is the set of syscalls to trace.  If StraceEnable is
-	// true and this list is empty, then all syscalls will be traced.
-	StraceSyscalls []string
+	// StraceSyscalls is the set of syscalls to trace (comma-separated values).
+	// If Strace is true and this string is empty, then all syscalls will
+	// be traced.
+	StraceSyscalls string `flag:"strace-syscalls"`
 
 	// StraceLogSize is the max size of data blobs to display.
-	StraceLogSize uint
+	StraceLogSize uint `flag:"strace-log-size"`
 
 	// DisableSeccomp indicates whether seccomp syscall filters should be
 	// disabled. Pardon the double negation, but default to enabled is important.
 	DisableSeccomp bool
 
 	// WatchdogAction sets what action the watchdog takes when triggered.
-	WatchdogAction watchdog.Action
+	WatchdogAction watchdog.Action `flag:"watchdog-action"`
 
 	// PanicSignal registers signal handling that panics. Usually set to
 	// SIGUSR2(12) to troubleshoot hangs. -1 disables it.
-	PanicSignal int
+	PanicSignal int `flag:"panic-signal"`
 
 	// ProfileEnable is set to prepare the sandbox to be profiled.
-	ProfileEnable bool
+	ProfileEnable bool `flag:"profile"`
 
 	// RestoreFile is the path to the saved container image
 	RestoreFile string
@@ -274,105 +125,209 @@ type Config struct {
 	// NumNetworkChannels controls the number of AF_PACKET sockets that map
 	// to the same underlying network device. This allows netstack to better
 	// scale for high throughput use cases.
-	NumNetworkChannels int
+	NumNetworkChannels int `flag:"num-network-channels"`
 
 	// Rootless allows the sandbox to be started with a user that is not root.
 	// Defense is depth measures are weaker with rootless. Specifically, the
 	// sandbox and Gofer process run as root inside a user namespace with root
 	// mapped to the caller's user.
-	Rootless bool
+	Rootless bool `flag:"rootless"`
 
 	// AlsoLogToStderr allows to send log messages to stderr.
-	AlsoLogToStderr bool
+	AlsoLogToStderr bool `flag:"alsologtostderr"`
 
 	// ReferenceLeakMode sets reference leak check mode
-	ReferenceLeakMode refs.LeakMode
+	ReferenceLeak refs.LeakMode `flag:"ref-leak-mode"`
 
 	// OverlayfsStaleRead instructs the sandbox to assume that the root mount
 	// is on a Linux overlayfs mount, which does not necessarily preserve
 	// coherence between read-only and subsequent writable file descriptors
 	// representing the "same" file.
-	OverlayfsStaleRead bool
+	OverlayfsStaleRead bool `flag:"overlayfs-stale-read"`
 
 	// CPUNumFromQuota sets CPU number count to available CPU quota, using
 	// least integer value greater than or equal to quota.
 	//
 	// E.g. 0.2 CPU quota will result in 1, and 1.9 in 2.
-	CPUNumFromQuota bool
+	CPUNumFromQuota bool `flag:"cpu-num-from-quota"`
 
-	// Enables VFS2 (not plumbed through yet).
-	VFS2 bool
+	// Enables VFS2.
+	VFS2 bool `flag:"vfs2"`
 
-	// Enables FUSE usage (not plumbed through yet).
-	FUSE bool
+	// Enables FUSE usage.
+	FUSE bool `flag:"fuse"`
 
 	// TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in
 	// tests. It allows runsc to start the sandbox process as the current
 	// user, and without chrooting the sandbox process. This can be
 	// necessary in test environments that have limited capabilities.
-	TestOnlyAllowRunAsCurrentUserWithoutChroot bool
+	TestOnlyAllowRunAsCurrentUserWithoutChroot bool `flag:"TESTONLY-unsafe-nonroot"`
 
 	// TestOnlyTestNameEnv should only be used in tests. It looks up for the
 	// test name in the container environment variables and adds it to the debug
 	// log file name. This is done to help identify the log with the test when
 	// multiple tests are run in parallel, since there is no way to pass
 	// parameters to the runtime from docker.
-	TestOnlyTestNameEnv string
+	TestOnlyTestNameEnv string `flag:"TESTONLY-test-name-env"`
+}
+
+func (c *Config) validate() error {
+	if c.FileAccess == FileAccessShared && c.Overlay {
+		return fmt.Errorf("overlay flag is incompatible with shared file access")
+	}
+	if c.NumNetworkChannels <= 0 {
+		return fmt.Errorf("num_network_channels must be > 0, got: %d", c.NumNetworkChannels)
+	}
+	return nil
 }
 
-// ToFlags returns a slice of flags that correspond to the given Config.
-func (c *Config) ToFlags() []string {
-	f := []string{
-		"--root=" + c.RootDir,
-		"--debug=" + strconv.FormatBool(c.Debug),
-		"--log=" + c.LogFilename,
-		"--log-format=" + c.LogFormat,
-		"--debug-log=" + c.DebugLog,
-		"--panic-log=" + c.PanicLog,
-		"--debug-log-format=" + c.DebugLogFormat,
-		"--file-access=" + c.FileAccess.String(),
-		"--overlay=" + strconv.FormatBool(c.Overlay),
-		"--fsgofer-host-uds=" + strconv.FormatBool(c.FSGoferHostUDS),
-		"--network=" + c.Network.String(),
-		"--log-packets=" + strconv.FormatBool(c.LogPackets),
-		"--platform=" + c.Platform,
-		"--strace=" + strconv.FormatBool(c.Strace),
-		"--strace-syscalls=" + strings.Join(c.StraceSyscalls, ","),
-		"--strace-log-size=" + strconv.Itoa(int(c.StraceLogSize)),
-		"--watchdog-action=" + c.WatchdogAction.String(),
-		"--panic-signal=" + strconv.Itoa(c.PanicSignal),
-		"--profile=" + strconv.FormatBool(c.ProfileEnable),
-		"--net-raw=" + strconv.FormatBool(c.EnableRaw),
-		"--num-network-channels=" + strconv.Itoa(c.NumNetworkChannels),
-		"--rootless=" + strconv.FormatBool(c.Rootless),
-		"--alsologtostderr=" + strconv.FormatBool(c.AlsoLogToStderr),
-		"--ref-leak-mode=" + refsLeakModeToString(c.ReferenceLeakMode),
-		"--gso=" + strconv.FormatBool(c.HardwareGSO),
-		"--software-gso=" + strconv.FormatBool(c.SoftwareGSO),
-		"--rx-checksum-offload=" + strconv.FormatBool(c.RXChecksumOffload),
-		"--tx-checksum-offload=" + strconv.FormatBool(c.TXChecksumOffload),
-		"--overlayfs-stale-read=" + strconv.FormatBool(c.OverlayfsStaleRead),
-		"--qdisc=" + c.QDisc.String(),
-		"--vfs2=" + strconv.FormatBool(c.VFS2),
-		"--fuse=" + strconv.FormatBool(c.FUSE),
+// FileAccessType tells how the filesystem is accessed.
+type FileAccessType int
+
+const (
+	// FileAccessExclusive is the same as FileAccessShared, but enables
+	// extra caching for improved performance. It should only be used if
+	// the sandbox has exclusive access to the filesystem.
+	FileAccessExclusive FileAccessType = iota
+
+	// FileAccessShared sends IO requests to a Gofer process that validates the
+	// requests and forwards them to the host.
+	FileAccessShared
+)
+
+func fileAccessTypePtr(v FileAccessType) *FileAccessType {
+	return &v
+}
+
+// Set implements flag.Value.
+func (f *FileAccessType) Set(v string) error {
+	switch v {
+	case "shared":
+		*f = FileAccessShared
+	case "exclusive":
+		*f = FileAccessExclusive
+	default:
+		return fmt.Errorf("invalid file access type %q", v)
 	}
-	if c.CPUNumFromQuota {
-		f = append(f, "--cpu-num-from-quota")
+	return nil
+}
+
+// Get implements flag.Value.
+func (f *FileAccessType) Get() interface{} {
+	return *f
+}
+
+// String implements flag.Value.
+func (f *FileAccessType) String() string {
+	switch *f {
+	case FileAccessShared:
+		return "shared"
+	case FileAccessExclusive:
+		return "exclusive"
 	}
-	if c.VFS2 {
-		f = append(f, "--vfs2=true")
+	panic(fmt.Sprintf("Invalid file access type %v", *f))
+}
+
+// NetworkType tells which network stack to use.
+type NetworkType int
+
+const (
+	// NetworkSandbox uses internal network stack, isolated from the host.
+	NetworkSandbox NetworkType = iota
+
+	// NetworkHost redirects network related syscalls to the host network.
+	NetworkHost
+
+	// NetworkNone sets up just loopback using netstack.
+	NetworkNone
+)
+
+func networkTypePtr(v NetworkType) *NetworkType {
+	return &v
+}
+
+// Set implements flag.Value.
+func (n *NetworkType) Set(v string) error {
+	switch v {
+	case "sandbox":
+		*n = NetworkSandbox
+	case "host":
+		*n = NetworkHost
+	case "none":
+		*n = NetworkNone
+	default:
+		return fmt.Errorf("invalid network type %q", v)
 	}
-	if c.FUSE {
-		f = append(f, "--fuse=true")
+	return nil
+}
+
+// Get implements flag.Value.
+func (n *NetworkType) Get() interface{} {
+	return *n
+}
+
+// String implements flag.Value.
+func (n *NetworkType) String() string {
+	switch *n {
+	case NetworkSandbox:
+		return "sandbox"
+	case NetworkHost:
+		return "host"
+	case NetworkNone:
+		return "none"
 	}
+	panic(fmt.Sprintf("Invalid network type %v", *n))
+}
+
+// QueueingDiscipline is used to specify the kind of Queueing Discipline to
+// apply for a given FDBasedLink.
+type QueueingDiscipline int
 
-	// Only include these if set since it is never to be used by users.
-	if c.TestOnlyAllowRunAsCurrentUserWithoutChroot {
-		f = append(f, "--TESTONLY-unsafe-nonroot=true")
+const (
+	// QDiscNone disables any queueing for the underlying FD.
+	QDiscNone QueueingDiscipline = iota
+
+	// QDiscFIFO applies a simple fifo based queue to the underlying FD.
+	QDiscFIFO
+)
+
+func queueingDisciplinePtr(v QueueingDiscipline) *QueueingDiscipline {
+	return &v
+}
+
+// Set implements flag.Value.
+func (q *QueueingDiscipline) Set(v string) error {
+	switch v {
+	case "none":
+		*q = QDiscNone
+	case "fifo":
+		*q = QDiscFIFO
+	default:
+		return fmt.Errorf("invalid qdisc %q", v)
 	}
-	if len(c.TestOnlyTestNameEnv) != 0 {
-		f = append(f, "--TESTONLY-test-name-env="+c.TestOnlyTestNameEnv)
+	return nil
+}
+
+// Get implements flag.Value.
+func (q *QueueingDiscipline) Get() interface{} {
+	return *q
+}
+
+// String implements flag.Value.
+func (q *QueueingDiscipline) String() string {
+	switch *q {
+	case QDiscNone:
+		return "none"
+	case QDiscFIFO:
+		return "fifo"
 	}
+	panic(fmt.Sprintf("Invalid qdisc %v", *q))
+}
+
+func leakModePtr(v refs.LeakMode) *refs.LeakMode {
+	return &v
+}
 
-	return f
+func watchdogActionPtr(v watchdog.Action) *watchdog.Action {
+	return &v
 }
diff --git a/runsc/config/config_test.go b/runsc/config/config_test.go
new file mode 100644
index 0000000000..af7867a2aa
--- /dev/null
+++ b/runsc/config/config_test.go
@@ -0,0 +1,185 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package config
+
+import (
+	"strings"
+	"testing"
+
+	"gvisor.dev/gvisor/runsc/flag"
+)
+
+func init() {
+	RegisterFlags()
+}
+
+func TestDefault(t *testing.T) {
+	c, err := NewFromFlags()
+	if err != nil {
+		t.Fatal(err)
+	}
+	// "--root" is always set to something different than the default. Reset it
+	// to make it easier to test that default values do not generate flags.
+	c.RootDir = ""
+
+	// A Config holding only default values should not generate any flags.
+	flags := c.ToFlags()
+	if len(flags) > 0 {
+		t.Errorf("default flags not set correctly for: %s", flags)
+	}
+}
+
+func setDefault(name string) {
+	fl := flag.CommandLine.Lookup(name)
+	fl.Value.Set(fl.DefValue)
+}
+
+func TestFromFlags(t *testing.T) {
+	flag.CommandLine.Lookup("root").Value.Set("some-path")
+	flag.CommandLine.Lookup("debug").Value.Set("true")
+	flag.CommandLine.Lookup("num-network-channels").Value.Set("123")
+	flag.CommandLine.Lookup("network").Value.Set("none")
+	defer func() {
+		setDefault("root")
+		setDefault("debug")
+		setDefault("num-network-channels")
+		setDefault("network")
+	}()
+
+	c, err := NewFromFlags()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if want := "some-path"; c.RootDir != want {
+		t.Errorf("RootDir=%v, want: %v", c.RootDir, want)
+	}
+	if want := true; c.Debug != want {
+		t.Errorf("Debug=%v, want: %v", c.Debug, want)
+	}
+	if want := 123; c.NumNetworkChannels != want {
+		t.Errorf("NumNetworkChannels=%v, want: %v", c.NumNetworkChannels, want)
+	}
+	if want := NetworkNone; c.Network != want {
+		t.Errorf("Network=%v, want: %v", c.Network, want)
+	}
+}
+
+func TestToFlags(t *testing.T) {
+	c, err := NewFromFlags()
+	if err != nil {
+		t.Fatal(err)
+	}
+	c.RootDir = "some-path"
+	c.Debug = true
+	c.NumNetworkChannels = 123
+	c.Network = NetworkNone
+
+	flags := c.ToFlags()
+	if len(flags) != 4 {
+		t.Errorf("wrong number of flags set, want: 4, got: %d: %s", len(flags), flags)
+	}
+	t.Logf("Flags: %s", flags)
+	fm := map[string]string{}
+	for _, f := range flags {
+		kv := strings.Split(f, "=")
+		fm[kv[0]] = kv[1]
+	}
+	for name, want := range map[string]string{
+		"--root":                 "some-path",
+		"--debug":                "true",
+		"--num-network-channels": "123",
+		"--network":              "none",
+	} {
+		if got, ok := fm[name]; ok {
+			if got != want {
+				t.Errorf("flag %q, want: %q, got: %q", name, want, got)
+			}
+		} else {
+			t.Errorf("flag %q not set", name)
+		}
+	}
+}
+
+// TestInvalidFlags checks that enum flags fail when value is not in enum set.
+func TestInvalidFlags(t *testing.T) {
+	for _, tc := range []struct {
+		name  string
+		error string
+	}{
+		{
+			name:  "file-access",
+			error: "invalid file access type",
+		},
+		{
+			name:  "network",
+			error: "invalid network type",
+		},
+		{
+			name:  "qdisc",
+			error: "invalid qdisc",
+		},
+		{
+			name:  "watchdog-action",
+			error: "invalid watchdog action",
+		},
+		{
+			name:  "ref-leak-mode",
+			error: "invalid ref leak mode",
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			defer setDefault(tc.name)
+			if err := flag.CommandLine.Lookup(tc.name).Value.Set("invalid"); err == nil || !strings.Contains(err.Error(), tc.error) {
+				t.Errorf("flag.Value.Set(invalid) wrong error reported: %v", err)
+			}
+		})
+	}
+}
+
+func TestValidationFail(t *testing.T) {
+	for _, tc := range []struct {
+		name  string
+		flags map[string]string
+		error string
+	}{
+		{
+			name: "shared+overlay",
+			flags: map[string]string{
+				"file-access": "shared",
+				"overlay":     "true",
+			},
+			error: "overlay flag is incompatible",
+		},
+		{
+			name: "network-channels",
+			flags: map[string]string{
+				"num-network-channels": "-1",
+			},
+			error: "num_network_channels must be > 0",
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			for name, val := range tc.flags {
+				defer setDefault(name)
+				if err := flag.CommandLine.Lookup(name).Value.Set(val); err != nil {
+					t.Errorf("%s=%q: %v", name, val, err)
+				}
+			}
+			if _, err := NewFromFlags(); err == nil || !strings.Contains(err.Error(), tc.error) {
+				t.Errorf("NewFromFlags() wrong error reported: %v", err)
+			}
+		})
+	}
+}
diff --git a/runsc/config/flags.go b/runsc/config/flags.go
new file mode 100644
index 0000000000..488a4b9fb5
--- /dev/null
+++ b/runsc/config/flags.go
@@ -0,0 +1,168 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package config
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"reflect"
+	"strconv"
+
+	"gvisor.dev/gvisor/pkg/refs"
+	"gvisor.dev/gvisor/pkg/sentry/watchdog"
+	"gvisor.dev/gvisor/pkg/sync"
+	"gvisor.dev/gvisor/runsc/flag"
+)
+
+var registration sync.Once
+
+// RegisterFlags registers the set of flags used to populate Config.
+func RegisterFlags() {
+	registration.Do(func() {
+		// Although these flags are not part of the OCI spec, they are used by
+		// Docker, and thus should not be changed.
+		flag.String("root", "", "root directory for storage of container state.")
+		flag.String("log", "", "file path where internal debug information is written, default is stdout.")
+		flag.String("log-format", "text", "log format: text (default), json, or json-k8s.")
+		flag.Bool("debug", false, "enable debug logging.")
+
+		// These flags are unique to runsc, and are used to configure parts of the
+		// system that are not covered by the runtime spec.
+
+		// Debugging flags.
+		flag.String("debug-log", "", "additional location for logs. If it ends with '/', log files are created inside the directory with default names. The following variables are available: %TIMESTAMP%, %COMMAND%.")
+		flag.String("panic-log", "", "file path were panic reports and other Go's runtime messages are written.")
+		flag.Bool("log-packets", false, "enable network packet logging.")
+		flag.String("debug-log-format", "text", "log format: text (default), json, or json-k8s.")
+		flag.Bool("alsologtostderr", false, "send log messages to stderr.")
+
+		// Debugging flags: strace related
+		flag.Bool("strace", false, "enable strace.")
+		flag.String("strace-syscalls", "", "comma-separated list of syscalls to trace. If --strace is true and this list is empty, then all syscalls will be traced.")
+		flag.Uint("strace-log-size", 1024, "default size (in bytes) to log data argument blobs.")
+
+		// Flags that control sandbox runtime behavior.
+		flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm.")
+		flag.Var(watchdogActionPtr(watchdog.LogWarning), "watchdog-action", "sets what action the watchdog takes when triggered: log (default), panic.")
+		flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
+		flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
+		flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
+		flag.Var(leakModePtr(refs.NoLeakChecking), "ref-leak-mode", "sets reference leak check mode: disabled (default), log-names, log-traces.")
+		flag.Bool("cpu-num-from-quota", false, "set cpu number to cpu quota (least integer greater or equal to quota value, but not less than 2)")
+
+		// Flags that control sandbox runtime behavior: FS related.
+		flag.Var(fileAccessTypePtr(FileAccessExclusive), "file-access", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
+		flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
+		flag.Bool("overlayfs-stale-read", true, "assume root mount is an overlay filesystem")
+		flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.")
+		flag.Bool("vfs2", false, "TEST ONLY; use while VFSv2 is landing. This uses the new experimental VFS layer.")
+		flag.Bool("fuse", false, "TEST ONLY; use while FUSE in VFSv2 is landing. This allows the use of the new experimental FUSE filesystem.")
+
+		// Flags that control sandbox runtime behavior: network related.
+		flag.Var(networkTypePtr(NetworkSandbox), "network", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
+		flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
+		flag.Bool("gso", true, "enable hardware segmentation offload if it is supported by a network device.")
+		flag.Bool("software-gso", true, "enable software segmentation offload when hardware offload can't be enabled.")
+		flag.Bool("tx-checksum-offload", false, "enable TX checksum offload.")
+		flag.Bool("rx-checksum-offload", true, "enable RX checksum offload.")
+		flag.Var(queueingDisciplinePtr(QDiscFIFO), "qdisc", "specifies which queueing discipline to apply by default to the non loopback nics used by the sandbox.")
+		flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
+
+		// Test flags, not to be used outside tests, ever.
+		flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.")
+		flag.String("TESTONLY-test-name-env", "", "TEST ONLY; do not ever use! Used for automated tests to improve logging.")
+	})
+}
+
+// NewFromFlags creates a new Config with values coming from command line flags.
+func NewFromFlags() (*Config, error) {
+	conf := &Config{}
+
+	obj := reflect.ValueOf(conf).Elem()
+	st := obj.Type()
+	for i := 0; i < st.NumField(); i++ {
+		f := st.Field(i)
+		name, ok := f.Tag.Lookup("flag")
+		if !ok {
+			// No flag set for this field.
+			continue
+		}
+		fl := flag.CommandLine.Lookup(name)
+		if fl == nil {
+			panic(fmt.Sprintf("Flag %q not found", name))
+		}
+		x := reflect.ValueOf(flag.Get(fl.Value))
+		obj.Field(i).Set(x)
+	}
+
+	if len(conf.RootDir) == 0 {
+		// If not set, set default root dir to something (hopefully) user-writeable.
+		conf.RootDir = "/var/run/runsc"
+		if runtimeDir := os.Getenv("XDG_RUNTIME_DIR"); runtimeDir != "" {
+			conf.RootDir = filepath.Join(runtimeDir, "runsc")
+		}
+	}
+
+	if err := conf.validate(); err != nil {
+		return nil, err
+	}
+	return conf, nil
+}
+
+// ToFlags returns a slice of flags that correspond to the given Config.
+func (c *Config) ToFlags() []string {
+	var rv []string
+
+	obj := reflect.ValueOf(c).Elem()
+	st := obj.Type()
+	for i := 0; i < st.NumField(); i++ {
+		f := st.Field(i)
+		name, ok := f.Tag.Lookup("flag")
+		if !ok {
+			// No flag set for this field.
+			continue
+		}
+		val := getVal(obj.Field(i))
+
+		flag := flag.CommandLine.Lookup(name)
+		if flag == nil {
+			panic(fmt.Sprintf("Flag %q not found", name))
+		}
+		if val == flag.DefValue {
+			continue
+		}
+		rv = append(rv, fmt.Sprintf("--%s=%s", flag.Name, val))
+	}
+	return rv
+}
+
+func getVal(field reflect.Value) string {
+	if str, ok := field.Addr().Interface().(fmt.Stringer); ok {
+		return str.String()
+	}
+	switch field.Kind() {
+	case reflect.Bool:
+		return strconv.FormatBool(field.Bool())
+	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		return strconv.FormatInt(field.Int(), 10)
+	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+		return strconv.FormatUint(field.Uint(), 10)
+	case reflect.String:
+		return field.String()
+	default:
+		panic("unknown type " + field.Kind().String())
+	}
+}
diff --git a/runsc/flag/flag.go b/runsc/flag/flag.go
index 0ca4829d73..ba1ff833f8 100644
--- a/runsc/flag/flag.go
+++ b/runsc/flag/flag.go
@@ -21,13 +21,19 @@ import (
 type FlagSet = flag.FlagSet
 
 var (
-	NewFlagSet  = flag.NewFlagSet
-	String      = flag.String
 	Bool        = flag.Bool
-	Int         = flag.Int
-	Uint        = flag.Uint
 	CommandLine = flag.CommandLine
+	Int         = flag.Int
+	NewFlagSet  = flag.NewFlagSet
 	Parse       = flag.Parse
+	String      = flag.String
+	Uint        = flag.Uint
+	Var         = flag.Var
 )
 
 const ContinueOnError = flag.ContinueOnError
+
+// Get returns the flag's underlying object.
+func Get(v flag.Value) interface{} {
+	return v.(flag.Getter).Get()
+}
diff --git a/runsc/main.go b/runsc/main.go
index c2ffecbdc7..ed244c4bae 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -23,8 +23,6 @@ import (
 	"io/ioutil"
 	"os"
 	"os/signal"
-	"path/filepath"
-	"strings"
 	"syscall"
 	"time"
 
@@ -41,58 +39,17 @@ import (
 var (
 	// Although these flags are not part of the OCI spec, they are used by
 	// Docker, and thus should not be changed.
-	rootDir     = flag.String("root", "", "root directory for storage of container state.")
-	logFilename = flag.String("log", "", "file path where internal debug information is written, default is stdout.")
-	logFormat   = flag.String("log-format", "text", "log format: text (default), json, or json-k8s.")
-	debug       = flag.Bool("debug", false, "enable debug logging.")
-	showVersion = flag.Bool("version", false, "show version and exit.")
 	// TODO(gvisor.dev/issue/193): support systemd cgroups
 	systemdCgroup = flag.Bool("systemd-cgroup", false, "Use systemd for cgroups. NOT SUPPORTED.")
+	showVersion   = flag.Bool("version", false, "show version and exit.")
 
 	// These flags are unique to runsc, and are used to configure parts of the
 	// system that are not covered by the runtime spec.
 
 	// Debugging flags.
-	debugLog        = flag.String("debug-log", "", "additional location for logs. If it ends with '/', log files are created inside the directory with default names. The following variables are available: %TIMESTAMP%, %COMMAND%.")
-	panicLog        = flag.String("panic-log", "", "file path were panic reports and other Go's runtime messages are written.")
-	logPackets      = flag.Bool("log-packets", false, "enable network packet logging.")
-	logFD           = flag.Int("log-fd", -1, "file descriptor to log to.  If set, the 'log' flag is ignored.")
-	debugLogFD      = flag.Int("debug-log-fd", -1, "file descriptor to write debug logs to.  If set, the 'debug-log-dir' flag is ignored.")
-	panicLogFD      = flag.Int("panic-log-fd", -1, "file descriptor to write Go's runtime messages.")
-	debugLogFormat  = flag.String("debug-log-format", "text", "log format: text (default), json, or json-k8s.")
-	alsoLogToStderr = flag.Bool("alsologtostderr", false, "send log messages to stderr.")
-
-	// Debugging flags: strace related
-	strace         = flag.Bool("strace", false, "enable strace.")
-	straceSyscalls = flag.String("strace-syscalls", "", "comma-separated list of syscalls to trace. If --strace is true and this list is empty, then all syscalls will be traced.")
-	straceLogSize  = flag.Uint("strace-log-size", 1024, "default size (in bytes) to log data argument blobs.")
-
-	// Flags that control sandbox runtime behavior.
-	platformName       = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm.")
-	network            = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
-	hardwareGSO        = flag.Bool("gso", true, "enable hardware segmentation offload if it is supported by a network device.")
-	softwareGSO        = flag.Bool("software-gso", true, "enable software segmentation offload when hardware offload can't be enabled.")
-	txChecksumOffload  = flag.Bool("tx-checksum-offload", false, "enable TX checksum offload.")
-	rxChecksumOffload  = flag.Bool("rx-checksum-offload", true, "enable RX checksum offload.")
-	qDisc              = flag.String("qdisc", "fifo", "specifies which queueing discipline to apply by default to the non loopback nics used by the sandbox.")
-	fileAccess         = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
-	fsGoferHostUDS     = flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.")
-	overlay            = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
-	overlayfsStaleRead = flag.Bool("overlayfs-stale-read", true, "assume root mount is an overlay filesystem")
-	watchdogAction     = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
-	panicSignal        = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
-	profile            = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
-	netRaw             = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
-	numNetworkChannels = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
-	rootless           = flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
-	referenceLeakMode  = flag.String("ref-leak-mode", "disabled", "sets reference leak check mode: disabled (default), log-names, log-traces.")
-	cpuNumFromQuota    = flag.Bool("cpu-num-from-quota", false, "set cpu number to cpu quota (least integer greater or equal to quota value, but not less than 2)")
-	vfs2Enabled        = flag.Bool("vfs2", false, "TEST ONLY; use while VFSv2 is landing. This uses the new experimental VFS layer.")
-	fuseEnabled        = flag.Bool("fuse", false, "TEST ONLY; use while FUSE in VFSv2 is landing. This allows the use of the new experimental FUSE filesystem.")
-
-	// Test flags, not to be used outside tests, ever.
-	testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.")
-	testOnlyTestNameEnv                        = flag.String("TESTONLY-test-name-env", "", "TEST ONLY; do not ever use! Used for automated tests to improve logging.")
+	logFD      = flag.Int("log-fd", -1, "file descriptor to log to.  If set, the 'log' flag is ignored.")
+	debugLogFD = flag.Int("debug-log-fd", -1, "file descriptor to write debug logs to.  If set, the 'debug-log-dir' flag is ignored.")
+	panicLogFD = flag.Int("panic-log-fd", -1, "file descriptor to write Go's runtime messages.")
 )
 
 func main() {
@@ -136,6 +93,8 @@ func main() {
 	subcommands.Register(new(cmd.Gofer), internalGroup)
 	subcommands.Register(new(cmd.Statefile), internalGroup)
 
+	config.RegisterFlags()
+
 	// All subcommands must be registered before flag parsing.
 	flag.Parse()
 
@@ -147,6 +106,12 @@ func main() {
 		os.Exit(0)
 	}
 
+	// Create a new Config from the flags.
+	conf, err := config.NewFromFlags()
+	if err != nil {
+		cmd.Fatalf(err.Error())
+	}
+
 	// TODO(gvisor.dev/issue/193): support systemd cgroups
 	if *systemdCgroup {
 		fmt.Fprintln(os.Stderr, "systemd cgroup flag passed, but systemd cgroups not supported. See gvisor.dev/issue/193")
@@ -157,103 +122,28 @@ func main() {
 	if *logFD > -1 {
 		errorLogger = os.NewFile(uintptr(*logFD), "error log file")
 
-	} else if *logFilename != "" {
+	} else if conf.LogFilename != "" {
 		// We must set O_APPEND and not O_TRUNC because Docker passes
 		// the same log file for all commands (and also parses these
 		// log files), so we can't destroy them on each command.
 		var err error
-		errorLogger, err = os.OpenFile(*logFilename, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
+		errorLogger, err = os.OpenFile(conf.LogFilename, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
 		if err != nil {
-			cmd.Fatalf("error opening log file %q: %v", *logFilename, err)
+			cmd.Fatalf("error opening log file %q: %v", conf.LogFilename, err)
 		}
 	}
 	cmd.ErrorLogger = errorLogger
 
-	platformType := *platformName
-	if _, err := platform.Lookup(platformType); err != nil {
-		cmd.Fatalf("%v", err)
-	}
-
-	fsAccess, err := config.MakeFileAccessType(*fileAccess)
-	if err != nil {
-		cmd.Fatalf("%v", err)
-	}
-
-	if fsAccess == config.FileAccessShared && *overlay {
-		cmd.Fatalf("overlay flag is incompatible with shared file access")
-	}
-
-	netType, err := config.MakeNetworkType(*network)
-	if err != nil {
+	if _, err := platform.Lookup(conf.Platform); err != nil {
 		cmd.Fatalf("%v", err)
 	}
 
-	wa, err := config.MakeWatchdogAction(*watchdogAction)
-	if err != nil {
-		cmd.Fatalf("%v", err)
-	}
-
-	if *numNetworkChannels <= 0 {
-		cmd.Fatalf("num_network_channels must be > 0, got: %d", *numNetworkChannels)
-	}
-
-	refsLeakMode, err := config.MakeRefsLeakMode(*referenceLeakMode)
-	if err != nil {
-		cmd.Fatalf("%v", err)
-	}
-
-	queueingDiscipline, err := config.MakeQueueingDiscipline(*qDisc)
-	if err != nil {
-		cmd.Fatalf("%s", err)
-	}
-
 	// Sets the reference leak check mode. Also set it in config below to
 	// propagate it to child processes.
-	refs.SetLeakMode(refsLeakMode)
-
-	// Create a new Config from the flags.
-	conf := &config.Config{
-		RootDir:            *rootDir,
-		Debug:              *debug,
-		LogFilename:        *logFilename,
-		LogFormat:          *logFormat,
-		DebugLog:           *debugLog,
-		PanicLog:           *panicLog,
-		DebugLogFormat:     *debugLogFormat,
-		FileAccess:         fsAccess,
-		FSGoferHostUDS:     *fsGoferHostUDS,
-		Overlay:            *overlay,
-		Network:            netType,
-		HardwareGSO:        *hardwareGSO,
-		SoftwareGSO:        *softwareGSO,
-		TXChecksumOffload:  *txChecksumOffload,
-		RXChecksumOffload:  *rxChecksumOffload,
-		LogPackets:         *logPackets,
-		Platform:           platformType,
-		Strace:             *strace,
-		StraceLogSize:      *straceLogSize,
-		WatchdogAction:     wa,
-		PanicSignal:        *panicSignal,
-		ProfileEnable:      *profile,
-		EnableRaw:          *netRaw,
-		NumNetworkChannels: *numNetworkChannels,
-		Rootless:           *rootless,
-		AlsoLogToStderr:    *alsoLogToStderr,
-		ReferenceLeakMode:  refsLeakMode,
-		OverlayfsStaleRead: *overlayfsStaleRead,
-		CPUNumFromQuota:    *cpuNumFromQuota,
-		VFS2:               *vfs2Enabled,
-		FUSE:               *fuseEnabled,
-		QDisc:              queueingDiscipline,
-		TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot,
-		TestOnlyTestNameEnv:                        *testOnlyTestNameEnv,
-	}
-	if len(*straceSyscalls) != 0 {
-		conf.StraceSyscalls = strings.Split(*straceSyscalls, ",")
-	}
+	refs.SetLeakMode(conf.ReferenceLeak)
 
 	// Set up logging.
-	if *debug {
+	if conf.Debug {
 		log.SetLevel(log.Debug)
 	}
 
@@ -275,14 +165,14 @@ func main() {
 	if *debugLogFD > -1 {
 		f := os.NewFile(uintptr(*debugLogFD), "debug log file")
 
-		e = newEmitter(*debugLogFormat, f)
+		e = newEmitter(conf.DebugLogFormat, f)
 
-	} else if *debugLog != "" {
-		f, err := specutils.DebugLogFile(*debugLog, subcommand, "" /* name */)
+	} else if conf.DebugLog != "" {
+		f, err := specutils.DebugLogFile(conf.DebugLog, subcommand, "" /* name */)
 		if err != nil {
-			cmd.Fatalf("error opening debug log file in %q: %v", *debugLog, err)
+			cmd.Fatalf("error opening debug log file in %q: %v", conf.DebugLog, err)
 		}
-		e = newEmitter(*debugLogFormat, f)
+		e = newEmitter(conf.DebugLogFormat, f)
 
 	} else {
 		// Stderr is reserved for the application, just discard the logs if no debug
@@ -308,8 +198,8 @@ func main() {
 		if err := syscall.Dup3(fd, int(os.Stderr.Fd()), 0); err != nil {
 			cmd.Fatalf("error dup'ing fd %d to stderr: %v", fd, err)
 		}
-	} else if *alsoLogToStderr {
-		e = &log.MultiEmitter{e, newEmitter(*debugLogFormat, os.Stderr)}
+	} else if conf.AlsoLogToStderr {
+		e = &log.MultiEmitter{e, newEmitter(conf.DebugLogFormat, os.Stderr)}
 	}
 
 	log.SetTarget(e)
@@ -328,7 +218,7 @@ func main() {
 	log.Infof("\t\tVFS2 enabled: %v", conf.VFS2)
 	log.Infof("***************************")
 
-	if *testOnlyAllowRunAsCurrentUserWithoutChroot {
+	if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
 		// SIGTERM is sent to all processes if a test exceeds its
 		// timeout and this case is handled by syscall_test_runner.
 		log.Warningf("Block the TERM signal. This is only safe in tests!")
@@ -364,11 +254,3 @@ func newEmitter(format string, logFile io.Writer) log.Emitter {
 	cmd.Fatalf("invalid log format %q, must be 'text', 'json', or 'json-k8s'", format)
 	panic("unreachable")
 }
-
-func init() {
-	// Set default root dir to something (hopefully) user-writeable.
-	*rootDir = "/var/run/runsc"
-	if runtimeDir := os.Getenv("XDG_RUNTIME_DIR"); runtimeDir != "" {
-		*rootDir = filepath.Join(runtimeDir, "runsc")
-	}
-}
diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go
index f9abb2d441..0b9f394669 100644
--- a/runsc/sandbox/network.go
+++ b/runsc/sandbox/network.go
@@ -69,7 +69,7 @@ func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *config.Con
 	case config.NetworkHost:
 		// Nothing to do here.
 	default:
-		return fmt.Errorf("invalid network type: %d", conf.Network)
+		return fmt.Errorf("invalid network type: %v", conf.Network)
 	}
 	return nil
 }

From 5588def369a09b4525842b04a43fbf146e662311 Mon Sep 17 00:00:00 2001
From: Zach Koopmans <zkoopmans@google.com>
Date: Thu, 27 Aug 2020 09:50:53 -0700
Subject: [PATCH 095/211] Fix JobControl tests for open source.

ioctl calls with TIOCSCTTY fail if the calling process already has a
controlling terminal, which occurs on a 5.4 kernel like our Ubuntu 18 CI.
Thus, run tests calling ioctl TIOCSCTTY in a clean subprocess.

Also, while we're here, switch out non-inclusive master/slave for main/replica.

PiperOrigin-RevId: 328756598
---
 test/syscalls/linux/pty.cc      | 758 +++++++++++++++++---------------
 test/syscalls/linux/pty_root.cc |  12 +-
 test/util/pty_util.cc           |  10 +-
 test/util/pty_util.h            |   8 +-
 4 files changed, 417 insertions(+), 371 deletions(-)

diff --git a/test/syscalls/linux/pty.cc b/test/syscalls/linux/pty.cc
index f9392b9e03..2e4ab6ca86 100644
--- a/test/syscalls/linux/pty.cc
+++ b/test/syscalls/linux/pty.cc
@@ -51,6 +51,7 @@ using ::testing::AnyOf;
 using ::testing::Contains;
 using ::testing::Eq;
 using ::testing::Not;
+using SubprocessCallback = std::function<void()>;
 
 // Tests Unix98 pseudoterminals.
 //
@@ -70,7 +71,7 @@ constexpr absl::Duration kTimeout = absl::Seconds(20);
 // The maximum line size in bytes returned per read from a pty file.
 constexpr int kMaxLineSize = 4096;
 
-constexpr char kMasterPath[] = "/dev/ptmx";
+constexpr char kMainPath[] = "/dev/ptmx";
 
 // glibc defines its own, different, version of struct termios. We care about
 // what the kernel does, not glibc.
@@ -387,22 +388,22 @@ PosixErrorOr<size_t> PollAndReadFd(int fd, void* buf, size_t count,
 TEST(PtyTrunc, Truncate) {
   // Opening PTYs with O_TRUNC shouldn't cause an error, but calls to
   // (f)truncate should.
-  FileDescriptor master =
-      ASSERT_NO_ERRNO_AND_VALUE(Open(kMasterPath, O_RDWR | O_TRUNC));
-  int n = ASSERT_NO_ERRNO_AND_VALUE(SlaveID(master));
+  FileDescriptor main =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(kMainPath, O_RDWR | O_TRUNC));
+  int n = ASSERT_NO_ERRNO_AND_VALUE(ReplicaID(main));
   std::string spath = absl::StrCat("/dev/pts/", n);
-  FileDescriptor slave =
+  FileDescriptor replica =
       ASSERT_NO_ERRNO_AND_VALUE(Open(spath, O_RDWR | O_NONBLOCK | O_TRUNC));
 
-  EXPECT_THAT(truncate(kMasterPath, 0), SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(truncate(kMainPath, 0), SyscallFailsWithErrno(EINVAL));
   EXPECT_THAT(truncate(spath.c_str(), 0), SyscallFailsWithErrno(EINVAL));
-  EXPECT_THAT(ftruncate(master.get(), 0), SyscallFailsWithErrno(EINVAL));
-  EXPECT_THAT(ftruncate(slave.get(), 0), SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(ftruncate(main.get(), 0), SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(ftruncate(replica.get(), 0), SyscallFailsWithErrno(EINVAL));
 }
 
-TEST(BasicPtyTest, StatUnopenedMaster) {
+TEST(BasicPtyTest, StatUnopenedMain) {
   struct stat s;
-  ASSERT_THAT(stat(kMasterPath, &s), SyscallSucceeds());
+  ASSERT_THAT(stat(kMainPath, &s), SyscallSucceeds());
 
   EXPECT_EQ(s.st_rdev, makedev(TTYAUX_MAJOR, kPtmxMinor));
   EXPECT_EQ(s.st_size, 0);
@@ -453,41 +454,41 @@ void ExpectReadable(const FileDescriptor& fd, int expected, char* buf) {
   EXPECT_EQ(expected, n);
 }
 
-TEST(BasicPtyTest, OpenMasterSlave) {
-  FileDescriptor master = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
-  FileDescriptor slave = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master));
+TEST(BasicPtyTest, OpenMainReplica) {
+  FileDescriptor main = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
+  FileDescriptor replica = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(main));
 }
 
-// The slave entry in /dev/pts/ disappears when the master is closed, even if
-// the slave is still open.
-TEST(BasicPtyTest, SlaveEntryGoneAfterMasterClose) {
-  FileDescriptor master = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
-  FileDescriptor slave = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master));
+// The replica entry in /dev/pts/ disappears when the main is closed, even if
+// the replica is still open.
+TEST(BasicPtyTest, ReplicaEntryGoneAfterMainClose) {
+  FileDescriptor main = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
+  FileDescriptor replica = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(main));
 
   // Get pty index.
   int index = -1;
-  ASSERT_THAT(ioctl(master.get(), TIOCGPTN, &index), SyscallSucceeds());
+  ASSERT_THAT(ioctl(main.get(), TIOCGPTN, &index), SyscallSucceeds());
 
   std::string path = absl::StrCat("/dev/pts/", index);
 
   struct stat st;
   EXPECT_THAT(stat(path.c_str(), &st), SyscallSucceeds());
 
-  master.reset();
+  main.reset();
 
   EXPECT_THAT(stat(path.c_str(), &st), SyscallFailsWithErrno(ENOENT));
 }
 
 TEST(BasicPtyTest, Getdents) {
-  FileDescriptor master1 = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
+  FileDescriptor main1 = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
   int index1 = -1;
-  ASSERT_THAT(ioctl(master1.get(), TIOCGPTN, &index1), SyscallSucceeds());
-  FileDescriptor slave1 = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master1));
+  ASSERT_THAT(ioctl(main1.get(), TIOCGPTN, &index1), SyscallSucceeds());
+  FileDescriptor replica1 = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(main1));
 
-  FileDescriptor master2 = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
+  FileDescriptor main2 = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
   int index2 = -1;
-  ASSERT_THAT(ioctl(master2.get(), TIOCGPTN, &index2), SyscallSucceeds());
-  FileDescriptor slave2 = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master2));
+  ASSERT_THAT(ioctl(main2.get(), TIOCGPTN, &index2), SyscallSucceeds());
+  FileDescriptor replica2 = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(main2));
 
   // The directory contains ptmx, index1, and index2. (Plus any additional PTYs
   // unrelated to this test.)
@@ -497,9 +498,9 @@ TEST(BasicPtyTest, Getdents) {
   EXPECT_THAT(contents, Contains(absl::StrCat(index1)));
   EXPECT_THAT(contents, Contains(absl::StrCat(index2)));
 
-  master2.reset();
+  main2.reset();
 
-  // The directory contains ptmx and index1, but not index2 since the master is
+  // The directory contains ptmx and index1, but not index2 since the main is
   // closed. (Plus any additional PTYs unrelated to this test.)
 
   contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/dev/pts/", true));
@@ -518,104 +519,105 @@ TEST(BasicPtyTest, Getdents) {
 class PtyTest : public ::testing::Test {
  protected:
   void SetUp() override {
-    master_ = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK));
-    slave_ = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master_));
+    main_ = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK));
+    replica_ = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(main_));
   }
 
   void DisableCanonical() {
     struct kernel_termios t = {};
-    EXPECT_THAT(ioctl(slave_.get(), TCGETS, &t), SyscallSucceeds());
+    EXPECT_THAT(ioctl(replica_.get(), TCGETS, &t), SyscallSucceeds());
     t.c_lflag &= ~ICANON;
-    EXPECT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+    EXPECT_THAT(ioctl(replica_.get(), TCSETS, &t), SyscallSucceeds());
   }
 
   void EnableCanonical() {
     struct kernel_termios t = {};
-    EXPECT_THAT(ioctl(slave_.get(), TCGETS, &t), SyscallSucceeds());
+    EXPECT_THAT(ioctl(replica_.get(), TCGETS, &t), SyscallSucceeds());
     t.c_lflag |= ICANON;
-    EXPECT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+    EXPECT_THAT(ioctl(replica_.get(), TCSETS, &t), SyscallSucceeds());
   }
 
-  // Master and slave ends of the PTY. Non-blocking.
-  FileDescriptor master_;
-  FileDescriptor slave_;
+  // Main and replica ends of the PTY. Non-blocking.
+  FileDescriptor main_;
+  FileDescriptor replica_;
 };
 
-// Master to slave sanity test.
-TEST_F(PtyTest, WriteMasterToSlave) {
-  // N.B. by default, the slave reads nothing until the master writes a newline.
+// Main to replica sanity test.
+TEST_F(PtyTest, WriteMainToReplica) {
+  // N.B. by default, the replica reads nothing until the main writes a newline.
   constexpr char kBuf[] = "hello\n";
 
-  EXPECT_THAT(WriteFd(master_.get(), kBuf, sizeof(kBuf) - 1),
+  EXPECT_THAT(WriteFd(main_.get(), kBuf, sizeof(kBuf) - 1),
               SyscallSucceedsWithValue(sizeof(kBuf) - 1));
 
-  // Linux moves data from the master to the slave via async work scheduled via
+  // Linux moves data from the main to the replica via async work scheduled via
   // tty_flip_buffer_push. Since it is asynchronous, the data may not be
   // available for reading immediately. Instead we must poll and assert that it
   // becomes available "soon".
 
   char buf[sizeof(kBuf)] = {};
-  ExpectReadable(slave_, sizeof(buf) - 1, buf);
+  ExpectReadable(replica_, sizeof(buf) - 1, buf);
 
   EXPECT_EQ(memcmp(buf, kBuf, sizeof(kBuf)), 0);
 }
 
-// Slave to master sanity test.
-TEST_F(PtyTest, WriteSlaveToMaster) {
-  // N.B. by default, the master reads nothing until the slave writes a newline,
-  // and the master gets a carriage return.
+// Replica to main sanity test.
+TEST_F(PtyTest, WriteReplicaToMain) {
+  // N.B. by default, the main reads nothing until the replica writes a newline,
+  // and the main gets a carriage return.
   constexpr char kInput[] = "hello\n";
   constexpr char kExpected[] = "hello\r\n";
 
-  EXPECT_THAT(WriteFd(slave_.get(), kInput, sizeof(kInput) - 1),
+  EXPECT_THAT(WriteFd(replica_.get(), kInput, sizeof(kInput) - 1),
               SyscallSucceedsWithValue(sizeof(kInput) - 1));
 
-  // Linux moves data from the master to the slave via async work scheduled via
+  // Linux moves data from the main to the replica via async work scheduled via
   // tty_flip_buffer_push. Since it is asynchronous, the data may not be
   // available for reading immediately. Instead we must poll and assert that it
   // becomes available "soon".
 
   char buf[sizeof(kExpected)] = {};
-  ExpectReadable(master_, sizeof(buf) - 1, buf);
+  ExpectReadable(main_, sizeof(buf) - 1, buf);
 
   EXPECT_EQ(memcmp(buf, kExpected, sizeof(kExpected)), 0);
 }
 
 TEST_F(PtyTest, WriteInvalidUTF8) {
   char c = 0xff;
-  ASSERT_THAT(syscall(__NR_write, master_.get(), &c, sizeof(c)),
+  ASSERT_THAT(syscall(__NR_write, main_.get(), &c, sizeof(c)),
               SyscallSucceedsWithValue(sizeof(c)));
 }
 
-// Both the master and slave report the standard default termios settings.
+// Both the main and replica report the standard default termios settings.
 //
-// Note that TCGETS on the master actually redirects to the slave (see comment
-// on MasterTermiosUnchangable).
+// Note that TCGETS on the main actually redirects to the replica (see comment
+// on MainTermiosUnchangable).
 TEST_F(PtyTest, DefaultTermios) {
   struct kernel_termios t = {};
-  EXPECT_THAT(ioctl(slave_.get(), TCGETS, &t), SyscallSucceeds());
+  EXPECT_THAT(ioctl(replica_.get(), TCGETS, &t), SyscallSucceeds());
   EXPECT_EQ(t, DefaultTermios());
 
-  EXPECT_THAT(ioctl(master_.get(), TCGETS, &t), SyscallSucceeds());
+  EXPECT_THAT(ioctl(main_.get(), TCGETS, &t), SyscallSucceeds());
   EXPECT_EQ(t, DefaultTermios());
 }
 
-// Changing termios from the master actually affects the slave.
+// Changing termios from the main actually affects the replica.
 //
-// TCSETS on the master actually redirects to the slave (see comment on
-// MasterTermiosUnchangable).
-TEST_F(PtyTest, TermiosAffectsSlave) {
-  struct kernel_termios master_termios = {};
-  EXPECT_THAT(ioctl(master_.get(), TCGETS, &master_termios), SyscallSucceeds());
-  master_termios.c_lflag ^= ICANON;
-  EXPECT_THAT(ioctl(master_.get(), TCSETS, &master_termios), SyscallSucceeds());
-
-  struct kernel_termios slave_termios = {};
-  EXPECT_THAT(ioctl(slave_.get(), TCGETS, &slave_termios), SyscallSucceeds());
-  EXPECT_EQ(master_termios, slave_termios);
+// TCSETS on the main actually redirects to the replica (see comment on
+// MainTermiosUnchangable).
+TEST_F(PtyTest, TermiosAffectsReplica) {
+  struct kernel_termios main_termios = {};
+  EXPECT_THAT(ioctl(main_.get(), TCGETS, &main_termios), SyscallSucceeds());
+  main_termios.c_lflag ^= ICANON;
+  EXPECT_THAT(ioctl(main_.get(), TCSETS, &main_termios), SyscallSucceeds());
+
+  struct kernel_termios replica_termios = {};
+  EXPECT_THAT(ioctl(replica_.get(), TCGETS, &replica_termios),
+              SyscallSucceeds());
+  EXPECT_EQ(main_termios, replica_termios);
 }
 
-// The master end of the pty has termios:
+// The main end of the pty has termios:
 //
 // struct kernel_termios t = {
 //   .c_iflag = 0;
@@ -627,25 +629,25 @@ TEST_F(PtyTest, TermiosAffectsSlave) {
 //
 // (From drivers/tty/pty.c:unix98_pty_init)
 //
-// All termios control ioctls on the master actually redirect to the slave
+// All termios control ioctls on the main actually redirect to the replica
 // (drivers/tty/tty_ioctl.c:tty_mode_ioctl), making it impossible to change the
-// master termios.
+// main termios.
 //
 // Verify this by setting ICRNL (which rewrites input \r to \n) and verify that
-// it has no effect on the master.
-TEST_F(PtyTest, MasterTermiosUnchangable) {
-  struct kernel_termios master_termios = {};
-  EXPECT_THAT(ioctl(master_.get(), TCGETS, &master_termios), SyscallSucceeds());
-  master_termios.c_lflag |= ICRNL;
-  EXPECT_THAT(ioctl(master_.get(), TCSETS, &master_termios), SyscallSucceeds());
+// it has no effect on the main.
+TEST_F(PtyTest, MainTermiosUnchangable) {
+  struct kernel_termios main_termios = {};
+  EXPECT_THAT(ioctl(main_.get(), TCGETS, &main_termios), SyscallSucceeds());
+  main_termios.c_lflag |= ICRNL;
+  EXPECT_THAT(ioctl(main_.get(), TCSETS, &main_termios), SyscallSucceeds());
 
   char c = '\r';
-  ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1));
+  ASSERT_THAT(WriteFd(replica_.get(), &c, 1), SyscallSucceedsWithValue(1));
 
-  ExpectReadable(master_, 1, &c);
+  ExpectReadable(main_, 1, &c);
   EXPECT_EQ(c, '\r');  // ICRNL had no effect!
 
-  ExpectFinished(master_);
+  ExpectFinished(main_);
 }
 
 // ICRNL rewrites input \r to \n.
@@ -653,15 +655,15 @@ TEST_F(PtyTest, TermiosICRNL) {
   struct kernel_termios t = DefaultTermios();
   t.c_iflag |= ICRNL;
   t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
-  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+  ASSERT_THAT(ioctl(replica_.get(), TCSETS, &t), SyscallSucceeds());
 
   char c = '\r';
-  ASSERT_THAT(WriteFd(master_.get(), &c, 1), SyscallSucceedsWithValue(1));
+  ASSERT_THAT(WriteFd(main_.get(), &c, 1), SyscallSucceedsWithValue(1));
 
-  ExpectReadable(slave_, 1, &c);
+  ExpectReadable(replica_, 1, &c);
   EXPECT_EQ(c, '\n');
 
-  ExpectFinished(slave_);
+  ExpectFinished(replica_);
 }
 
 // ONLCR rewrites output \n to \r\n.
@@ -669,42 +671,42 @@ TEST_F(PtyTest, TermiosONLCR) {
   struct kernel_termios t = DefaultTermios();
   t.c_oflag |= ONLCR;
   t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
-  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+  ASSERT_THAT(ioctl(replica_.get(), TCSETS, &t), SyscallSucceeds());
 
   char c = '\n';
-  ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1));
+  ASSERT_THAT(WriteFd(replica_.get(), &c, 1), SyscallSucceedsWithValue(1));
 
   // Extra byte for NUL for EXPECT_STREQ.
   char buf[3] = {};
-  ExpectReadable(master_, 2, buf);
+  ExpectReadable(main_, 2, buf);
   EXPECT_STREQ(buf, "\r\n");
 
-  ExpectFinished(slave_);
+  ExpectFinished(replica_);
 }
 
 TEST_F(PtyTest, TermiosIGNCR) {
   struct kernel_termios t = DefaultTermios();
   t.c_iflag |= IGNCR;
   t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
-  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+  ASSERT_THAT(ioctl(replica_.get(), TCSETS, &t), SyscallSucceeds());
 
   char c = '\r';
-  ASSERT_THAT(WriteFd(master_.get(), &c, 1), SyscallSucceedsWithValue(1));
+  ASSERT_THAT(WriteFd(main_.get(), &c, 1), SyscallSucceedsWithValue(1));
 
   // Nothing to read.
-  ASSERT_THAT(PollAndReadFd(slave_.get(), &c, 1, kTimeout),
+  ASSERT_THAT(PollAndReadFd(replica_.get(), &c, 1, kTimeout),
               PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
 }
 
-// Test that we can successfully poll for readable data from the slave.
-TEST_F(PtyTest, TermiosPollSlave) {
+// Test that we can successfully poll for readable data from the replica.
+TEST_F(PtyTest, TermiosPollReplica) {
   struct kernel_termios t = DefaultTermios();
   t.c_iflag |= IGNCR;
   t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
-  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+  ASSERT_THAT(ioctl(replica_.get(), TCSETS, &t), SyscallSucceeds());
 
   absl::Notification notify;
-  int sfd = slave_.get();
+  int sfd = replica_.get();
   ScopedThread th([sfd, &notify]() {
     notify.Notify();
 
@@ -723,18 +725,18 @@ TEST_F(PtyTest, TermiosPollSlave) {
   absl::SleepFor(absl::Seconds(1));
 
   char s[] = "foo\n";
-  ASSERT_THAT(WriteFd(master_.get(), s, strlen(s) + 1), SyscallSucceeds());
+  ASSERT_THAT(WriteFd(main_.get(), s, strlen(s) + 1), SyscallSucceeds());
 }
 
-// Test that we can successfully poll for readable data from the master.
-TEST_F(PtyTest, TermiosPollMaster) {
+// Test that we can successfully poll for readable data from the main.
+TEST_F(PtyTest, TermiosPollMain) {
   struct kernel_termios t = DefaultTermios();
   t.c_iflag |= IGNCR;
   t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
-  ASSERT_THAT(ioctl(master_.get(), TCSETS, &t), SyscallSucceeds());
+  ASSERT_THAT(ioctl(main_.get(), TCSETS, &t), SyscallSucceeds());
 
   absl::Notification notify;
-  int mfd = master_.get();
+  int mfd = main_.get();
   ScopedThread th([mfd, &notify]() {
     notify.Notify();
 
@@ -753,57 +755,57 @@ TEST_F(PtyTest, TermiosPollMaster) {
   absl::SleepFor(absl::Seconds(1));
 
   char s[] = "foo\n";
-  ASSERT_THAT(WriteFd(slave_.get(), s, strlen(s) + 1), SyscallSucceeds());
+  ASSERT_THAT(WriteFd(replica_.get(), s, strlen(s) + 1), SyscallSucceeds());
 }
 
 TEST_F(PtyTest, TermiosINLCR) {
   struct kernel_termios t = DefaultTermios();
   t.c_iflag |= INLCR;
   t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
-  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+  ASSERT_THAT(ioctl(replica_.get(), TCSETS, &t), SyscallSucceeds());
 
   char c = '\n';
-  ASSERT_THAT(WriteFd(master_.get(), &c, 1), SyscallSucceedsWithValue(1));
+  ASSERT_THAT(WriteFd(main_.get(), &c, 1), SyscallSucceedsWithValue(1));
 
-  ExpectReadable(slave_, 1, &c);
+  ExpectReadable(replica_, 1, &c);
   EXPECT_EQ(c, '\r');
 
-  ExpectFinished(slave_);
+  ExpectFinished(replica_);
 }
 
 TEST_F(PtyTest, TermiosONOCR) {
   struct kernel_termios t = DefaultTermios();
   t.c_oflag |= ONOCR;
   t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
-  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+  ASSERT_THAT(ioctl(replica_.get(), TCSETS, &t), SyscallSucceeds());
 
   // The terminal is at column 0, so there should be no CR to read.
   char c = '\r';
-  ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1));
+  ASSERT_THAT(WriteFd(replica_.get(), &c, 1), SyscallSucceedsWithValue(1));
 
   // Nothing to read.
-  ASSERT_THAT(PollAndReadFd(master_.get(), &c, 1, kTimeout),
+  ASSERT_THAT(PollAndReadFd(main_.get(), &c, 1, kTimeout),
               PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
 
   // This time the column is greater than 0, so we should be able to read the CR
   // out of the other end.
   constexpr char kInput[] = "foo\r";
   constexpr int kInputSize = sizeof(kInput) - 1;
-  ASSERT_THAT(WriteFd(slave_.get(), kInput, kInputSize),
+  ASSERT_THAT(WriteFd(replica_.get(), kInput, kInputSize),
               SyscallSucceedsWithValue(kInputSize));
 
   char buf[kInputSize] = {};
-  ExpectReadable(master_, kInputSize, buf);
+  ExpectReadable(main_, kInputSize, buf);
 
   EXPECT_EQ(memcmp(buf, kInput, kInputSize), 0);
 
-  ExpectFinished(master_);
+  ExpectFinished(main_);
 
   // Terminal should be at column 0 again, so no CR can be read.
-  ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1));
+  ASSERT_THAT(WriteFd(replica_.get(), &c, 1), SyscallSucceedsWithValue(1));
 
   // Nothing to read.
-  ASSERT_THAT(PollAndReadFd(master_.get(), &c, 1, kTimeout),
+  ASSERT_THAT(PollAndReadFd(main_.get(), &c, 1, kTimeout),
               PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
 }
 
@@ -811,16 +813,16 @@ TEST_F(PtyTest, TermiosOCRNL) {
   struct kernel_termios t = DefaultTermios();
   t.c_oflag |= OCRNL;
   t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
-  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+  ASSERT_THAT(ioctl(replica_.get(), TCSETS, &t), SyscallSucceeds());
 
   // The terminal is at column 0, so there should be no CR to read.
   char c = '\r';
-  ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1));
+  ASSERT_THAT(WriteFd(replica_.get(), &c, 1), SyscallSucceedsWithValue(1));
 
-  ExpectReadable(master_, 1, &c);
+  ExpectReadable(main_, 1, &c);
   EXPECT_EQ(c, '\n');
 
-  ExpectFinished(master_);
+  ExpectFinished(main_);
 }
 
 // Tests that VEOL is disabled when we start, and that we can set it to enable
@@ -828,27 +830,27 @@ TEST_F(PtyTest, TermiosOCRNL) {
 TEST_F(PtyTest, VEOLTermination) {
   // Write a few bytes ending with '\0', and confirm that we can't read.
   constexpr char kInput[] = "hello";
-  ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput)),
+  ASSERT_THAT(WriteFd(main_.get(), kInput, sizeof(kInput)),
               SyscallSucceedsWithValue(sizeof(kInput)));
   char buf[sizeof(kInput)] = {};
-  ASSERT_THAT(PollAndReadFd(slave_.get(), buf, sizeof(kInput), kTimeout),
+  ASSERT_THAT(PollAndReadFd(replica_.get(), buf, sizeof(kInput), kTimeout),
               PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
 
   // Set the EOL character to '=' and write it.
   constexpr char delim = '=';
   struct kernel_termios t = DefaultTermios();
   t.c_cc[VEOL] = delim;
-  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
-  ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1));
+  ASSERT_THAT(ioctl(replica_.get(), TCSETS, &t), SyscallSucceeds());
+  ASSERT_THAT(WriteFd(main_.get(), &delim, 1), SyscallSucceedsWithValue(1));
 
   // Now we can read, as sending EOL caused the line to become available.
-  ExpectReadable(slave_, sizeof(kInput), buf);
+  ExpectReadable(replica_, sizeof(kInput), buf);
   EXPECT_EQ(memcmp(buf, kInput, sizeof(kInput)), 0);
 
-  ExpectReadable(slave_, 1, buf);
+  ExpectReadable(replica_, 1, buf);
   EXPECT_EQ(buf[0], '=');
 
-  ExpectFinished(slave_);
+  ExpectFinished(replica_);
 }
 
 // Tests that we can write more than the 4096 character limit, then a
@@ -859,14 +861,14 @@ TEST_F(PtyTest, CanonBigWrite) {
   char input[kWriteLen];
   memset(input, 'M', kWriteLen - 1);
   input[kWriteLen - 1] = '\n';
-  ASSERT_THAT(WriteFd(master_.get(), input, kWriteLen),
+  ASSERT_THAT(WriteFd(main_.get(), input, kWriteLen),
               SyscallSucceedsWithValue(kWriteLen));
 
   // We can read the line.
   char buf[kMaxLineSize] = {};
-  ExpectReadable(slave_, kMaxLineSize, buf);
+  ExpectReadable(replica_, kMaxLineSize, buf);
 
-  ExpectFinished(slave_);
+  ExpectFinished(replica_);
 }
 
 // Tests that data written in canonical mode can be read immediately once
@@ -875,36 +877,36 @@ TEST_F(PtyTest, SwitchCanonToNoncanon) {
   // Write a few bytes without a terminating character, switch to noncanonical
   // mode, and read them.
   constexpr char kInput[] = "hello";
-  ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput)),
+  ASSERT_THAT(WriteFd(main_.get(), kInput, sizeof(kInput)),
               SyscallSucceedsWithValue(sizeof(kInput)));
 
   // Nothing available yet.
   char buf[sizeof(kInput)] = {};
-  ASSERT_THAT(PollAndReadFd(slave_.get(), buf, sizeof(kInput), kTimeout),
+  ASSERT_THAT(PollAndReadFd(replica_.get(), buf, sizeof(kInput), kTimeout),
               PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
 
   DisableCanonical();
 
-  ExpectReadable(slave_, sizeof(kInput), buf);
+  ExpectReadable(replica_, sizeof(kInput), buf);
   EXPECT_STREQ(buf, kInput);
 
-  ExpectFinished(slave_);
+  ExpectFinished(replica_);
 }
 
 TEST_F(PtyTest, SwitchCanonToNonCanonNewline) {
   // Write a few bytes with a terminating character.
   constexpr char kInput[] = "hello\n";
-  ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput)),
+  ASSERT_THAT(WriteFd(main_.get(), kInput, sizeof(kInput)),
               SyscallSucceedsWithValue(sizeof(kInput)));
 
   DisableCanonical();
 
   // We can read the line.
   char buf[sizeof(kInput)] = {};
-  ExpectReadable(slave_, sizeof(kInput), buf);
+  ExpectReadable(replica_, sizeof(kInput), buf);
   EXPECT_STREQ(buf, kInput);
 
-  ExpectFinished(slave_);
+  ExpectFinished(replica_);
 }
 
 TEST_F(PtyTest, SwitchNoncanonToCanonNewlineBig) {
@@ -914,23 +916,23 @@ TEST_F(PtyTest, SwitchNoncanonToCanonNewlineBig) {
   constexpr int kWriteLen = 4100;
   char input[kWriteLen];
   memset(input, 'M', kWriteLen);
-  ASSERT_THAT(WriteFd(master_.get(), input, kWriteLen),
+  ASSERT_THAT(WriteFd(main_.get(), input, kWriteLen),
               SyscallSucceedsWithValue(kWriteLen));
   // Wait for the input queue to fill.
-  ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), kMaxLineSize - 1));
+  ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), kMaxLineSize - 1));
   constexpr char delim = '\n';
-  ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1));
+  ASSERT_THAT(WriteFd(main_.get(), &delim, 1), SyscallSucceedsWithValue(1));
 
   EnableCanonical();
 
   // We can read the line.
   char buf[kMaxLineSize] = {};
-  ExpectReadable(slave_, kMaxLineSize - 1, buf);
+  ExpectReadable(replica_, kMaxLineSize - 1, buf);
 
   // We can also read the remaining characters.
-  ExpectReadable(slave_, 6, buf);
+  ExpectReadable(replica_, 6, buf);
 
-  ExpectFinished(slave_);
+  ExpectFinished(replica_);
 }
 
 TEST_F(PtyTest, SwitchNoncanonToCanonNoNewline) {
@@ -939,18 +941,18 @@ TEST_F(PtyTest, SwitchNoncanonToCanonNoNewline) {
   // Write a few bytes without a terminating character.
   // mode, and read them.
   constexpr char kInput[] = "hello";
-  ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput) - 1),
+  ASSERT_THAT(WriteFd(main_.get(), kInput, sizeof(kInput) - 1),
               SyscallSucceedsWithValue(sizeof(kInput) - 1));
 
-  ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), sizeof(kInput) - 1));
+  ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), sizeof(kInput) - 1));
   EnableCanonical();
 
   // We can read the line.
   char buf[sizeof(kInput)] = {};
-  ExpectReadable(slave_, sizeof(kInput) - 1, buf);
+  ExpectReadable(replica_, sizeof(kInput) - 1, buf);
   EXPECT_STREQ(buf, kInput);
 
-  ExpectFinished(slave_);
+  ExpectFinished(replica_);
 }
 
 TEST_F(PtyTest, SwitchNoncanonToCanonNoNewlineBig) {
@@ -961,17 +963,17 @@ TEST_F(PtyTest, SwitchNoncanonToCanonNoNewlineBig) {
   constexpr int kWriteLen = 4100;
   char input[kWriteLen];
   memset(input, 'M', kWriteLen);
-  ASSERT_THAT(WriteFd(master_.get(), input, kWriteLen),
+  ASSERT_THAT(WriteFd(main_.get(), input, kWriteLen),
               SyscallSucceedsWithValue(kWriteLen));
 
-  ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), kMaxLineSize - 1));
+  ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), kMaxLineSize - 1));
   EnableCanonical();
 
   // We can read the line.
   char buf[kMaxLineSize] = {};
-  ExpectReadable(slave_, kMaxLineSize - 1, buf);
+  ExpectReadable(replica_, kMaxLineSize - 1, buf);
 
-  ExpectFinished(slave_);
+  ExpectFinished(replica_);
 }
 
 // Tests that we can write over the 4095 noncanonical limit, then read out
@@ -985,22 +987,22 @@ TEST_F(PtyTest, NoncanonBigWrite) {
   for (int i = 0; i < kInputSize; i++) {
     // This makes too many syscalls for save/restore.
     const DisableSave ds;
-    ASSERT_THAT(WriteFd(master_.get(), &kInput, sizeof(kInput)),
+    ASSERT_THAT(WriteFd(main_.get(), &kInput, sizeof(kInput)),
                 SyscallSucceedsWithValue(sizeof(kInput)));
   }
 
   // We should be able to read out everything. Sleep a bit so that Linux has a
-  // chance to move data from the master to the slave.
-  ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), kMaxLineSize - 1));
+  // chance to move data from the main to the replica.
+  ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), kMaxLineSize - 1));
   for (int i = 0; i < kInputSize; i++) {
     // This makes too many syscalls for save/restore.
     const DisableSave ds;
     char c;
-    ExpectReadable(slave_, 1, &c);
+    ExpectReadable(replica_, 1, &c);
     ASSERT_EQ(c, kInput);
   }
 
-  ExpectFinished(slave_);
+  ExpectFinished(replica_);
 }
 
 // ICANON doesn't make input available until a line delimiter is typed.
@@ -1008,25 +1010,25 @@ TEST_F(PtyTest, NoncanonBigWrite) {
 // Test newline.
 TEST_F(PtyTest, TermiosICANONNewline) {
   char input[3] = {'a', 'b', 'c'};
-  ASSERT_THAT(WriteFd(master_.get(), input, sizeof(input)),
+  ASSERT_THAT(WriteFd(main_.get(), input, sizeof(input)),
               SyscallSucceedsWithValue(sizeof(input)));
 
   // Extra bytes for newline (written later) and NUL for EXPECT_STREQ.
   char buf[5] = {};
 
   // Nothing available yet.
-  ASSERT_THAT(PollAndReadFd(slave_.get(), buf, sizeof(input), kTimeout),
+  ASSERT_THAT(PollAndReadFd(replica_.get(), buf, sizeof(input), kTimeout),
               PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
 
   char delim = '\n';
-  ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1));
+  ASSERT_THAT(WriteFd(main_.get(), &delim, 1), SyscallSucceedsWithValue(1));
 
   // Now it is available.
-  ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), sizeof(input) + 1));
-  ExpectReadable(slave_, sizeof(input) + 1, buf);
+  ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), sizeof(input) + 1));
+  ExpectReadable(replica_, sizeof(input) + 1, buf);
   EXPECT_STREQ(buf, "abc\n");
 
-  ExpectFinished(slave_);
+  ExpectFinished(replica_);
 }
 
 // ICANON doesn't make input available until a line delimiter is typed.
@@ -1034,23 +1036,23 @@ TEST_F(PtyTest, TermiosICANONNewline) {
 // Test EOF (^D).
 TEST_F(PtyTest, TermiosICANONEOF) {
   char input[3] = {'a', 'b', 'c'};
-  ASSERT_THAT(WriteFd(master_.get(), input, sizeof(input)),
+  ASSERT_THAT(WriteFd(main_.get(), input, sizeof(input)),
               SyscallSucceedsWithValue(sizeof(input)));
 
   // Extra byte for NUL for EXPECT_STREQ.
   char buf[4] = {};
 
   // Nothing available yet.
-  ASSERT_THAT(PollAndReadFd(slave_.get(), buf, sizeof(input), kTimeout),
+  ASSERT_THAT(PollAndReadFd(replica_.get(), buf, sizeof(input), kTimeout),
               PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
   char delim = ControlCharacter('D');
-  ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1));
+  ASSERT_THAT(WriteFd(main_.get(), &delim, 1), SyscallSucceedsWithValue(1));
 
   // Now it is available. Note that ^D is not included.
-  ExpectReadable(slave_, sizeof(input), buf);
+  ExpectReadable(replica_, sizeof(input), buf);
   EXPECT_STREQ(buf, "abc");
 
-  ExpectFinished(slave_);
+  ExpectFinished(replica_);
 }
 
 // ICANON limits us to 4096 bytes including a terminating character. Anything
@@ -1067,21 +1069,21 @@ TEST_F(PtyTest, CanonDiscard) {
     // This makes too many syscalls for save/restore.
     const DisableSave ds;
     for (int i = 0; i < kInputSize; i++) {
-      ASSERT_THAT(WriteFd(master_.get(), &kInput, sizeof(kInput)),
+      ASSERT_THAT(WriteFd(main_.get(), &kInput, sizeof(kInput)),
                   SyscallSucceedsWithValue(sizeof(kInput)));
     }
-    ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1));
+    ASSERT_THAT(WriteFd(main_.get(), &delim, 1), SyscallSucceedsWithValue(1));
   }
 
   // There should be multiple truncated lines available to read.
   for (int i = 0; i < kIter; i++) {
     char buf[kInputSize] = {};
-    ExpectReadable(slave_, kMaxLineSize, buf);
+    ExpectReadable(replica_, kMaxLineSize, buf);
     EXPECT_EQ(buf[kMaxLineSize - 1], delim);
     EXPECT_EQ(buf[kMaxLineSize - 2], kInput);
   }
 
-  ExpectFinished(slave_);
+  ExpectFinished(replica_);
 }
 
 TEST_F(PtyTest, CanonMultiline) {
@@ -1089,22 +1091,22 @@ TEST_F(PtyTest, CanonMultiline) {
   constexpr char kInput2[] = "BLUE\n";
 
   // Write both lines.
-  ASSERT_THAT(WriteFd(master_.get(), kInput1, sizeof(kInput1) - 1),
+  ASSERT_THAT(WriteFd(main_.get(), kInput1, sizeof(kInput1) - 1),
               SyscallSucceedsWithValue(sizeof(kInput1) - 1));
-  ASSERT_THAT(WriteFd(master_.get(), kInput2, sizeof(kInput2) - 1),
+  ASSERT_THAT(WriteFd(main_.get(), kInput2, sizeof(kInput2) - 1),
               SyscallSucceedsWithValue(sizeof(kInput2) - 1));
 
   // Get the first line.
   char line1[8] = {};
-  ExpectReadable(slave_, sizeof(kInput1) - 1, line1);
+  ExpectReadable(replica_, sizeof(kInput1) - 1, line1);
   EXPECT_STREQ(line1, kInput1);
 
   // Get the second line.
   char line2[8] = {};
-  ExpectReadable(slave_, sizeof(kInput2) - 1, line2);
+  ExpectReadable(replica_, sizeof(kInput2) - 1, line2);
   EXPECT_STREQ(line2, kInput2);
 
-  ExpectFinished(slave_);
+  ExpectFinished(replica_);
 }
 
 TEST_F(PtyTest, SwitchNoncanonToCanonMultiline) {
@@ -1115,21 +1117,21 @@ TEST_F(PtyTest, SwitchNoncanonToCanonMultiline) {
   constexpr char kExpected[] = "GO\nBLUE\n";
 
   // Write both lines.
-  ASSERT_THAT(WriteFd(master_.get(), kInput1, sizeof(kInput1) - 1),
+  ASSERT_THAT(WriteFd(main_.get(), kInput1, sizeof(kInput1) - 1),
               SyscallSucceedsWithValue(sizeof(kInput1) - 1));
-  ASSERT_THAT(WriteFd(master_.get(), kInput2, sizeof(kInput2) - 1),
+  ASSERT_THAT(WriteFd(main_.get(), kInput2, sizeof(kInput2) - 1),
               SyscallSucceedsWithValue(sizeof(kInput2) - 1));
 
   ASSERT_NO_ERRNO(
-      WaitUntilReceived(slave_.get(), sizeof(kInput1) + sizeof(kInput2) - 2));
+      WaitUntilReceived(replica_.get(), sizeof(kInput1) + sizeof(kInput2) - 2));
   EnableCanonical();
 
   // Get all together as one line.
   char line[9] = {};
-  ExpectReadable(slave_, 8, line);
+  ExpectReadable(replica_, 8, line);
   EXPECT_STREQ(line, kExpected);
 
-  ExpectFinished(slave_);
+  ExpectFinished(replica_);
 }
 
 TEST_F(PtyTest, SwitchTwiceMultiline) {
@@ -1138,7 +1140,7 @@ TEST_F(PtyTest, SwitchTwiceMultiline) {
 
   // Write each line.
   for (const std::string& input : kInputs) {
-    ASSERT_THAT(WriteFd(master_.get(), input.c_str(), input.size()),
+    ASSERT_THAT(WriteFd(main_.get(), input.c_str(), input.size()),
                 SyscallSucceedsWithValue(input.size()));
   }
 
@@ -1146,32 +1148,32 @@ TEST_F(PtyTest, SwitchTwiceMultiline) {
   // All written characters have to make it into the input queue before
   // canonical mode is re-enabled. If the final '!' character hasn't been
   // enqueued before canonical mode is re-enabled, it won't be readable.
-  ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), kExpected.size()));
+  ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), kExpected.size()));
   EnableCanonical();
 
   // Get all together as one line.
   char line[10] = {};
-  ExpectReadable(slave_, 9, line);
+  ExpectReadable(replica_, 9, line);
   EXPECT_STREQ(line, kExpected.c_str());
 
-  ExpectFinished(slave_);
+  ExpectFinished(replica_);
 }
 
 TEST_F(PtyTest, QueueSize) {
   // Write the line.
   constexpr char kInput1[] = "GO\n";
-  ASSERT_THAT(WriteFd(master_.get(), kInput1, sizeof(kInput1) - 1),
+  ASSERT_THAT(WriteFd(main_.get(), kInput1, sizeof(kInput1) - 1),
               SyscallSucceedsWithValue(sizeof(kInput1) - 1));
-  ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), sizeof(kInput1) - 1));
+  ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), sizeof(kInput1) - 1));
 
   // Ensure that writing more (beyond what is readable) does not impact the
   // readable size.
   char input[kMaxLineSize];
   memset(input, 'M', kMaxLineSize);
-  ASSERT_THAT(WriteFd(master_.get(), input, kMaxLineSize),
+  ASSERT_THAT(WriteFd(main_.get(), input, kMaxLineSize),
               SyscallSucceedsWithValue(kMaxLineSize));
   int inputBufSize = ASSERT_NO_ERRNO_AND_VALUE(
-      WaitUntilReceived(slave_.get(), sizeof(kInput1) - 1));
+      WaitUntilReceived(replica_.get(), sizeof(kInput1) - 1));
   EXPECT_EQ(inputBufSize, sizeof(kInput1) - 1);
 }
 
@@ -1190,15 +1192,14 @@ TEST_F(PtyTest, PartialBadBuffer) {
   // Leave only one free byte in the buffer.
   char* bad_buffer = buf + kPageSize - 1;
 
-  // Write to the master.
+  // Write to the main.
   constexpr char kBuf[] = "hello\n";
   constexpr size_t size = sizeof(kBuf) - 1;
-  EXPECT_THAT(WriteFd(master_.get(), kBuf, size),
-              SyscallSucceedsWithValue(size));
+  EXPECT_THAT(WriteFd(main_.get(), kBuf, size), SyscallSucceedsWithValue(size));
 
-  // Read from the slave into bad_buffer.
-  ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), size));
-  EXPECT_THAT(ReadFd(slave_.get(), bad_buffer, size),
+  // Read from the replica into bad_buffer.
+  ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), size));
+  EXPECT_THAT(ReadFd(replica_.get(), bad_buffer, size),
               SyscallFailsWithErrno(EFAULT));
 
   EXPECT_THAT(munmap(addr, 2 * kPageSize), SyscallSucceeds()) << addr;
@@ -1206,44 +1207,43 @@ TEST_F(PtyTest, PartialBadBuffer) {
 
 TEST_F(PtyTest, SimpleEcho) {
   constexpr char kInput[] = "Mr. Eko";
-  EXPECT_THAT(WriteFd(master_.get(), kInput, strlen(kInput)),
+  EXPECT_THAT(WriteFd(main_.get(), kInput, strlen(kInput)),
               SyscallSucceedsWithValue(strlen(kInput)));
 
   char buf[100] = {};
-  ExpectReadable(master_, strlen(kInput), buf);
+  ExpectReadable(main_, strlen(kInput), buf);
 
   EXPECT_STREQ(buf, kInput);
-  ExpectFinished(master_);
+  ExpectFinished(main_);
 }
 
 TEST_F(PtyTest, GetWindowSize) {
   struct winsize ws;
-  ASSERT_THAT(ioctl(slave_.get(), TIOCGWINSZ, &ws), SyscallSucceeds());
+  ASSERT_THAT(ioctl(replica_.get(), TIOCGWINSZ, &ws), SyscallSucceeds());
   EXPECT_EQ(ws.ws_row, 0);
   EXPECT_EQ(ws.ws_col, 0);
 }
 
-TEST_F(PtyTest, SetSlaveWindowSize) {
+TEST_F(PtyTest, SetReplicaWindowSize) {
   constexpr uint16_t kRows = 343;
   constexpr uint16_t kCols = 2401;
   struct winsize ws = {.ws_row = kRows, .ws_col = kCols};
-  ASSERT_THAT(ioctl(slave_.get(), TIOCSWINSZ, &ws), SyscallSucceeds());
+  ASSERT_THAT(ioctl(replica_.get(), TIOCSWINSZ, &ws), SyscallSucceeds());
 
   struct winsize retrieved_ws = {};
-  ASSERT_THAT(ioctl(master_.get(), TIOCGWINSZ, &retrieved_ws),
-              SyscallSucceeds());
+  ASSERT_THAT(ioctl(main_.get(), TIOCGWINSZ, &retrieved_ws), SyscallSucceeds());
   EXPECT_EQ(retrieved_ws.ws_row, kRows);
   EXPECT_EQ(retrieved_ws.ws_col, kCols);
 }
 
-TEST_F(PtyTest, SetMasterWindowSize) {
+TEST_F(PtyTest, SetMainWindowSize) {
   constexpr uint16_t kRows = 343;
   constexpr uint16_t kCols = 2401;
   struct winsize ws = {.ws_row = kRows, .ws_col = kCols};
-  ASSERT_THAT(ioctl(master_.get(), TIOCSWINSZ, &ws), SyscallSucceeds());
+  ASSERT_THAT(ioctl(main_.get(), TIOCSWINSZ, &ws), SyscallSucceeds());
 
   struct winsize retrieved_ws = {};
-  ASSERT_THAT(ioctl(slave_.get(), TIOCGWINSZ, &retrieved_ws),
+  ASSERT_THAT(ioctl(replica_.get(), TIOCGWINSZ, &retrieved_ws),
               SyscallSucceeds());
   EXPECT_EQ(retrieved_ws.ws_row, kRows);
   EXPECT_EQ(retrieved_ws.ws_col, kCols);
@@ -1252,8 +1252,8 @@ TEST_F(PtyTest, SetMasterWindowSize) {
 class JobControlTest : public ::testing::Test {
  protected:
   void SetUp() override {
-    master_ = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK));
-    slave_ = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master_));
+    main_ = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK));
+    replica_ = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(main_));
 
     // Make this a session leader, which also drops the controlling terminal.
     // In the gVisor test environment, this test will be run as the session
@@ -1263,61 +1263,82 @@ class JobControlTest : public ::testing::Test {
     }
   }
 
-  // Master and slave ends of the PTY. Non-blocking.
-  FileDescriptor master_;
-  FileDescriptor slave_;
+  PosixError RunInChild(SubprocessCallback childFunc) {
+    pid_t child = fork();
+    if (!child) {
+      childFunc();
+      _exit(0);
+    }
+    int wstatus;
+    if (waitpid(child, &wstatus, 0) != child) {
+      return PosixError(
+          errno, absl::StrCat("child failed with wait status: ", wstatus));
+    }
+    return PosixError(wstatus, "process returned");
+  }
+
+  // Main and replica ends of the PTY. Non-blocking.
+  FileDescriptor main_;
+  FileDescriptor replica_;
 };
 
-TEST_F(JobControlTest, SetTTYMaster) {
-  ASSERT_THAT(ioctl(master_.get(), TIOCSCTTY, 0), SyscallSucceeds());
+TEST_F(JobControlTest, SetTTYMain) {
+  auto res = RunInChild([=]() {
+    TEST_PCHECK(setsid() >= 0);
+    TEST_PCHECK(!ioctl(main_.get(), TIOCSCTTY, 0));
+  });
+  ASSERT_NO_ERRNO(res);
 }
 
 TEST_F(JobControlTest, SetTTY) {
-  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
+  auto res = RunInChild([=]() {
+    TEST_PCHECK(setsid() >= 0);
+    TEST_PCHECK(!ioctl(replica_.get(), TIOCSCTTY, 0));
+  });
+  ASSERT_NO_ERRNO(res);
 }
 
 TEST_F(JobControlTest, SetTTYNonLeader) {
   // Fork a process that won't be the session leader.
-  pid_t child = fork();
-  if (!child) {
-    // We shouldn't be able to set the terminal.
-    TEST_PCHECK(ioctl(slave_.get(), TIOCSCTTY, 0));
-    _exit(0);
-  }
-
-  int wstatus;
-  ASSERT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child));
-  ASSERT_EQ(wstatus, 0);
+  auto res =
+      RunInChild([=]() { TEST_PCHECK(ioctl(replica_.get(), TIOCSCTTY, 0)); });
+  ASSERT_NO_ERRNO(res);
 }
 
 TEST_F(JobControlTest, SetTTYBadArg) {
-  // Despite the man page saying arg should be 0 here, Linux doesn't actually
-  // check.
-  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 1), SyscallSucceeds());
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+  auto res = RunInChild([=]() {
+    TEST_PCHECK(setsid() >= 0);
+    TEST_PCHECK(!ioctl(replica_.get(), TIOCSCTTY, 1));
+  });
+  ASSERT_NO_ERRNO(res);
 }
 
 TEST_F(JobControlTest, SetTTYDifferentSession) {
   SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
 
-  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
-
-  // Fork, join a new session, and try to steal the parent's controlling
-  // terminal, which should fail.
-  pid_t child = fork();
-  if (!child) {
+  auto res = RunInChild([=]() {
     TEST_PCHECK(setsid() >= 0);
-    // We shouldn't be able to steal the terminal.
-    TEST_PCHECK(ioctl(slave_.get(), TIOCSCTTY, 1));
-    _exit(0);
-  }
+    TEST_PCHECK(!ioctl(replica_.get(), TIOCSCTTY, 1));
 
-  int wstatus;
-  ASSERT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child));
-  ASSERT_EQ(wstatus, 0);
+    // Fork, join a new session, and try to steal the parent's controlling
+    // terminal, which should fail.
+    pid_t grandchild = fork();
+    if (!grandchild) {
+      TEST_PCHECK(setsid() >= 0);
+      // We shouldn't be able to steal the terminal.
+      TEST_PCHECK(ioctl(replica_.get(), TIOCSCTTY, 1));
+      _exit(0);
+    }
+
+    int gcwstatus;
+    TEST_PCHECK(waitpid(grandchild, &gcwstatus, 0) == grandchild);
+    TEST_PCHECK(gcwstatus == 0);
+  });
 }
 
 TEST_F(JobControlTest, ReleaseTTY) {
-  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
+  ASSERT_THAT(ioctl(replica_.get(), TIOCSCTTY, 0), SyscallSucceeds());
 
   // Make sure we're ignoring SIGHUP, which will be sent to this process once we
   // disconnect they TTY.
@@ -1327,48 +1348,60 @@ TEST_F(JobControlTest, ReleaseTTY) {
   sigemptyset(&sa.sa_mask);
   struct sigaction old_sa;
   EXPECT_THAT(sigaction(SIGHUP, &sa, &old_sa), SyscallSucceeds());
-  EXPECT_THAT(ioctl(slave_.get(), TIOCNOTTY), SyscallSucceeds());
+  EXPECT_THAT(ioctl(replica_.get(), TIOCNOTTY), SyscallSucceeds());
   EXPECT_THAT(sigaction(SIGHUP, &old_sa, NULL), SyscallSucceeds());
 }
 
 TEST_F(JobControlTest, ReleaseUnsetTTY) {
-  ASSERT_THAT(ioctl(slave_.get(), TIOCNOTTY), SyscallFailsWithErrno(ENOTTY));
+  ASSERT_THAT(ioctl(replica_.get(), TIOCNOTTY), SyscallFailsWithErrno(ENOTTY));
 }
 
 TEST_F(JobControlTest, ReleaseWrongTTY) {
-  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
-
-  ASSERT_THAT(ioctl(master_.get(), TIOCNOTTY), SyscallFailsWithErrno(ENOTTY));
+  auto res = RunInChild([=]() {
+    TEST_PCHECK(setsid() >= 0);
+    TEST_PCHECK(!ioctl(replica_.get(), TIOCSCTTY, 0));
+    TEST_PCHECK(ioctl(main_.get(), TIOCNOTTY) < 0 && errno == ENOTTY);
+  });
+  ASSERT_NO_ERRNO(res);
 }
 
 TEST_F(JobControlTest, ReleaseTTYNonLeader) {
-  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
+  auto ret = RunInChild([=]() {
+    TEST_PCHECK(setsid() >= 0);
+    TEST_PCHECK(!ioctl(replica_.get(), TIOCSCTTY, 0));
 
-  pid_t child = fork();
-  if (!child) {
-    TEST_PCHECK(!ioctl(slave_.get(), TIOCNOTTY));
-    _exit(0);
-  }
+    pid_t grandchild = fork();
+    if (!grandchild) {
+      TEST_PCHECK(!ioctl(replica_.get(), TIOCNOTTY));
+      _exit(0);
+    }
 
-  int wstatus;
-  ASSERT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child));
-  ASSERT_EQ(wstatus, 0);
+    int wstatus;
+    TEST_PCHECK(waitpid(grandchild, &wstatus, 0) == grandchild);
+    TEST_PCHECK(wstatus == 0);
+  });
+  ASSERT_NO_ERRNO(ret);
 }
 
 TEST_F(JobControlTest, ReleaseTTYDifferentSession) {
-  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
-
-  pid_t child = fork();
-  if (!child) {
-    // Join a new session, then try to disconnect.
+  auto ret = RunInChild([=]() {
     TEST_PCHECK(setsid() >= 0);
-    TEST_PCHECK(ioctl(slave_.get(), TIOCNOTTY));
-    _exit(0);
-  }
 
-  int wstatus;
-  ASSERT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child));
-  ASSERT_EQ(wstatus, 0);
+    TEST_PCHECK(!ioctl(replica_.get(), TIOCSCTTY, 0));
+
+    pid_t grandchild = fork();
+    if (!grandchild) {
+      // Join a new session, then try to disconnect.
+      TEST_PCHECK(setsid() >= 0);
+      TEST_PCHECK(ioctl(replica_.get(), TIOCNOTTY));
+      _exit(0);
+    }
+
+    int wstatus;
+    TEST_PCHECK(waitpid(grandchild, &wstatus, 0) == grandchild);
+    TEST_PCHECK(wstatus == 0);
+  });
+  ASSERT_NO_ERRNO(ret);
 }
 
 // Used by the child process spawned in ReleaseTTYSignals to track received
@@ -1387,7 +1420,7 @@ void sig_handler(int signum) { received |= signum; }
 // - Checks that thread 1 got both signals
 // - Checks that thread 2 didn't get any signals.
 TEST_F(JobControlTest, ReleaseTTYSignals) {
-  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
+  ASSERT_THAT(ioctl(replica_.get(), TIOCSCTTY, 0), SyscallSucceeds());
 
   received = 0;
   struct sigaction sa = {};
@@ -1439,7 +1472,7 @@ TEST_F(JobControlTest, ReleaseTTYSignals) {
 
   // Release the controlling terminal, sending SIGHUP and SIGCONT to all other
   // processes in this process group.
-  EXPECT_THAT(ioctl(slave_.get(), TIOCNOTTY), SyscallSucceeds());
+  EXPECT_THAT(ioctl(replica_.get(), TIOCNOTTY), SyscallSucceeds());
 
   EXPECT_THAT(sigaction(SIGHUP, &old_sa, NULL), SyscallSucceeds());
 
@@ -1456,20 +1489,21 @@ TEST_F(JobControlTest, ReleaseTTYSignals) {
 }
 
 TEST_F(JobControlTest, GetForegroundProcessGroup) {
-  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
-  pid_t foreground_pgid;
-  pid_t pid;
-  ASSERT_THAT(ioctl(slave_.get(), TIOCGPGRP, &foreground_pgid),
-              SyscallSucceeds());
-  ASSERT_THAT(pid = getpid(), SyscallSucceeds());
-
-  ASSERT_EQ(foreground_pgid, pid);
+  auto res = RunInChild([=]() {
+    pid_t pid, foreground_pgid;
+    TEST_PCHECK(setsid() >= 0);
+    TEST_PCHECK(!ioctl(replica_.get(), TIOCSCTTY, 1));
+    TEST_PCHECK(!ioctl(replica_.get(), TIOCGPGRP, &foreground_pgid));
+    TEST_PCHECK((pid = getpid()) >= 0);
+    TEST_PCHECK(pid == foreground_pgid);
+  });
+  ASSERT_NO_ERRNO(res);
 }
 
 TEST_F(JobControlTest, GetForegroundProcessGroupNonControlling) {
   // At this point there's no controlling terminal, so TIOCGPGRP should fail.
   pid_t foreground_pgid;
-  ASSERT_THAT(ioctl(slave_.get(), TIOCGPGRP, &foreground_pgid),
+  ASSERT_THAT(ioctl(replica_.get(), TIOCGPGRP, &foreground_pgid),
               SyscallFailsWithErrno(ENOTTY));
 }
 
@@ -1479,113 +1513,125 @@ TEST_F(JobControlTest, GetForegroundProcessGroupNonControlling) {
 // - sets that child as the foreground process group
 // - kills its child and sets itself as the foreground process group.
 TEST_F(JobControlTest, SetForegroundProcessGroup) {
-  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
-
-  // Ignore SIGTTOU so that we don't stop ourself when calling tcsetpgrp.
-  struct sigaction sa = {};
-  sa.sa_handler = SIG_IGN;
-  sa.sa_flags = 0;
-  sigemptyset(&sa.sa_mask);
-  sigaction(SIGTTOU, &sa, NULL);
-
-  // Set ourself as the foreground process group.
-  ASSERT_THAT(tcsetpgrp(slave_.get(), getpgid(0)), SyscallSucceeds());
-
-  // Create a new process that just waits to be signaled.
-  pid_t child = fork();
-  if (!child) {
-    TEST_PCHECK(!pause());
-    // We should never reach this.
-    _exit(1);
-  }
-
-  // Make the child its own process group, then make it the controlling process
-  // group of the terminal.
-  ASSERT_THAT(setpgid(child, child), SyscallSucceeds());
-  ASSERT_THAT(tcsetpgrp(slave_.get(), child), SyscallSucceeds());
+  auto res = RunInChild([=]() {
+    TEST_PCHECK(!ioctl(replica_.get(), TIOCSCTTY, 0));
+
+    // Ignore SIGTTOU so that we don't stop ourself when calling tcsetpgrp.
+    struct sigaction sa = {};
+    sa.sa_handler = SIG_IGN;
+    sa.sa_flags = 0;
+    sigemptyset(&sa.sa_mask);
+    sigaction(SIGTTOU, &sa, NULL);
+
+    // Set ourself as the foreground process group.
+    TEST_PCHECK(!tcsetpgrp(replica_.get(), getpgid(0)));
+
+    // Create a new process that just waits to be signaled.
+    pid_t grandchild = fork();
+    if (!grandchild) {
+      TEST_PCHECK(!pause());
+      // We should never reach this.
+      _exit(1);
+    }
 
-  // Sanity check - we're still the controlling session.
-  ASSERT_EQ(getsid(0), getsid(child));
+    // Make the child its own process group, then make it the controlling
+    // process group of the terminal.
+    TEST_PCHECK(!setpgid(grandchild, grandchild));
+    TEST_PCHECK(!tcsetpgrp(replica_.get(), grandchild));
 
-  // Signal the child, wait for it to exit, then retake the terminal.
-  ASSERT_THAT(kill(child, SIGTERM), SyscallSucceeds());
-  int wstatus;
-  ASSERT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child));
-  ASSERT_TRUE(WIFSIGNALED(wstatus));
-  ASSERT_EQ(WTERMSIG(wstatus), SIGTERM);
+    // Sanity check - we're still the controlling session.
+    TEST_PCHECK(getsid(0) == getsid(grandchild));
 
-  // Set ourself as the foreground process.
-  pid_t pgid;
-  ASSERT_THAT(pgid = getpgid(0), SyscallSucceeds());
-  ASSERT_THAT(tcsetpgrp(slave_.get(), pgid), SyscallSucceeds());
+    // Signal the child, wait for it to exit, then retake the terminal.
+    TEST_PCHECK(!kill(grandchild, SIGTERM));
+    int wstatus;
+    TEST_PCHECK(waitpid(grandchild, &wstatus, 0) == grandchild);
+    TEST_PCHECK(WIFSIGNALED(wstatus));
+    TEST_PCHECK(WTERMSIG(wstatus) == SIGTERM);
+
+    // Set ourself as the foreground process.
+    pid_t pgid;
+    TEST_PCHECK((pgid = getpgid(0)) >= 0);
+    TEST_PCHECK(!tcsetpgrp(replica_.get(), pgid));
+  });
 }
 
 TEST_F(JobControlTest, SetForegroundProcessGroupWrongTTY) {
   pid_t pid = getpid();
-  ASSERT_THAT(ioctl(slave_.get(), TIOCSPGRP, &pid),
+  ASSERT_THAT(ioctl(replica_.get(), TIOCSPGRP, &pid),
               SyscallFailsWithErrno(ENOTTY));
 }
 
 TEST_F(JobControlTest, SetForegroundProcessGroupNegPgid) {
-  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
+  auto ret = RunInChild([=]() {
+    TEST_PCHECK(setsid() >= 0);
+    TEST_PCHECK(!ioctl(replica_.get(), TIOCSCTTY, 0));
 
-  pid_t pid = -1;
-  ASSERT_THAT(ioctl(slave_.get(), TIOCSPGRP, &pid),
-              SyscallFailsWithErrno(EINVAL));
+    pid_t pid = -1;
+    TEST_PCHECK(ioctl(replica_.get(), TIOCSPGRP, &pid) && errno == EINVAL);
+  });
+  ASSERT_NO_ERRNO(ret);
 }
 
 TEST_F(JobControlTest, SetForegroundProcessGroupEmptyProcessGroup) {
-  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
-
-  // Create a new process, put it in a new process group, make that group the
-  // foreground process group, then have the process wait.
-  pid_t child = fork();
-  if (!child) {
-    TEST_PCHECK(!setpgid(0, 0));
-    _exit(0);
-  }
+  auto ret = RunInChild([=]() {
+    TEST_PCHECK(!ioctl(replica_.get(), TIOCSCTTY, 0));
+
+    // Create a new process, put it in a new process group, make that group the
+    // foreground process group, then have the process wait.
+    pid_t grandchild = fork();
+    if (!grandchild) {
+      TEST_PCHECK(!setpgid(0, 0));
+      _exit(0);
+    }
 
-  // Wait for the child to exit.
-  int wstatus;
-  EXPECT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child));
-  // The child's process group doesn't exist anymore - this should fail.
-  ASSERT_THAT(ioctl(slave_.get(), TIOCSPGRP, &child),
-              SyscallFailsWithErrno(ESRCH));
+    // Wait for the child to exit.
+    int wstatus;
+    TEST_PCHECK(waitpid(grandchild, &wstatus, 0) == grandchild);
+    // The child's process group doesn't exist anymore - this should fail.
+    TEST_PCHECK(ioctl(replica_.get(), TIOCSPGRP, &grandchild) != 0 &&
+                errno == ESRCH);
+  });
 }
 
 TEST_F(JobControlTest, SetForegroundProcessGroupDifferentSession) {
-  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
+  auto ret = RunInChild([=]() {
+    TEST_PCHECK(setsid() >= 0);
+    TEST_PCHECK(!ioctl(replica_.get(), TIOCSCTTY, 0));
 
-  int sync_setsid[2];
-  int sync_exit[2];
-  ASSERT_THAT(pipe(sync_setsid), SyscallSucceeds());
-  ASSERT_THAT(pipe(sync_exit), SyscallSucceeds());
+    int sync_setsid[2];
+    int sync_exit[2];
+    TEST_PCHECK(pipe(sync_setsid) >= 0);
+    TEST_PCHECK(pipe(sync_exit) >= 0);
 
-  // Create a new process and put it in a new session.
-  pid_t child = fork();
-  if (!child) {
-    TEST_PCHECK(setsid() >= 0);
-    // Tell the parent we're in a new session.
-    char c = 'c';
-    TEST_PCHECK(WriteFd(sync_setsid[1], &c, 1) == 1);
-    TEST_PCHECK(ReadFd(sync_exit[0], &c, 1) == 1);
-    _exit(0);
-  }
+    // Create a new process and put it in a new session.
+    pid_t grandchild = fork();
+    if (!grandchild) {
+      TEST_PCHECK(setsid() >= 0);
+      // Tell the parent we're in a new session.
+      char c = 'c';
+      TEST_PCHECK(WriteFd(sync_setsid[1], &c, 1) == 1);
+      TEST_PCHECK(ReadFd(sync_exit[0], &c, 1) == 1);
+      _exit(0);
+    }
 
-  // Wait for the child to tell us it's in a new session.
-  char c = 'c';
-  ASSERT_THAT(ReadFd(sync_setsid[0], &c, 1), SyscallSucceedsWithValue(1));
+    // Wait for the child to tell us it's in a new session.
+    char c = 'c';
+    TEST_PCHECK(ReadFd(sync_setsid[0], &c, 1) == 1);
 
-  // Child is in a new session, so we can't make it the foregroup process group.
-  EXPECT_THAT(ioctl(slave_.get(), TIOCSPGRP, &child),
-              SyscallFailsWithErrno(EPERM));
+    // Child is in a new session, so we can't make it the foreground process
+    // group.
+    TEST_PCHECK(ioctl(replica_.get(), TIOCSPGRP, &grandchild) &&
+                errno == EPERM);
 
-  EXPECT_THAT(WriteFd(sync_exit[1], &c, 1), SyscallSucceedsWithValue(1));
+    TEST_PCHECK(WriteFd(sync_exit[1], &c, 1) == 1);
 
-  int wstatus;
-  EXPECT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child));
-  EXPECT_TRUE(WIFEXITED(wstatus));
-  EXPECT_EQ(WEXITSTATUS(wstatus), 0);
+    int wstatus;
+    TEST_PCHECK(waitpid(grandchild, &wstatus, 0) == grandchild);
+    TEST_PCHECK(WIFEXITED(wstatus));
+    TEST_PCHECK(!WEXITSTATUS(wstatus));
+  });
+  ASSERT_NO_ERRNO(ret);
 }
 
 // Verify that we don't hang when creating a new session from an orphaned
diff --git a/test/syscalls/linux/pty_root.cc b/test/syscalls/linux/pty_root.cc
index 1d7dbefdb9..a534cf0bba 100644
--- a/test/syscalls/linux/pty_root.cc
+++ b/test/syscalls/linux/pty_root.cc
@@ -48,12 +48,12 @@ TEST(JobControlRootTest, StealTTY) {
     ASSERT_THAT(setsid(), SyscallSucceeds());
   }
 
-  FileDescriptor master =
+  FileDescriptor main =
       ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK));
-  FileDescriptor slave = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master));
+  FileDescriptor replica = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(main));
 
-  // Make slave the controlling terminal.
-  ASSERT_THAT(ioctl(slave.get(), TIOCSCTTY, 0), SyscallSucceeds());
+  // Make replica the controlling terminal.
+  ASSERT_THAT(ioctl(replica.get(), TIOCSCTTY, 0), SyscallSucceeds());
 
   // Fork, join a new session, and try to steal the parent's controlling
   // terminal, which should succeed when we have CAP_SYS_ADMIN and pass an arg
@@ -62,9 +62,9 @@ TEST(JobControlRootTest, StealTTY) {
   if (!child) {
     ASSERT_THAT(setsid(), SyscallSucceeds());
     // We shouldn't be able to steal the terminal with the wrong arg value.
-    TEST_PCHECK(ioctl(slave.get(), TIOCSCTTY, 0));
+    TEST_PCHECK(ioctl(replica.get(), TIOCSCTTY, 0));
     // We should be able to steal it if we are true root.
-    TEST_PCHECK(true_root == !ioctl(slave.get(), TIOCSCTTY, 1));
+    TEST_PCHECK(true_root == !ioctl(replica.get(), TIOCSCTTY, 1));
     _exit(0);
   }
 
diff --git a/test/util/pty_util.cc b/test/util/pty_util.cc
index c01f916aa3..5fa6229220 100644
--- a/test/util/pty_util.cc
+++ b/test/util/pty_util.cc
@@ -23,25 +23,25 @@
 namespace gvisor {
 namespace testing {
 
-PosixErrorOr<FileDescriptor> OpenSlave(const FileDescriptor& master) {
-  PosixErrorOr<int> n = SlaveID(master);
+PosixErrorOr<FileDescriptor> OpenReplica(const FileDescriptor& main) {
+  PosixErrorOr<int> n = ReplicaID(main);
   if (!n.ok()) {
     return PosixErrorOr<FileDescriptor>(n.error());
   }
   return Open(absl::StrCat("/dev/pts/", n.ValueOrDie()), O_RDWR | O_NONBLOCK);
 }
 
-PosixErrorOr<int> SlaveID(const FileDescriptor& master) {
+PosixErrorOr<int> ReplicaID(const FileDescriptor& main) {
   // Get pty index.
   int n;
-  int ret = ioctl(master.get(), TIOCGPTN, &n);
+  int ret = ioctl(main.get(), TIOCGPTN, &n);
   if (ret < 0) {
     return PosixError(errno, "ioctl(TIOCGPTN) failed");
   }
 
   // Unlock pts.
   int unlock = 0;
-  ret = ioctl(master.get(), TIOCSPTLCK, &unlock);
+  ret = ioctl(main.get(), TIOCSPTLCK, &unlock);
   if (ret < 0) {
     return PosixError(errno, "ioctl(TIOSPTLCK) failed");
   }
diff --git a/test/util/pty_util.h b/test/util/pty_util.h
index 0722da3799..dff6adab53 100644
--- a/test/util/pty_util.h
+++ b/test/util/pty_util.h
@@ -21,11 +21,11 @@
 namespace gvisor {
 namespace testing {
 
-// Opens the slave end of the passed master as R/W and nonblocking.
-PosixErrorOr<FileDescriptor> OpenSlave(const FileDescriptor& master);
+// Opens the replica end of the passed main as R/W and nonblocking.
+PosixErrorOr<FileDescriptor> OpenReplica(const FileDescriptor& main);
 
-// Get the number of the slave end of the master.
-PosixErrorOr<int> SlaveID(const FileDescriptor& master);
+// Get the number of the replica end of the main.
+PosixErrorOr<int> ReplicaID(const FileDescriptor& main);
 
 }  // namespace testing
 }  // namespace gvisor

From 97d6398d435025c7ab361c36994feab2c7e2d84f Mon Sep 17 00:00:00 2001
From: Kevin Krakauer <krakauer@google.com>
Date: Thu, 27 Aug 2020 10:51:59 -0700
Subject: [PATCH 096/211] ip6tables: (de)serialize ip6tables structs

More implementation+testing to follow.

#3549.

PiperOrigin-RevId: 328770160
---
 pkg/abi/linux/netfilter_ipv6.go          |  13 ++
 pkg/sentry/socket/netfilter/BUILD        |   1 +
 pkg/sentry/socket/netfilter/ipv4.go      |  33 ++-
 pkg/sentry/socket/netfilter/ipv6.go      | 265 +++++++++++++++++++++++
 pkg/sentry/socket/netfilter/netfilter.go |  77 ++++---
 pkg/sentry/socket/netfilter/targets.go   |  10 +-
 pkg/sentry/socket/netstack/netstack.go   |  75 ++++++-
 pkg/tcpip/stack/iptables.go              |  12 +-
 pkg/tcpip/stack/iptables_types.go        |   5 +
 test/syscalls/linux/ip6tables.cc         |  48 ++++
 10 files changed, 489 insertions(+), 50 deletions(-)
 create mode 100644 pkg/sentry/socket/netfilter/ipv6.go

diff --git a/pkg/abi/linux/netfilter_ipv6.go b/pkg/abi/linux/netfilter_ipv6.go
index 9bb9efb106..f6117024c2 100644
--- a/pkg/abi/linux/netfilter_ipv6.go
+++ b/pkg/abi/linux/netfilter_ipv6.go
@@ -290,6 +290,19 @@ type IP6TIP struct {
 
 const SizeOfIP6TIP = 136
 
+// Flags in IP6TIP.Flags. Corresponding constants are in
+// include/uapi/linux/netfilter_ipv6/ip6_tables.h.
+const (
+	// Whether to check the Protocol field.
+	IP6T_F_PROTO = 0x01
+	// Whether to match the TOS field.
+	IP6T_F_TOS = 0x02
+	// Indicates that the jump target is an absolute GOTO, not an offset.
+	IP6T_F_GOTO = 0x04
+	// Enables all flags.
+	IP6T_F_MASK = 0x07
+)
+
 // Flags in IP6TIP.InverseFlags. Corresponding constants are in
 // include/uapi/linux/netfilter_ipv6/ip6_tables.h.
 const (
diff --git a/pkg/sentry/socket/netfilter/BUILD b/pkg/sentry/socket/netfilter/BUILD
index 795620589d..8aea0200fd 100644
--- a/pkg/sentry/socket/netfilter/BUILD
+++ b/pkg/sentry/socket/netfilter/BUILD
@@ -7,6 +7,7 @@ go_library(
     srcs = [
         "extensions.go",
         "ipv4.go",
+        "ipv6.go",
         "netfilter.go",
         "owner_matcher.go",
         "targets.go",
diff --git a/pkg/sentry/socket/netfilter/ipv4.go b/pkg/sentry/socket/netfilter/ipv4.go
index 4fb887e494..e4c55a1000 100644
--- a/pkg/sentry/socket/netfilter/ipv4.go
+++ b/pkg/sentry/socket/netfilter/ipv4.go
@@ -36,14 +36,37 @@ var emptyIPv4Filter = stack.IPHeaderFilter{
 	SrcMask: "\x00\x00\x00\x00",
 }
 
-func getEntries4(table stack.Table, info *linux.IPTGetinfo) linux.KernelIPTGetEntries {
+// convertNetstackToBinary4 converts the iptables as stored in netstack to the
+// format expected by the iptables tool. Linux stores each table as a binary
+// blob that can only be traversed by parsing a little data, reading some
+// offsets, jumping to those offsets, parsing again, etc.
+func convertNetstackToBinary4(stk *stack.Stack, tablename linux.TableName) (linux.KernelIPTGetEntries, linux.IPTGetinfo, error) {
+	// The table name has to fit in the struct.
+	if linux.XT_TABLE_MAXNAMELEN < len(tablename) {
+		return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("table name %q too long", tablename)
+	}
+
+	table, ok := stk.IPTables().GetTable(tablename.String(), false)
+	if !ok {
+		return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("couldn't find table %q", tablename)
+	}
+
+	// Setup the info struct.
+	entries, info := getEntries4(table, tablename)
+	return entries, info, nil
+}
+
+func getEntries4(table stack.Table, tablename linux.TableName) (linux.KernelIPTGetEntries, linux.IPTGetinfo) {
+	var info linux.IPTGetinfo
 	var entries linux.KernelIPTGetEntries
+	copy(info.Name[:], tablename[:])
 	copy(entries.Name[:], info.Name[:])
+	info.ValidHooks = table.ValidHooks()
 
 	for ruleIdx, rule := range table.Rules {
 		nflog("convert to binary: current offset: %d", entries.Size)
 
-		setHooksAndUnderflow(info, table, entries.Size, ruleIdx)
+		setHooksAndUnderflow(&info, table, entries.Size, ruleIdx)
 		// Each rule corresponds to an entry.
 		entry := linux.KernelIPTEntry{
 			Entry: linux.IPTEntry{
@@ -100,7 +123,7 @@ func getEntries4(table stack.Table, info *linux.IPTGetinfo) linux.KernelIPTGetEn
 
 	info.Size = entries.Size
 	nflog("convert to binary: finished with an marshalled size of %d", info.Size)
-	return entries
+	return entries, info
 }
 
 func modifyEntries4(stk *stack.Stack, optVal []byte, replace *linux.IPTReplace, table *stack.Table) (map[uint32]int, *syserr.Error) {
@@ -205,7 +228,9 @@ func filterFromIPTIP(iptip linux.IPTIP) (stack.IPHeaderFilter, error) {
 	ifnameMask := string(iptip.OutputInterfaceMask[:n])
 
 	return stack.IPHeaderFilter{
-		Protocol:              tcpip.TransportProtocolNumber(iptip.Protocol),
+		Protocol: tcpip.TransportProtocolNumber(iptip.Protocol),
+		// A Protocol value of 0 indicates all protocols match.
+		CheckProtocol:         iptip.Protocol != 0,
 		Dst:                   tcpip.Address(iptip.Dst[:]),
 		DstMask:               tcpip.Address(iptip.DstMask[:]),
 		DstInvert:             iptip.InverseFlags&linux.IPT_INV_DSTIP != 0,
diff --git a/pkg/sentry/socket/netfilter/ipv6.go b/pkg/sentry/socket/netfilter/ipv6.go
new file mode 100644
index 0000000000..3b2c1becde
--- /dev/null
+++ b/pkg/sentry/socket/netfilter/ipv6.go
@@ -0,0 +1,265 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package netfilter
+
+import (
+	"bytes"
+	"fmt"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/binary"
+	"gvisor.dev/gvisor/pkg/syserr"
+	"gvisor.dev/gvisor/pkg/tcpip"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
+	"gvisor.dev/gvisor/pkg/tcpip/stack"
+	"gvisor.dev/gvisor/pkg/usermem"
+)
+
+// emptyIPv6Filter is for comparison with a rule's filters to determine whether
+// it is also empty. It is immutable.
+var emptyIPv6Filter = stack.IPHeaderFilter{
+	Dst:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+	DstMask: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+	Src:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+	SrcMask: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+}
+
+// convertNetstackToBinary6 converts the ip6tables as stored in netstack to the
+// format expected by the iptables tool. Linux stores each table as a binary
+// blob that can only be traversed by parsing a little data, reading some
+// offsets, jumping to those offsets, parsing again, etc.
+func convertNetstackToBinary6(stk *stack.Stack, tablename linux.TableName) (linux.KernelIP6TGetEntries, linux.IPTGetinfo, error) {
+	// The table name has to fit in the struct.
+	if linux.XT_TABLE_MAXNAMELEN < len(tablename) {
+		return linux.KernelIP6TGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("table name %q too long", tablename)
+	}
+
+	table, ok := stk.IPTables().GetTable(tablename.String(), true)
+	if !ok {
+		return linux.KernelIP6TGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("couldn't find table %q", tablename)
+	}
+
+	// Setup the info struct, which is the same in IPv4 and IPv6.
+	entries, info := getEntries6(table, tablename)
+	return entries, info, nil
+}
+
+func getEntries6(table stack.Table, tablename linux.TableName) (linux.KernelIP6TGetEntries, linux.IPTGetinfo) {
+	var info linux.IPTGetinfo
+	var entries linux.KernelIP6TGetEntries
+	copy(info.Name[:], tablename[:])
+	copy(entries.Name[:], info.Name[:])
+	info.ValidHooks = table.ValidHooks()
+
+	for ruleIdx, rule := range table.Rules {
+		nflog("convert to binary: current offset: %d", entries.Size)
+
+		setHooksAndUnderflow(&info, table, entries.Size, ruleIdx)
+		// Each rule corresponds to an entry.
+		entry := linux.KernelIP6TEntry{
+			Entry: linux.IP6TEntry{
+				IPv6: linux.IP6TIP{
+					Protocol: uint16(rule.Filter.Protocol),
+				},
+				NextOffset:   linux.SizeOfIP6TEntry,
+				TargetOffset: linux.SizeOfIP6TEntry,
+			},
+		}
+		copy(entry.Entry.IPv6.Dst[:], rule.Filter.Dst)
+		copy(entry.Entry.IPv6.DstMask[:], rule.Filter.DstMask)
+		copy(entry.Entry.IPv6.Src[:], rule.Filter.Src)
+		copy(entry.Entry.IPv6.SrcMask[:], rule.Filter.SrcMask)
+		copy(entry.Entry.IPv6.OutputInterface[:], rule.Filter.OutputInterface)
+		copy(entry.Entry.IPv6.OutputInterfaceMask[:], rule.Filter.OutputInterfaceMask)
+		if rule.Filter.DstInvert {
+			entry.Entry.IPv6.InverseFlags |= linux.IP6T_INV_DSTIP
+		}
+		if rule.Filter.SrcInvert {
+			entry.Entry.IPv6.InverseFlags |= linux.IP6T_INV_SRCIP
+		}
+		if rule.Filter.OutputInterfaceInvert {
+			entry.Entry.IPv6.InverseFlags |= linux.IP6T_INV_VIA_OUT
+		}
+		if rule.Filter.CheckProtocol {
+			entry.Entry.IPv6.Flags |= linux.IP6T_F_PROTO
+		}
+
+		for _, matcher := range rule.Matchers {
+			// Serialize the matcher and add it to the
+			// entry.
+			serialized := marshalMatcher(matcher)
+			nflog("convert to binary: matcher serialized as: %v", serialized)
+			if len(serialized)%8 != 0 {
+				panic(fmt.Sprintf("matcher %T is not 64-bit aligned", matcher))
+			}
+			entry.Elems = append(entry.Elems, serialized...)
+			entry.Entry.NextOffset += uint16(len(serialized))
+			entry.Entry.TargetOffset += uint16(len(serialized))
+		}
+
+		// Serialize and append the target.
+		serialized := marshalTarget(rule.Target)
+		if len(serialized)%8 != 0 {
+			panic(fmt.Sprintf("target %T is not 64-bit aligned", rule.Target))
+		}
+		entry.Elems = append(entry.Elems, serialized...)
+		entry.Entry.NextOffset += uint16(len(serialized))
+
+		nflog("convert to binary: adding entry: %+v", entry)
+
+		entries.Size += uint32(entry.Entry.NextOffset)
+		entries.Entrytable = append(entries.Entrytable, entry)
+		info.NumEntries++
+	}
+
+	info.Size = entries.Size
+	nflog("convert to binary: finished with an marshalled size of %d", info.Size)
+	return entries, info
+}
+
+func modifyEntries6(stk *stack.Stack, optVal []byte, replace *linux.IPTReplace, table *stack.Table) (map[uint32]int, *syserr.Error) {
+	nflog("set entries: setting entries in table %q", replace.Name.String())
+
+	// Convert input into a list of rules and their offsets.
+	var offset uint32
+	// offsets maps rule byte offsets to their position in table.Rules.
+	offsets := map[uint32]int{}
+	for entryIdx := uint32(0); entryIdx < replace.NumEntries; entryIdx++ {
+		nflog("set entries: processing entry at offset %d", offset)
+
+		// Get the struct ipt_entry.
+		if len(optVal) < linux.SizeOfIP6TEntry {
+			nflog("optVal has insufficient size for entry %d", len(optVal))
+			return nil, syserr.ErrInvalidArgument
+		}
+		var entry linux.IP6TEntry
+		buf := optVal[:linux.SizeOfIP6TEntry]
+		binary.Unmarshal(buf, usermem.ByteOrder, &entry)
+		initialOptValLen := len(optVal)
+		optVal = optVal[linux.SizeOfIP6TEntry:]
+
+		if entry.TargetOffset < linux.SizeOfIP6TEntry {
+			nflog("entry has too-small target offset %d", entry.TargetOffset)
+			return nil, syserr.ErrInvalidArgument
+		}
+
+		// TODO(gvisor.dev/issue/170): We should support more IPTIP
+		// filtering fields.
+		filter, err := filterFromIP6TIP(entry.IPv6)
+		if err != nil {
+			nflog("bad iptip: %v", err)
+			return nil, syserr.ErrInvalidArgument
+		}
+
+		// TODO(gvisor.dev/issue/170): Matchers and targets can specify
+		// that they only work for certain protocols, hooks, tables.
+		// Get matchers.
+		matchersSize := entry.TargetOffset - linux.SizeOfIP6TEntry
+		if len(optVal) < int(matchersSize) {
+			nflog("entry doesn't have enough room for its matchers (only %d bytes remain)", len(optVal))
+			return nil, syserr.ErrInvalidArgument
+		}
+		matchers, err := parseMatchers(filter, optVal[:matchersSize])
+		if err != nil {
+			nflog("failed to parse matchers: %v", err)
+			return nil, syserr.ErrInvalidArgument
+		}
+		optVal = optVal[matchersSize:]
+
+		// Get the target of the rule.
+		targetSize := entry.NextOffset - entry.TargetOffset
+		if len(optVal) < int(targetSize) {
+			nflog("entry doesn't have enough room for its target (only %d bytes remain)", len(optVal))
+			return nil, syserr.ErrInvalidArgument
+		}
+		target, err := parseTarget(filter, optVal[:targetSize])
+		if err != nil {
+			nflog("failed to parse target: %v", err)
+			return nil, syserr.ErrInvalidArgument
+		}
+		optVal = optVal[targetSize:]
+
+		table.Rules = append(table.Rules, stack.Rule{
+			Filter:   filter,
+			Target:   target,
+			Matchers: matchers,
+		})
+		offsets[offset] = int(entryIdx)
+		offset += uint32(entry.NextOffset)
+
+		if initialOptValLen-len(optVal) != int(entry.NextOffset) {
+			nflog("entry NextOffset is %d, but entry took up %d bytes", entry.NextOffset, initialOptValLen-len(optVal))
+			return nil, syserr.ErrInvalidArgument
+		}
+	}
+	return offsets, nil
+}
+
+func filterFromIP6TIP(iptip linux.IP6TIP) (stack.IPHeaderFilter, error) {
+	if containsUnsupportedFields6(iptip) {
+		return stack.IPHeaderFilter{}, fmt.Errorf("unsupported fields in struct iptip: %+v", iptip)
+	}
+	if len(iptip.Dst) != header.IPv6AddressSize || len(iptip.DstMask) != header.IPv6AddressSize {
+		return stack.IPHeaderFilter{}, fmt.Errorf("incorrect length of destination (%d) and/or destination mask (%d) fields", len(iptip.Dst), len(iptip.DstMask))
+	}
+	if len(iptip.Src) != header.IPv6AddressSize || len(iptip.SrcMask) != header.IPv6AddressSize {
+		return stack.IPHeaderFilter{}, fmt.Errorf("incorrect length of source (%d) and/or source mask (%d) fields", len(iptip.Src), len(iptip.SrcMask))
+	}
+
+	n := bytes.IndexByte([]byte(iptip.OutputInterface[:]), 0)
+	if n == -1 {
+		n = len(iptip.OutputInterface)
+	}
+	ifname := string(iptip.OutputInterface[:n])
+
+	n = bytes.IndexByte([]byte(iptip.OutputInterfaceMask[:]), 0)
+	if n == -1 {
+		n = len(iptip.OutputInterfaceMask)
+	}
+	ifnameMask := string(iptip.OutputInterfaceMask[:n])
+
+	return stack.IPHeaderFilter{
+		Protocol: tcpip.TransportProtocolNumber(iptip.Protocol),
+		// In ip6tables a flag controls whether to check the protocol.
+		CheckProtocol:         iptip.Flags&linux.IP6T_F_PROTO != 0,
+		Dst:                   tcpip.Address(iptip.Dst[:]),
+		DstMask:               tcpip.Address(iptip.DstMask[:]),
+		DstInvert:             iptip.InverseFlags&linux.IP6T_INV_DSTIP != 0,
+		Src:                   tcpip.Address(iptip.Src[:]),
+		SrcMask:               tcpip.Address(iptip.SrcMask[:]),
+		SrcInvert:             iptip.InverseFlags&linux.IP6T_INV_SRCIP != 0,
+		OutputInterface:       ifname,
+		OutputInterfaceMask:   ifnameMask,
+		OutputInterfaceInvert: iptip.InverseFlags&linux.IP6T_INV_VIA_OUT != 0,
+	}, nil
+}
+
+func containsUnsupportedFields6(iptip linux.IP6TIP) bool {
+	// The following features are supported:
+	// - Protocol
+	// - Dst and DstMask
+	// - Src and SrcMask
+	// - The inverse destination IP check flag
+	// - OutputInterface, OutputInterfaceMask and its inverse.
+	var emptyInterface = [linux.IFNAMSIZ]byte{}
+	flagMask := uint8(linux.IP6T_F_PROTO)
+	// Disable any supported inverse flags.
+	inverseMask := uint8(linux.IP6T_INV_DSTIP) | uint8(linux.IP6T_INV_SRCIP) | uint8(linux.IP6T_INV_VIA_OUT)
+	return iptip.InputInterface != emptyInterface ||
+		iptip.InputInterfaceMask != emptyInterface ||
+		iptip.Flags&^flagMask != 0 ||
+		iptip.InverseFlags&^inverseMask != 0 ||
+		iptip.TOS != 0
+}
diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go
index df256676fe..3e17350799 100644
--- a/pkg/sentry/socket/netfilter/netfilter.go
+++ b/pkg/sentry/socket/netfilter/netfilter.go
@@ -42,14 +42,19 @@ func nflog(format string, args ...interface{}) {
 }
 
 // GetInfo returns information about iptables.
-func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr) (linux.IPTGetinfo, *syserr.Error) {
+func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, ipv6 bool) (linux.IPTGetinfo, *syserr.Error) {
 	// Read in the struct and table name.
 	var info linux.IPTGetinfo
 	if _, err := info.CopyIn(t, outPtr); err != nil {
 		return linux.IPTGetinfo{}, syserr.FromError(err)
 	}
 
-	_, info, err := convertNetstackToBinary(stack, info.Name)
+	var err error
+	if ipv6 {
+		_, info, err = convertNetstackToBinary6(stack, info.Name)
+	} else {
+		_, info, err = convertNetstackToBinary4(stack, info.Name)
+	}
 	if err != nil {
 		nflog("couldn't convert iptables: %v", err)
 		return linux.IPTGetinfo{}, syserr.ErrInvalidArgument
@@ -59,9 +64,9 @@ func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr) (linux.IPT
 	return info, nil
 }
 
-// GetEntries4 returns netstack's iptables rules encoded for the iptables tool.
+// GetEntries4 returns netstack's iptables rules.
 func GetEntries4(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen int) (linux.KernelIPTGetEntries, *syserr.Error) {
-	// Read in the ABI struct.
+	// Read in the struct and table name.
 	var userEntries linux.IPTGetEntries
 	if _, err := userEntries.CopyIn(t, outPtr); err != nil {
 		nflog("couldn't copy in entries %q", userEntries.Name)
@@ -70,7 +75,7 @@ func GetEntries4(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen
 
 	// Convert netstack's iptables rules to something that the iptables
 	// tool can understand.
-	entries, _, err := convertNetstackToBinary(stack, userEntries.Name)
+	entries, _, err := convertNetstackToBinary4(stack, userEntries.Name)
 	if err != nil {
 		nflog("couldn't read entries: %v", err)
 		return linux.KernelIPTGetEntries{}, syserr.ErrInvalidArgument
@@ -83,28 +88,29 @@ func GetEntries4(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen
 	return entries, nil
 }
 
-// convertNetstackToBinary converts the iptables as stored in netstack to the
-// format expected by the iptables tool. Linux stores each table as a binary
-// blob that can only be traversed by parsing a bit, reading some offsets,
-// jumping to those offsets, parsing again, etc.
-func convertNetstackToBinary(stk *stack.Stack, tablename linux.TableName) (linux.KernelIPTGetEntries, linux.IPTGetinfo, error) {
-	// The table name has to fit in the struct.
-	if linux.XT_TABLE_MAXNAMELEN < len(tablename) {
-		return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("table name %q too long", tablename)
+// GetEntries6 returns netstack's ip6tables rules.
+func GetEntries6(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen int) (linux.KernelIP6TGetEntries, *syserr.Error) {
+	// Read in the struct and table name. IPv4 and IPv6 utilize structs
+	// with the same layout.
+	var userEntries linux.IPTGetEntries
+	if _, err := userEntries.CopyIn(t, outPtr); err != nil {
+		nflog("couldn't copy in entries %q", userEntries.Name)
+		return linux.KernelIP6TGetEntries{}, syserr.FromError(err)
 	}
 
-	table, ok := stk.IPTables().GetTable(tablename.String())
-	if !ok {
-		return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("couldn't find table %q", tablename)
+	// Convert netstack's iptables rules to something that the iptables
+	// tool can understand.
+	entries, _, err := convertNetstackToBinary6(stack, userEntries.Name)
+	if err != nil {
+		nflog("couldn't read entries: %v", err)
+		return linux.KernelIP6TGetEntries{}, syserr.ErrInvalidArgument
+	}
+	if binary.Size(entries) > uintptr(outLen) {
+		nflog("insufficient GetEntries output size: %d", uintptr(outLen))
+		return linux.KernelIP6TGetEntries{}, syserr.ErrInvalidArgument
 	}
 
-	// Setup the info struct.
-	var info linux.IPTGetinfo
-	info.ValidHooks = table.ValidHooks()
-	copy(info.Name[:], tablename[:])
-
-	entries := getEntries4(table, &info)
-	return entries, info, nil
+	return entries, nil
 }
 
 // setHooksAndUnderflow checks whether the rule at ruleIdx is a hook entrypoint
@@ -128,7 +134,7 @@ func setHooksAndUnderflow(info *linux.IPTGetinfo, table stack.Table, offset uint
 
 // SetEntries sets iptables rules for a single table. See
 // net/ipv4/netfilter/ip_tables.c:translate_table for reference.
-func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
+func SetEntries(stk *stack.Stack, optVal []byte, ipv6 bool) *syserr.Error {
 	var replace linux.IPTReplace
 	replaceBuf := optVal[:linux.SizeOfIPTReplace]
 	optVal = optVal[linux.SizeOfIPTReplace:]
@@ -146,7 +152,13 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
 		return syserr.ErrInvalidArgument
 	}
 
-	offsets, err := modifyEntries4(stk, optVal, &replace, &table)
+	var err *syserr.Error
+	var offsets map[uint32]int
+	if ipv6 {
+		offsets, err = modifyEntries6(stk, optVal, &replace, &table)
+	} else {
+		offsets, err = modifyEntries4(stk, optVal, &replace, &table)
+	}
 	if err != nil {
 		return err
 	}
@@ -163,7 +175,7 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
 					table.BuiltinChains[hk] = ruleIdx
 				}
 				if offset == replace.Underflow[hook] {
-					if !validUnderflow(table.Rules[ruleIdx]) {
+					if !validUnderflow(table.Rules[ruleIdx], ipv6) {
 						nflog("underflow for hook %d isn't an unconditional ACCEPT or DROP", ruleIdx)
 						return syserr.ErrInvalidArgument
 					}
@@ -228,7 +240,7 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
 			if ruleIdx == stack.HookUnset {
 				continue
 			}
-			if !isUnconditionalAccept(table.Rules[ruleIdx]) {
+			if !isUnconditionalAccept(table.Rules[ruleIdx], ipv6) {
 				nflog("hook %d is unsupported.", hook)
 				return syserr.ErrInvalidArgument
 			}
@@ -240,7 +252,8 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
 	// - There are no chains without an unconditional final rule.
 	// - There are no chains without an unconditional underflow rule.
 
-	return syserr.TranslateNetstackError(stk.IPTables().ReplaceTable(replace.Name.String(), table))
+	return syserr.TranslateNetstackError(stk.IPTables().ReplaceTable(replace.Name.String(), table, ipv6))
+
 }
 
 // parseMatchers parses 0 or more matchers from optVal. optVal should contain
@@ -286,11 +299,11 @@ func parseMatchers(filter stack.IPHeaderFilter, optVal []byte) ([]stack.Matcher,
 	return matchers, nil
 }
 
-func validUnderflow(rule stack.Rule) bool {
+func validUnderflow(rule stack.Rule, ipv6 bool) bool {
 	if len(rule.Matchers) != 0 {
 		return false
 	}
-	if rule.Filter != emptyIPv4Filter {
+	if (ipv6 && rule.Filter != emptyIPv6Filter) || (!ipv6 && rule.Filter != emptyIPv4Filter) {
 		return false
 	}
 	switch rule.Target.(type) {
@@ -301,8 +314,8 @@ func validUnderflow(rule stack.Rule) bool {
 	}
 }
 
-func isUnconditionalAccept(rule stack.Rule) bool {
-	if !validUnderflow(rule) {
+func isUnconditionalAccept(rule stack.Rule, ipv6 bool) bool {
+	if !validUnderflow(rule, ipv6) {
 		return false
 	}
 	_, ok := rule.Target.(stack.AcceptTarget)
diff --git a/pkg/sentry/socket/netfilter/targets.go b/pkg/sentry/socket/netfilter/targets.go
index 8ebdaff181..87e41abd86 100644
--- a/pkg/sentry/socket/netfilter/targets.go
+++ b/pkg/sentry/socket/netfilter/targets.go
@@ -218,8 +218,8 @@ func parseTarget(filter stack.IPHeaderFilter, optVal []byte) (stack.Target, erro
 			return nil, fmt.Errorf("netfilter.SetEntries: optVal has insufficient size for redirect target %d", len(optVal))
 		}
 
-		if filter.Protocol != header.TCPProtocolNumber && filter.Protocol != header.UDPProtocolNumber {
-			return nil, fmt.Errorf("netfilter.SetEntries: invalid argument")
+		if p := filter.Protocol; p != header.TCPProtocolNumber && p != header.UDPProtocolNumber {
+			return nil, fmt.Errorf("netfilter.SetEntries: bad proto %d", p)
 		}
 
 		var redirectTarget linux.XTRedirectTarget
@@ -232,7 +232,7 @@ func parseTarget(filter stack.IPHeaderFilter, optVal []byte) (stack.Target, erro
 
 		// RangeSize should be 1.
 		if nfRange.RangeSize != 1 {
-			return nil, fmt.Errorf("netfilter.SetEntries: invalid argument")
+			return nil, fmt.Errorf("netfilter.SetEntries: bad rangesize %d", nfRange.RangeSize)
 		}
 
 		// TODO(gvisor.dev/issue/170): Check if the flags are valid.
@@ -240,7 +240,7 @@ func parseTarget(filter stack.IPHeaderFilter, optVal []byte) (stack.Target, erro
 		// For now, redirect target only supports destination port change.
 		// Port range and IP range are not supported yet.
 		if nfRange.RangeIPV4.Flags&linux.NF_NAT_RANGE_PROTO_SPECIFIED == 0 {
-			return nil, fmt.Errorf("netfilter.SetEntries: invalid argument")
+			return nil, fmt.Errorf("netfilter.SetEntries: invalid range flags %d", nfRange.RangeIPV4.Flags)
 		}
 		target.RangeProtoSpecified = true
 
@@ -249,7 +249,7 @@ func parseTarget(filter stack.IPHeaderFilter, optVal []byte) (stack.Target, erro
 
 		// TODO(gvisor.dev/issue/170): Port range is not supported yet.
 		if nfRange.RangeIPV4.MinPort != nfRange.RangeIPV4.MaxPort {
-			return nil, fmt.Errorf("netfilter.SetEntries: invalid argument")
+			return nil, fmt.Errorf("netfilter.SetEntries: minport != maxport (%d, %d)", nfRange.RangeIPV4.MinPort, nfRange.RangeIPV4.MaxPort)
 		}
 
 		// Convert port from big endian to little endian.
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 9e2ebc7d4f..2af2d82529 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -997,7 +997,7 @@ func GetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family in
 		return getSockOptTCP(t, ep, name, outLen)
 
 	case linux.SOL_IPV6:
-		return getSockOptIPv6(t, ep, name, outLen)
+		return getSockOptIPv6(t, s, ep, name, outPtr, outLen)
 
 	case linux.SOL_IP:
 		return getSockOptIP(t, s, ep, name, outPtr, outLen, family)
@@ -1455,7 +1455,7 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (marshal
 }
 
 // getSockOptIPv6 implements GetSockOpt when level is SOL_IPV6.
-func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (marshal.Marshallable, *syserr.Error) {
+func getSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
 	switch name {
 	case linux.IPV6_V6ONLY:
 		if outLen < sizeOfInt32 {
@@ -1508,10 +1508,50 @@ func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (marsha
 		vP := primitive.Int32(boolToInt32(v))
 		return &vP, nil
 
-	case linux.SO_ORIGINAL_DST:
+	case linux.IP6T_ORIGINAL_DST:
 		// TODO(gvisor.dev/issue/170): ip6tables.
 		return nil, syserr.ErrInvalidArgument
 
+	case linux.IP6T_SO_GET_INFO:
+		if outLen < linux.SizeOfIPTGetinfo {
+			return nil, syserr.ErrInvalidArgument
+		}
+
+		// Only valid for raw IPv6 sockets.
+		if family, skType, _ := s.Type(); family != linux.AF_INET6 || skType != linux.SOCK_RAW {
+			return nil, syserr.ErrProtocolNotAvailable
+		}
+
+		stack := inet.StackFromContext(t)
+		if stack == nil {
+			return nil, syserr.ErrNoDevice
+		}
+		info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr, true)
+		if err != nil {
+			return nil, err
+		}
+		return &info, nil
+
+	case linux.IP6T_SO_GET_ENTRIES:
+		// IPTGetEntries is reused for IPv6.
+		if outLen < linux.SizeOfIPTGetEntries {
+			return nil, syserr.ErrInvalidArgument
+		}
+		// Only valid for raw IPv6 sockets.
+		if family, skType, _ := s.Type(); family != linux.AF_INET6 || skType != linux.SOCK_RAW {
+			return nil, syserr.ErrProtocolNotAvailable
+		}
+
+		stack := inet.StackFromContext(t)
+		if stack == nil {
+			return nil, syserr.ErrNoDevice
+		}
+		entries, err := netfilter.GetEntries6(t, stack.(*Stack).Stack, outPtr, outLen)
+		if err != nil {
+			return nil, err
+		}
+		return &entries, nil
+
 	default:
 		emitUnimplementedEventIPv6(t, name)
 	}
@@ -1649,7 +1689,7 @@ func getSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name in
 		if stack == nil {
 			return nil, syserr.ErrNoDevice
 		}
-		info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr)
+		info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr, false)
 		if err != nil {
 			return nil, err
 		}
@@ -1722,7 +1762,7 @@ func SetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, level int
 		return setSockOptTCP(t, ep, name, optVal)
 
 	case linux.SOL_IPV6:
-		return setSockOptIPv6(t, ep, name, optVal)
+		return setSockOptIPv6(t, s, ep, name, optVal)
 
 	case linux.SOL_IP:
 		return setSockOptIP(t, s, ep, name, optVal)
@@ -2027,7 +2067,7 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *
 }
 
 // setSockOptIPv6 implements SetSockOpt when level is SOL_IPV6.
-func setSockOptIPv6(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *syserr.Error {
+func setSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, optVal []byte) *syserr.Error {
 	switch name {
 	case linux.IPV6_V6ONLY:
 		if len(optVal) < sizeOfInt32 {
@@ -2076,6 +2116,27 @@ func setSockOptIPv6(t *kernel.Task, ep commonEndpoint, name int, optVal []byte)
 
 		return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.ReceiveTClassOption, v != 0))
 
+	case linux.IP6T_SO_SET_REPLACE:
+		if len(optVal) < linux.SizeOfIP6TReplace {
+			return syserr.ErrInvalidArgument
+		}
+
+		// Only valid for raw IPv6 sockets.
+		if family, skType, _ := s.Type(); family != linux.AF_INET6 || skType != linux.SOCK_RAW {
+			return syserr.ErrProtocolNotAvailable
+		}
+
+		stack := inet.StackFromContext(t)
+		if stack == nil {
+			return syserr.ErrNoDevice
+		}
+		// Stack must be a netstack stack.
+		return netfilter.SetEntries(stack.(*Stack).Stack, optVal, true)
+
+	case linux.IP6T_SO_SET_ADD_COUNTERS:
+		// TODO(gvisor.dev/issue/170): Counter support.
+		return nil
+
 	default:
 		emitUnimplementedEventIPv6(t, name)
 	}
@@ -2271,7 +2332,7 @@ func setSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name in
 			return syserr.ErrNoDevice
 		}
 		// Stack must be a netstack stack.
-		return netfilter.SetEntries(stack.(*Stack).Stack, optVal)
+		return netfilter.SetEntries(stack.(*Stack).Stack, optVal, false)
 
 	case linux.IPT_SO_SET_ADD_COUNTERS:
 		// TODO(gvisor.dev/issue/170): Counter support.
diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go
index 41ef4236bc..30aa41db23 100644
--- a/pkg/tcpip/stack/iptables.go
+++ b/pkg/tcpip/stack/iptables.go
@@ -165,7 +165,11 @@ func EmptyNATTable() Table {
 }
 
 // GetTable returns a table by name.
-func (it *IPTables) GetTable(name string) (Table, bool) {
+func (it *IPTables) GetTable(name string, ipv6 bool) (Table, bool) {
+	// TODO(gvisor.dev/issue/3549): Enable IPv6.
+	if ipv6 {
+		return Table{}, false
+	}
 	id, ok := nameToID[name]
 	if !ok {
 		return Table{}, false
@@ -176,7 +180,11 @@ func (it *IPTables) GetTable(name string) (Table, bool) {
 }
 
 // ReplaceTable replaces or inserts table by name.
-func (it *IPTables) ReplaceTable(name string, table Table) *tcpip.Error {
+func (it *IPTables) ReplaceTable(name string, table Table, ipv6 bool) *tcpip.Error {
+	// TODO(gvisor.dev/issue/3549): Enable IPv6.
+	if ipv6 {
+		return tcpip.ErrInvalidOptionValue
+	}
 	id, ok := nameToID[name]
 	if !ok {
 		return tcpip.ErrInvalidOptionValue
diff --git a/pkg/tcpip/stack/iptables_types.go b/pkg/tcpip/stack/iptables_types.go
index 73274ada9e..fbbd2f50fa 100644
--- a/pkg/tcpip/stack/iptables_types.go
+++ b/pkg/tcpip/stack/iptables_types.go
@@ -155,6 +155,11 @@ type IPHeaderFilter struct {
 	// Protocol matches the transport protocol.
 	Protocol tcpip.TransportProtocolNumber
 
+	// CheckProtocol determines whether the Protocol field should be
+	// checked during matching.
+	// TODO(gvisor.dev/issue/3549): Check this field during matching.
+	CheckProtocol bool
+
 	// Dst matches the destination IP address.
 	Dst tcpip.Address
 
diff --git a/test/syscalls/linux/ip6tables.cc b/test/syscalls/linux/ip6tables.cc
index 685e513f86..78e1fa09d7 100644
--- a/test/syscalls/linux/ip6tables.cc
+++ b/test/syscalls/linux/ip6tables.cc
@@ -34,6 +34,54 @@ constexpr size_t kEmptyStandardEntrySize =
 constexpr size_t kEmptyErrorEntrySize =
     sizeof(struct ip6t_entry) + sizeof(struct xt_error_target);
 
+TEST(IP6TablesBasic, FailSockoptNonRaw) {
+  // Even if the user has CAP_NET_RAW, they shouldn't be able to use the
+  // ip6tables sockopts with a non-raw socket.
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  int sock;
+  ASSERT_THAT(sock = socket(AF_INET6, SOCK_DGRAM, 0), SyscallSucceeds());
+
+  struct ipt_getinfo info = {};
+  snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename);
+  socklen_t info_size = sizeof(info);
+  EXPECT_THAT(getsockopt(sock, SOL_IPV6, IP6T_SO_GET_INFO, &info, &info_size),
+              SyscallFailsWithErrno(ENOPROTOOPT));
+
+  EXPECT_THAT(close(sock), SyscallSucceeds());
+}
+
+TEST(IP6TablesBasic, GetInfoErrorPrecedence) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  int sock;
+  ASSERT_THAT(sock = socket(AF_INET6, SOCK_DGRAM, 0), SyscallSucceeds());
+
+  // When using the wrong type of socket and a too-short optlen, we should get
+  // EINVAL.
+  struct ipt_getinfo info = {};
+  snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename);
+  socklen_t info_size = sizeof(info) - 1;
+  EXPECT_THAT(getsockopt(sock, SOL_IPV6, IP6T_SO_GET_INFO, &info, &info_size),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(IP6TablesBasic, GetEntriesErrorPrecedence) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  int sock;
+  ASSERT_THAT(sock = socket(AF_INET6, SOCK_DGRAM, 0), SyscallSucceeds());
+
+  // When using the wrong type of socket and a too-short optlen, we should get
+  // EINVAL.
+  struct ip6t_get_entries entries = {};
+  socklen_t entries_size = sizeof(struct ip6t_get_entries) - 1;
+  snprintf(entries.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename);
+  EXPECT_THAT(
+      getsockopt(sock, SOL_IPV6, IP6T_SO_GET_ENTRIES, &entries, &entries_size),
+      SyscallFailsWithErrno(EINVAL));
+}
+
 // This tests the initial state of a machine with empty ip6tables via
 // getsockopt(IP6T_SO_GET_INFO). We don't have a guarantee that the iptables are
 // empty when running in native, but we can test that gVisor has the same

From 5d449c870622f7088825af5650786e8bb755567a Mon Sep 17 00:00:00 2001
From: Ghanan Gowripalan <ghanan@google.com>
Date: Thu, 27 Aug 2020 12:48:19 -0700
Subject: [PATCH 097/211] Add function to get error from a tcpip.Endpoint

In an upcoming CL, socket option types are made to implement a marker
interface with pointer receivers. Since this results in calling methods
of an interface with a pointer, we incur an allocation when attempting
to get an Endpoint's last error with the current implementation.

When calling the method of an interface, the compiler is unable to
determine what the interface implementation does with the pointer
(since calling a method on an interface uses virtual dispatch at runtime
so the compiler does not know what the interface method will do) so it
allocates on the heap to be safe in case an implementation continues to
hold the pointer after the function returns (the reference escapes the
scope of the object).

In the example below, the compiler does not know what b.foo does with
the reference to `a`, so it allocates `a` on the heap, as the reference
to `a` may escape the scope of `a`.
```
var a int
var b someInterface
b.foo(&a)
```

This change removes the opportunity for that allocation.

RELNOTES: n/a
PiperOrigin-RevId: 328796559
---
 pkg/sentry/socket/netstack/netstack.go        |  5 +++-
 pkg/sentry/socket/unix/transport/unix.go      | 10 ++++++-
 pkg/tcpip/adapters/gonet/gonet.go             |  2 +-
 pkg/tcpip/adapters/gonet/gonet_test.go        |  2 +-
 pkg/tcpip/sample/tun_tcp_connect/main.go      |  2 +-
 pkg/tcpip/stack/transport_test.go             | 26 ++++++++-----------
 pkg/tcpip/tcpip.go                            |  7 +++--
 pkg/tcpip/transport/icmp/endpoint.go          | 15 +++++------
 pkg/tcpip/transport/packet/endpoint.go        |  8 ++----
 pkg/tcpip/transport/raw/endpoint.go           | 14 +++++-----
 pkg/tcpip/transport/tcp/connect.go            |  4 +--
 pkg/tcpip/transport/tcp/dual_stack_test.go    |  6 ++---
 pkg/tcpip/transport/tcp/endpoint.go           |  5 +---
 pkg/tcpip/transport/tcp/tcp_test.go           | 10 +++----
 .../transport/tcp/testing/context/context.go  |  5 ++--
 pkg/tcpip/transport/udp/endpoint.go           |  8 +++---
 16 files changed, 60 insertions(+), 69 deletions(-)

diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 2af2d82529..8da77cc681 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -257,6 +257,9 @@ type commonEndpoint interface {
 	// GetSockOptInt implements tcpip.Endpoint.GetSockOptInt and
 	// transport.Endpoint.GetSockOpt.
 	GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error)
+
+	// LastError implements tcpip.Endpoint.LastError.
+	LastError() *tcpip.Error
 }
 
 // LINT.IfChange
@@ -1030,7 +1033,7 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam
 		}
 
 		// Get the last error and convert it.
-		err := ep.GetSockOpt(tcpip.ErrorOption{})
+		err := ep.LastError()
 		if err == nil {
 			optP := primitive.Int32(0)
 			return &optP, nil
diff --git a/pkg/sentry/socket/unix/transport/unix.go b/pkg/sentry/socket/unix/transport/unix.go
index ab7bab5cd3..4bf06d4dc1 100644
--- a/pkg/sentry/socket/unix/transport/unix.go
+++ b/pkg/sentry/socket/unix/transport/unix.go
@@ -199,6 +199,9 @@ type Endpoint interface {
 	// State returns the current state of the socket, as represented by Linux in
 	// procfs.
 	State() uint32
+
+	// LastError implements tcpip.Endpoint.LastError.
+	LastError() *tcpip.Error
 }
 
 // A Credentialer is a socket or endpoint that supports the SO_PASSCRED socket
@@ -942,7 +945,7 @@ func (e *baseEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
 // GetSockOpt implements tcpip.Endpoint.GetSockOpt.
 func (e *baseEndpoint) GetSockOpt(opt interface{}) *tcpip.Error {
 	switch opt.(type) {
-	case tcpip.ErrorOption, *tcpip.LingerOption:
+	case *tcpip.LingerOption:
 		return nil
 
 	default:
@@ -951,6 +954,11 @@ func (e *baseEndpoint) GetSockOpt(opt interface{}) *tcpip.Error {
 	}
 }
 
+// LastError implements Endpoint.LastError.
+func (*baseEndpoint) LastError() *tcpip.Error {
+	return nil
+}
+
 // Shutdown closes the read and/or write end of the endpoint connection to its
 // peer.
 func (e *baseEndpoint) Shutdown(flags tcpip.ShutdownFlags) *syserr.Error {
diff --git a/pkg/tcpip/adapters/gonet/gonet.go b/pkg/tcpip/adapters/gonet/gonet.go
index d82ed52050..68a954a101 100644
--- a/pkg/tcpip/adapters/gonet/gonet.go
+++ b/pkg/tcpip/adapters/gonet/gonet.go
@@ -541,7 +541,7 @@ func DialContextTCP(ctx context.Context, s *stack.Stack, addr tcpip.FullAddress,
 		case <-notifyCh:
 		}
 
-		err = ep.GetSockOpt(tcpip.ErrorOption{})
+		err = ep.LastError()
 	}
 	if err != nil {
 		ep.Close()
diff --git a/pkg/tcpip/adapters/gonet/gonet_test.go b/pkg/tcpip/adapters/gonet/gonet_test.go
index 3c552988a4..c975ad9cf9 100644
--- a/pkg/tcpip/adapters/gonet/gonet_test.go
+++ b/pkg/tcpip/adapters/gonet/gonet_test.go
@@ -104,7 +104,7 @@ func connect(s *stack.Stack, addr tcpip.FullAddress) (*testConnection, *tcpip.Er
 	err = ep.Connect(addr)
 	if err == tcpip.ErrConnectStarted {
 		<-ch
-		err = ep.GetSockOpt(tcpip.ErrorOption{})
+		err = ep.LastError()
 	}
 	if err != nil {
 		return nil, err
diff --git a/pkg/tcpip/sample/tun_tcp_connect/main.go b/pkg/tcpip/sample/tun_tcp_connect/main.go
index 0ab089208c..91fc26722a 100644
--- a/pkg/tcpip/sample/tun_tcp_connect/main.go
+++ b/pkg/tcpip/sample/tun_tcp_connect/main.go
@@ -182,7 +182,7 @@ func main() {
 	if terr == tcpip.ErrConnectStarted {
 		fmt.Println("Connect is pending...")
 		<-notifyCh
-		terr = ep.GetSockOpt(tcpip.ErrorOption{})
+		terr = ep.LastError()
 	}
 	wq.EventUnregister(&waitEntry)
 
diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go
index 6c6e444683..7869bb98b4 100644
--- a/pkg/tcpip/stack/transport_test.go
+++ b/pkg/tcpip/stack/transport_test.go
@@ -53,11 +53,11 @@ func (f *fakeTransportEndpoint) Info() tcpip.EndpointInfo {
 	return &f.TransportEndpointInfo
 }
 
-func (f *fakeTransportEndpoint) Stats() tcpip.EndpointStats {
+func (*fakeTransportEndpoint) Stats() tcpip.EndpointStats {
 	return nil
 }
 
-func (f *fakeTransportEndpoint) SetOwner(owner tcpip.PacketOwner) {}
+func (*fakeTransportEndpoint) SetOwner(owner tcpip.PacketOwner) {}
 
 func newFakeTransportEndpoint(s *stack.Stack, proto *fakeTransportProtocol, netProto tcpip.NetworkProtocolNumber, uniqueID uint64) tcpip.Endpoint {
 	return &fakeTransportEndpoint{stack: s, TransportEndpointInfo: stack.TransportEndpointInfo{NetProto: netProto}, proto: proto, uniqueID: uniqueID}
@@ -100,7 +100,7 @@ func (f *fakeTransportEndpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions
 	return int64(len(v)), nil, nil
 }
 
-func (f *fakeTransportEndpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) {
+func (*fakeTransportEndpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) {
 	return 0, tcpip.ControlMessages{}, nil
 }
 
@@ -131,10 +131,6 @@ func (*fakeTransportEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.E
 
 // GetSockOpt implements tcpip.Endpoint.GetSockOpt.
 func (*fakeTransportEndpoint) GetSockOpt(opt interface{}) *tcpip.Error {
-	switch opt.(type) {
-	case tcpip.ErrorOption:
-		return nil
-	}
 	return tcpip.ErrInvalidEndpointState
 }
 
@@ -169,7 +165,7 @@ func (f *fakeTransportEndpoint) UniqueID() uint64 {
 	return f.uniqueID
 }
 
-func (f *fakeTransportEndpoint) ConnectEndpoint(e tcpip.Endpoint) *tcpip.Error {
+func (*fakeTransportEndpoint) ConnectEndpoint(e tcpip.Endpoint) *tcpip.Error {
 	return nil
 }
 
@@ -239,19 +235,19 @@ func (f *fakeTransportEndpoint) HandleControlPacket(stack.TransportEndpointID, s
 	f.proto.controlCount++
 }
 
-func (f *fakeTransportEndpoint) State() uint32 {
+func (*fakeTransportEndpoint) State() uint32 {
 	return 0
 }
 
-func (f *fakeTransportEndpoint) ModerateRecvBuf(copied int) {}
+func (*fakeTransportEndpoint) ModerateRecvBuf(copied int) {}
 
-func (f *fakeTransportEndpoint) IPTables() (stack.IPTables, error) {
-	return stack.IPTables{}, nil
-}
+func (*fakeTransportEndpoint) Resume(*stack.Stack) {}
 
-func (f *fakeTransportEndpoint) Resume(*stack.Stack) {}
+func (*fakeTransportEndpoint) Wait() {}
 
-func (f *fakeTransportEndpoint) Wait() {}
+func (*fakeTransportEndpoint) LastError() *tcpip.Error {
+	return nil
+}
 
 type fakeTransportGoodOption bool
 
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index 609b8af33e..cae9436082 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -620,6 +620,9 @@ type Endpoint interface {
 
 	// SetOwner sets the task owner to the endpoint owner.
 	SetOwner(owner PacketOwner)
+
+	// LastError clears and returns the last error reported by the endpoint.
+	LastError() *Error
 }
 
 // LinkPacketInfo holds Link layer information for a received packet.
@@ -839,10 +842,6 @@ const (
 	PMTUDiscoveryProbe
 )
 
-// ErrorOption is used in GetSockOpt to specify that the last error reported by
-// the endpoint should be cleared and returned.
-type ErrorOption struct{}
-
 // BindToDeviceOption is used by SetSockOpt/GetSockOpt to specify that sockets
 // should bind only on a specific NIC.
 type BindToDeviceOption NICID
diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go
index bd6f49eb8c..c545c8367c 100644
--- a/pkg/tcpip/transport/icmp/endpoint.go
+++ b/pkg/tcpip/transport/icmp/endpoint.go
@@ -415,14 +415,8 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
 }
 
 // GetSockOpt implements tcpip.Endpoint.GetSockOpt.
-func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
-	switch opt.(type) {
-	case tcpip.ErrorOption:
-		return nil
-
-	default:
-		return tcpip.ErrUnknownProtocolOption
-	}
+func (*endpoint) GetSockOpt(interface{}) *tcpip.Error {
+	return tcpip.ErrUnknownProtocolOption
 }
 
 func send4(r *stack.Route, ident uint16, data buffer.View, ttl uint8, owner tcpip.PacketOwner) *tcpip.Error {
@@ -836,3 +830,8 @@ func (e *endpoint) Stats() tcpip.EndpointStats {
 
 // Wait implements stack.TransportEndpoint.Wait.
 func (*endpoint) Wait() {}
+
+// LastError implements tcpip.Endpoint.LastError.
+func (*endpoint) LastError() *tcpip.Error {
+	return nil
+}
diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go
index 1b03ad6bb7..95dc8ed578 100644
--- a/pkg/tcpip/transport/packet/endpoint.go
+++ b/pkg/tcpip/transport/packet/endpoint.go
@@ -356,7 +356,7 @@ func (ep *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
 	}
 }
 
-func (ep *endpoint) takeLastError() *tcpip.Error {
+func (ep *endpoint) LastError() *tcpip.Error {
 	ep.lastErrorMu.Lock()
 	defer ep.lastErrorMu.Unlock()
 
@@ -366,11 +366,7 @@ func (ep *endpoint) takeLastError() *tcpip.Error {
 }
 
 // GetSockOpt implements tcpip.Endpoint.GetSockOpt.
-func (ep *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
-	switch opt.(type) {
-	case tcpip.ErrorOption:
-		return ep.takeLastError()
-	}
+func (*endpoint) GetSockOpt(interface{}) *tcpip.Error {
 	return tcpip.ErrNotSupported
 }
 
diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go
index edc2b5b615..2087bcfa81 100644
--- a/pkg/tcpip/transport/raw/endpoint.go
+++ b/pkg/tcpip/transport/raw/endpoint.go
@@ -577,14 +577,8 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
 }
 
 // GetSockOpt implements tcpip.Endpoint.GetSockOpt.
-func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
-	switch opt.(type) {
-	case tcpip.ErrorOption:
-		return nil
-
-	default:
-		return tcpip.ErrUnknownProtocolOption
-	}
+func (*endpoint) GetSockOpt(interface{}) *tcpip.Error {
+	return tcpip.ErrUnknownProtocolOption
 }
 
 // GetSockOptBool implements tcpip.Endpoint.GetSockOptBool.
@@ -739,3 +733,7 @@ func (e *endpoint) Stats() tcpip.EndpointStats {
 
 // Wait implements stack.TransportEndpoint.Wait.
 func (*endpoint) Wait() {}
+
+func (*endpoint) LastError() *tcpip.Error {
+	return nil
+}
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go
index 290172ac98..72df5c2a16 100644
--- a/pkg/tcpip/transport/tcp/connect.go
+++ b/pkg/tcpip/transport/tcp/connect.go
@@ -491,7 +491,7 @@ func (h *handshake) resolveRoute() *tcpip.Error {
 				h.ep.mu.Lock()
 			}
 			if n&notifyError != 0 {
-				return h.ep.takeLastError()
+				return h.ep.LastError()
 			}
 		}
 
@@ -620,7 +620,7 @@ func (h *handshake) execute() *tcpip.Error {
 				h.ep.mu.Lock()
 			}
 			if n&notifyError != 0 {
-				return h.ep.takeLastError()
+				return h.ep.LastError()
 			}
 
 		case wakerForNewSegment:
diff --git a/pkg/tcpip/transport/tcp/dual_stack_test.go b/pkg/tcpip/transport/tcp/dual_stack_test.go
index 804e95aea2..6074cc24e0 100644
--- a/pkg/tcpip/transport/tcp/dual_stack_test.go
+++ b/pkg/tcpip/transport/tcp/dual_stack_test.go
@@ -86,8 +86,7 @@ func testV4Connect(t *testing.T, c *context.Context, checkers ...checker.Network
 	// Wait for connection to be established.
 	select {
 	case <-ch:
-		err = c.EP.GetSockOpt(tcpip.ErrorOption{})
-		if err != nil {
+		if err := c.EP.LastError(); err != nil {
 			t.Fatalf("Unexpected error when connecting: %v", err)
 		}
 	case <-time.After(1 * time.Second):
@@ -194,8 +193,7 @@ func testV6Connect(t *testing.T, c *context.Context, checkers ...checker.Network
 	// Wait for connection to be established.
 	select {
 	case <-ch:
-		err = c.EP.GetSockOpt(tcpip.ErrorOption{})
-		if err != nil {
+		if err := c.EP.LastError(); err != nil {
 			t.Fatalf("Unexpected error when connecting: %v", err)
 		}
 	case <-time.After(1 * time.Second):
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index ff9b8804db..8a5e993b51 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -1234,7 +1234,7 @@ func (e *endpoint) SetOwner(owner tcpip.PacketOwner) {
 	e.owner = owner
 }
 
-func (e *endpoint) takeLastError() *tcpip.Error {
+func (e *endpoint) LastError() *tcpip.Error {
 	e.lastErrorMu.Lock()
 	defer e.lastErrorMu.Unlock()
 	err := e.lastError
@@ -1995,9 +1995,6 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
 // GetSockOpt implements tcpip.Endpoint.GetSockOpt.
 func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
 	switch o := opt.(type) {
-	case tcpip.ErrorOption:
-		return e.takeLastError()
-
 	case *tcpip.BindToDeviceOption:
 		e.LockUser()
 		*o = tcpip.BindToDeviceOption(e.bindToDevice)
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index 9650bb06c0..3d3034d509 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -74,8 +74,8 @@ func TestGiveUpConnect(t *testing.T) {
 
 	// Wait for ep to become writable.
 	<-notifyCh
-	if err := ep.GetSockOpt(tcpip.ErrorOption{}); err != tcpip.ErrAborted {
-		t.Fatalf("got ep.GetSockOpt(tcpip.ErrorOption{}) = %s, want = %s", err, tcpip.ErrAborted)
+	if err := ep.LastError(); err != tcpip.ErrAborted {
+		t.Fatalf("got ep.LastError() = %s, want = %s", err, tcpip.ErrAborted)
 	}
 
 	// Call Connect again to retreive the handshake failure status
@@ -3023,8 +3023,8 @@ func TestSynOptionsOnActiveConnect(t *testing.T) {
 	// Wait for connection to be established.
 	select {
 	case <-ch:
-		if err := c.EP.GetSockOpt(tcpip.ErrorOption{}); err != nil {
-			t.Fatalf("GetSockOpt failed: %s", err)
+		if err := c.EP.LastError(); err != nil {
+			t.Fatalf("Connect failed: %s", err)
 		}
 	case <-time.After(1 * time.Second):
 		t.Fatalf("Timed out waiting for connection")
@@ -4411,7 +4411,7 @@ func TestSelfConnect(t *testing.T) {
 	}
 
 	<-notifyCh
-	if err := ep.GetSockOpt(tcpip.ErrorOption{}); err != nil {
+	if err := ep.LastError(); err != nil {
 		t.Fatalf("Connect failed: %s", err)
 	}
 
diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go
index b6031354e1..1f5340cd03 100644
--- a/pkg/tcpip/transport/tcp/testing/context/context.go
+++ b/pkg/tcpip/transport/tcp/testing/context/context.go
@@ -638,7 +638,7 @@ func (c *Context) Connect(iss seqnum.Value, rcvWnd seqnum.Size, options []byte)
 	// Wait for connection to be established.
 	select {
 	case <-notifyCh:
-		if err := c.EP.GetSockOpt(tcpip.ErrorOption{}); err != nil {
+		if err := c.EP.LastError(); err != nil {
 			c.t.Fatalf("Unexpected error when connecting: %v", err)
 		}
 	case <-time.After(1 * time.Second):
@@ -882,8 +882,7 @@ func (c *Context) CreateConnectedWithOptions(wantOptions header.TCPSynOptions) *
 	// Wait for connection to be established.
 	select {
 	case <-notifyCh:
-		err = c.EP.GetSockOpt(tcpip.ErrorOption{})
-		if err != nil {
+		if err := c.EP.LastError(); err != nil {
 			c.t.Fatalf("Unexpected error when connecting: %v", err)
 		}
 	case <-time.After(1 * time.Second):
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index 0a9d3c6cf8..1d5ebe3f2b 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -209,7 +209,7 @@ func (e *endpoint) UniqueID() uint64 {
 	return e.uniqueID
 }
 
-func (e *endpoint) takeLastError() *tcpip.Error {
+func (e *endpoint) LastError() *tcpip.Error {
 	e.lastErrorMu.Lock()
 	defer e.lastErrorMu.Unlock()
 
@@ -268,7 +268,7 @@ func (e *endpoint) ModerateRecvBuf(copied int) {}
 // Read reads data from the endpoint. This method does not block if
 // there is no data pending.
 func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
-	if err := e.takeLastError(); err != nil {
+	if err := e.LastError(); err != nil {
 		return buffer.View{}, tcpip.ControlMessages{}, err
 	}
 
@@ -411,7 +411,7 @@ func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c
 }
 
 func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-chan struct{}, *tcpip.Error) {
-	if err := e.takeLastError(); err != nil {
+	if err := e.LastError(); err != nil {
 		return 0, nil, err
 	}
 
@@ -962,8 +962,6 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
 // GetSockOpt implements tcpip.Endpoint.GetSockOpt.
 func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
 	switch o := opt.(type) {
-	case tcpip.ErrorOption:
-		return e.takeLastError()
 	case *tcpip.MulticastInterfaceOption:
 		e.mu.Lock()
 		*o = tcpip.MulticastInterfaceOption{

From 5ec86b76aa71417c09347253027ebbc33d4b3123 Mon Sep 17 00:00:00 2001
From: Ghanan Gowripalan <ghanan@google.com>
Date: Thu, 27 Aug 2020 13:43:47 -0700
Subject: [PATCH 098/211] Skip IPv6UDPUnboundSocketNetlinkTest on native linux

...while we figure out if we want to consider the loopback interface
bound to all IPs in an assigned IPv6 subnet, or not (to maintain
compatibility with Linux).

PiperOrigin-RevId: 328807974
---
 test/syscalls/linux/socket_ipv6_udp_unbound_netlink.cc | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.cc b/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.cc
index 6275b5aed3..539a4ec551 100644
--- a/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.cc
+++ b/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.cc
@@ -26,7 +26,10 @@ namespace testing {
 // Checks that the loopback interface considers itself bound to all IPs in an
 // associated subnet.
 TEST_P(IPv6UDPUnboundSocketNetlinkTest, JoinSubnet) {
-  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
+  // TODO(b/166440211): Only run this test on gvisor or remove if the loopback
+  // interface should not consider itself bound to all IPs in an IPv6 subnet.
+  SKIP_IF(!IsRunningOnGvisor() ||
+          !ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
 
   // Add an IP address to the loopback interface.
   Link loopback_link = ASSERT_NO_ERRNO_AND_VALUE(LoopbackLink());

From d50bfc1b0d9884c2f07c944eff7a7e7d40aceb1b Mon Sep 17 00:00:00 2001
From: Zach Koopmans <zkoopmans@google.com>
Date: Thu, 27 Aug 2020 14:09:25 -0700
Subject: [PATCH 099/211] Fix BadSocketPair for open source.

BadSocketPair test will return several errnos (EPERM, ESOCKTNOSUPPORT,
EAFNOSUPPORT) meaning the test is just too specific. Checking that the
syscall fails is appropriate.

PiperOrigin-RevId: 328813071
---
 test/syscalls/linux/socket_inet_loopback.cc | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc
index 7c1d6a4148..67893033cc 100644
--- a/test/syscalls/linux/socket_inet_loopback.cc
+++ b/test/syscalls/linux/socket_inet_loopback.cc
@@ -97,13 +97,9 @@ TEST(BadSocketPairArgs, ValidateErrForBadCallsToSocketPair) {
   ASSERT_THAT(socketpair(AF_INET6, 0, 0, fd),
               SyscallFailsWithErrno(ESOCKTNOSUPPORT));
 
-  // Invalid AF will return ENOAFSUPPORT or EPERM.
-  ASSERT_THAT(socketpair(AF_MAX, 0, 0, fd),
-              ::testing::AnyOf(SyscallFailsWithErrno(EAFNOSUPPORT),
-                               SyscallFailsWithErrno(EPERM)));
-  ASSERT_THAT(socketpair(8675309, 0, 0, fd),
-              ::testing::AnyOf(SyscallFailsWithErrno(EAFNOSUPPORT),
-                               SyscallFailsWithErrno(EPERM)));
+  // Invalid AF will fail.
+  ASSERT_THAT(socketpair(AF_MAX, 0, 0, fd), SyscallFails());
+  ASSERT_THAT(socketpair(8675309, 0, 0, fd), SyscallFails());
 }
 
 enum class Operation {

From 512a4015f6180f7312cbab0cd1de1a0acc40d0ff Mon Sep 17 00:00:00 2001
From: Jinmou Li <jinmli@google.com>
Date: Wed, 24 Jun 2020 19:38:47 +0000
Subject: [PATCH 100/211] beef up write syscall tests

Added a few tests for write(2) and pwrite(2)

1. Regular Files

For write(2)

- write zero bytes should not move the offset
- write non-zero bytes should increment the offset the exact amount
- write non-zero bytes after a lseek() should move the offset the exact amount after the seek
- write non-zero bytes with O_APPEND should move the offset the exact amount after original EOF

For pwrite(2), offset is not affected when

- pwrite zero bytes
- pwrite non-zero bytes

For EOF, added a test asserting the EOF (indicated by lseek(SEEK_END)) is updated properly after writing non-zero bytes

2. Symlink

Added one pwrite64() call for symlink that is written as a counterpart of the existing test using pread64()
---
 test/syscalls/linux/symlink.cc | 29 ++++++++++++++
 test/syscalls/linux/write.cc   | 71 ++++++++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+)

diff --git a/test/syscalls/linux/symlink.cc b/test/syscalls/linux/symlink.cc
index a17ff62e97..aa1f32c859 100644
--- a/test/syscalls/linux/symlink.cc
+++ b/test/syscalls/linux/symlink.cc
@@ -218,6 +218,35 @@ TEST(SymlinkTest, PreadFromSymlink) {
   EXPECT_THAT(unlink(linkname.c_str()), SyscallSucceeds());
 }
 
+TEST(SymlinkTest, PwriteToSymlink) {
+  std::string name = NewTempAbsPath();
+  int fd;
+  ASSERT_THAT(fd = open(name.c_str(), O_CREAT, 0644), SyscallSucceeds());
+  ASSERT_THAT(close(fd), SyscallSucceeds());
+
+  std::string linkname = NewTempAbsPath();
+  ASSERT_THAT(symlink(name.c_str(), linkname.c_str()), SyscallSucceeds());
+
+  ASSERT_THAT(fd = open(linkname.c_str(), O_WRONLY), SyscallSucceeds());
+
+  const int data_size = 10;
+  const std::string data = std::string(data_size, 'a');
+  EXPECT_THAT(pwrite64(fd, data.c_str(), data.size(), 0), SyscallSucceedsWithValue(data.size()));
+
+  ASSERT_THAT(close(fd), SyscallSucceeds());
+  ASSERT_THAT(fd = open(name.c_str(), O_RDONLY), SyscallSucceeds());
+
+  char buf[data_size+1];
+  EXPECT_THAT(pread64(fd, buf, data.size(), 0), SyscallSucceeds());
+  buf[data.size()] = '\0';
+  EXPECT_STREQ(buf, data.c_str());
+
+  ASSERT_THAT(close(fd), SyscallSucceeds());
+
+  EXPECT_THAT(unlink(name.c_str()), SyscallSucceeds());
+  EXPECT_THAT(unlink(linkname.c_str()), SyscallSucceeds());
+}
+
 TEST(SymlinkTest, SymlinkAtDegradedPermissions_NoRandomSave) {
   // Drop capabilities that allow us to override file and directory permissions.
   ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
diff --git a/test/syscalls/linux/write.cc b/test/syscalls/linux/write.cc
index 39b5b2f567..d611f3c5d0 100644
--- a/test/syscalls/linux/write.cc
+++ b/test/syscalls/linux/write.cc
@@ -133,6 +133,77 @@ TEST_F(WriteTest, WriteExceedsRLimit) {
   EXPECT_THAT(close(fd), SyscallSucceeds());
 }
 
+TEST_F(WriteTest, WriteIncrementOffset) {
+  TempPath tmpfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor f = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfile.path().c_str(), O_WRONLY));
+  int fd = f.get();
+
+  EXPECT_THAT(WriteBytes(fd, 0), SyscallSucceedsWithValue(0));
+  EXPECT_THAT(lseek(fd, 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+
+  const int bytes_total = 1024;
+
+  EXPECT_THAT(WriteBytes(fd, bytes_total), SyscallSucceedsWithValue(bytes_total));
+  EXPECT_THAT(lseek(fd, 0, SEEK_CUR), SyscallSucceedsWithValue(bytes_total));
+}
+
+TEST_F(WriteTest, WriteIncrementOffsetSeek) {
+  const std::string data = "hello world\n";
+  TempPath tmpfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), data, TempPath::kDefaultFileMode));
+  FileDescriptor f = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfile.path().c_str(), O_WRONLY));
+  int fd = f.get();
+
+  const int seek_offset = data.size()/2;
+  ASSERT_THAT(lseek(fd, seek_offset, SEEK_SET), SyscallSucceedsWithValue(seek_offset));
+
+  const int write_bytes = 512;
+  EXPECT_THAT(WriteBytes(fd, write_bytes), SyscallSucceedsWithValue(write_bytes));
+  EXPECT_THAT(lseek(fd, 0, SEEK_CUR), SyscallSucceedsWithValue(seek_offset+write_bytes));
+}
+
+TEST_F(WriteTest, WriteIncrementOffsetAppend) {
+  const std::string data = "hello world\n";
+  TempPath tmpfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), data, TempPath::kDefaultFileMode));
+  FileDescriptor f = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfile.path().c_str(),O_WRONLY | O_APPEND));
+  int fd = f.get();
+
+  EXPECT_THAT(WriteBytes(fd, 1024), SyscallSucceedsWithValue(1024));
+  EXPECT_THAT(lseek(fd, 0, SEEK_CUR), SyscallSucceedsWithValue(data.size()+1024));
+}
+
+TEST_F(WriteTest, WriteIncrementOffsetEOF) {
+  const std::string data = "hello world\n";
+  const TempPath tmpfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), data, TempPath::kDefaultFileMode));
+  FileDescriptor f = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfile.path().c_str(), O_WRONLY));
+  int fd = f.get();
+
+  EXPECT_THAT(lseek(fd, 0, SEEK_END), SyscallSucceedsWithValue(data.size()));
+
+  EXPECT_THAT(WriteBytes(fd, 1024), SyscallSucceedsWithValue(1024));
+  EXPECT_THAT(lseek(fd, 0, SEEK_END), SyscallSucceedsWithValue(data.size()+1024));
+}
+
+TEST_F(WriteTest, PwriteNoChangeOffset) {
+  TempPath tmpfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor f = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfile.path().c_str(), O_WRONLY));
+  int fd = f.get();
+
+  const std::string data = "hello world\n";
+
+  EXPECT_THAT(pwrite(fd, data.data(), data.size(), 0), SyscallSucceedsWithValue(data.size()));
+  EXPECT_THAT(lseek(fd, 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+
+  const int bytes_total = 1024;
+  ASSERT_THAT(WriteBytes(fd, bytes_total), SyscallSucceedsWithValue(bytes_total));
+  ASSERT_THAT(lseek(fd, 0, SEEK_CUR), SyscallSucceedsWithValue(bytes_total));
+
+  EXPECT_THAT(pwrite(fd, data.data(), data.size(), bytes_total), SyscallSucceedsWithValue(data.size()));
+  EXPECT_THAT(lseek(fd, 0, SEEK_CUR), SyscallSucceedsWithValue(bytes_total));
+}
+
 }  // namespace
 
 }  // namespace testing

From cc5312a42f21f34c178cd821de227f4167c00cfb Mon Sep 17 00:00:00 2001
From: Ghanan Gowripalan <ghanan@google.com>
Date: Thu, 27 Aug 2020 15:45:02 -0700
Subject: [PATCH 101/211] Improve type safety for socket options

The existing implementation for {G,S}etSockOpt take arguments of an
empty interface type which all types (implicitly) implement; any
type may be passed to the functions.

This change introduces marker interfaces for socket options that may be
set or queried which socket option types implement to ensure that invalid
types are caught at compile time. Different interfaces are used to allow
the compiler to enforce read-only or set-only socket options.

Fixes #3714.

RELNOTES: n/a
PiperOrigin-RevId: 328832161
---
 pkg/sentry/socket/netstack/netstack.go    |  55 +++++-----
 pkg/sentry/socket/unix/transport/unix.go  |  14 +--
 pkg/tcpip/stack/transport_demuxer_test.go |   4 +-
 pkg/tcpip/stack/transport_test.go         |   4 +-
 pkg/tcpip/tcpip.go                        | 127 ++++++++++++++++++----
 pkg/tcpip/transport/icmp/endpoint.go      |   6 +-
 pkg/tcpip/transport/packet/endpoint.go    |   6 +-
 pkg/tcpip/transport/raw/endpoint.go       |   6 +-
 pkg/tcpip/transport/tcp/endpoint.go       |  58 +++++-----
 pkg/tcpip/transport/tcp/tcp_test.go       |  95 ++++++++++------
 pkg/tcpip/transport/udp/endpoint.go       |  16 +--
 pkg/tcpip/transport/udp/udp_test.go       |  45 ++++----
 12 files changed, 276 insertions(+), 160 deletions(-)

diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 8da77cc681..0bf21f7d8e 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -236,7 +236,7 @@ type commonEndpoint interface {
 
 	// SetSockOpt implements tcpip.Endpoint.SetSockOpt and
 	// transport.Endpoint.SetSockOpt.
-	SetSockOpt(interface{}) *tcpip.Error
+	SetSockOpt(tcpip.SettableSocketOption) *tcpip.Error
 
 	// SetSockOptBool implements tcpip.Endpoint.SetSockOptBool and
 	// transport.Endpoint.SetSockOptBool.
@@ -248,7 +248,7 @@ type commonEndpoint interface {
 
 	// GetSockOpt implements tcpip.Endpoint.GetSockOpt and
 	// transport.Endpoint.GetSockOpt.
-	GetSockOpt(interface{}) *tcpip.Error
+	GetSockOpt(tcpip.GettableSocketOption) *tcpip.Error
 
 	// GetSockOptBool implements tcpip.Endpoint.GetSockOptBool and
 	// transport.Endpoint.GetSockOpt.
@@ -1778,8 +1778,7 @@ func SetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, level int
 		t.Kernel().EmitUnimplementedEvent(t)
 	}
 
-	// Default to the old behavior; hand off to network stack.
-	return syserr.TranslateNetstackError(ep.SetSockOpt(struct{}{}))
+	return nil
 }
 
 // setSockOptSocket implements SetSockOpt when level is SOL_SOCKET.
@@ -1824,7 +1823,8 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
 		}
 		name := string(optVal[:n])
 		if name == "" {
-			return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.BindToDeviceOption(0)))
+			v := tcpip.BindToDeviceOption(0)
+			return syserr.TranslateNetstackError(ep.SetSockOpt(&v))
 		}
 		s := t.NetworkContext()
 		if s == nil {
@@ -1832,7 +1832,8 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
 		}
 		for nicID, nic := range s.Interfaces() {
 			if nic.Name == name {
-				return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.BindToDeviceOption(nicID)))
+				v := tcpip.BindToDeviceOption(nicID)
+				return syserr.TranslateNetstackError(ep.SetSockOpt(&v))
 			}
 		}
 		return syserr.ErrUnknownDevice
@@ -1898,7 +1899,8 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
 			socket.SetSockOptEmitUnimplementedEvent(t, name)
 		}
 
-		return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.OutOfBandInlineOption(v)))
+		opt := tcpip.OutOfBandInlineOption(v)
+		return syserr.TranslateNetstackError(ep.SetSockOpt(&opt))
 
 	case linux.SO_NO_CHECK:
 		if len(optVal) < sizeOfInt32 {
@@ -1921,21 +1923,20 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
 		}
 
 		return syserr.TranslateNetstackError(
-			ep.SetSockOpt(tcpip.LingerOption{
+			ep.SetSockOpt(&tcpip.LingerOption{
 				Enabled: v.OnOff != 0,
 				Timeout: time.Second * time.Duration(v.Linger)}))
 
 	case linux.SO_DETACH_FILTER:
 		// optval is ignored.
 		var v tcpip.SocketDetachFilterOption
-		return syserr.TranslateNetstackError(ep.SetSockOpt(v))
+		return syserr.TranslateNetstackError(ep.SetSockOpt(&v))
 
 	default:
 		socket.SetSockOptEmitUnimplementedEvent(t, name)
 	}
 
-	// Default to the old behavior; hand off to network stack.
-	return syserr.TranslateNetstackError(ep.SetSockOpt(struct{}{}))
+	return nil
 }
 
 // setSockOptTCP implements SetSockOpt when level is SOL_TCP.
@@ -1982,7 +1983,8 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *
 		if v < 1 || v > linux.MAX_TCP_KEEPIDLE {
 			return syserr.ErrInvalidArgument
 		}
-		return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.KeepaliveIdleOption(time.Second * time.Duration(v))))
+		opt := tcpip.KeepaliveIdleOption(time.Second * time.Duration(v))
+		return syserr.TranslateNetstackError(ep.SetSockOpt(&opt))
 
 	case linux.TCP_KEEPINTVL:
 		if len(optVal) < sizeOfInt32 {
@@ -1993,7 +1995,8 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *
 		if v < 1 || v > linux.MAX_TCP_KEEPINTVL {
 			return syserr.ErrInvalidArgument
 		}
-		return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.KeepaliveIntervalOption(time.Second * time.Duration(v))))
+		opt := tcpip.KeepaliveIntervalOption(time.Second * time.Duration(v))
+		return syserr.TranslateNetstackError(ep.SetSockOpt(&opt))
 
 	case linux.TCP_KEEPCNT:
 		if len(optVal) < sizeOfInt32 {
@@ -2015,11 +2018,12 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *
 		if v < 0 {
 			return syserr.ErrInvalidArgument
 		}
-		return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.TCPUserTimeoutOption(time.Millisecond * time.Duration(v))))
+		opt := tcpip.TCPUserTimeoutOption(time.Millisecond * time.Duration(v))
+		return syserr.TranslateNetstackError(ep.SetSockOpt(&opt))
 
 	case linux.TCP_CONGESTION:
 		v := tcpip.CongestionControlOption(optVal)
-		if err := ep.SetSockOpt(v); err != nil {
+		if err := ep.SetSockOpt(&v); err != nil {
 			return syserr.TranslateNetstackError(err)
 		}
 		return nil
@@ -2030,7 +2034,8 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *
 		}
 
 		v := int32(usermem.ByteOrder.Uint32(optVal))
-		return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.TCPLingerTimeoutOption(time.Second * time.Duration(v))))
+		opt := tcpip.TCPLingerTimeoutOption(time.Second * time.Duration(v))
+		return syserr.TranslateNetstackError(ep.SetSockOpt(&opt))
 
 	case linux.TCP_DEFER_ACCEPT:
 		if len(optVal) < sizeOfInt32 {
@@ -2040,7 +2045,8 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *
 		if v < 0 {
 			v = 0
 		}
-		return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.TCPDeferAcceptOption(time.Second * time.Duration(v))))
+		opt := tcpip.TCPDeferAcceptOption(time.Second * time.Duration(v))
+		return syserr.TranslateNetstackError(ep.SetSockOpt(&opt))
 
 	case linux.TCP_SYNCNT:
 		if len(optVal) < sizeOfInt32 {
@@ -2065,8 +2071,7 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *
 		emitUnimplementedEventTCP(t, name)
 	}
 
-	// Default to the old behavior; hand off to network stack.
-	return syserr.TranslateNetstackError(ep.SetSockOpt(struct{}{}))
+	return nil
 }
 
 // setSockOptIPv6 implements SetSockOpt when level is SOL_IPV6.
@@ -2144,8 +2149,7 @@ func setSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name
 		emitUnimplementedEventIPv6(t, name)
 	}
 
-	// Default to the old behavior; hand off to network stack.
-	return syserr.TranslateNetstackError(ep.SetSockOpt(struct{}{}))
+	return nil
 }
 
 var (
@@ -2223,7 +2227,7 @@ func setSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name in
 			return err
 		}
 
-		return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.AddMembershipOption{
+		return syserr.TranslateNetstackError(ep.SetSockOpt(&tcpip.AddMembershipOption{
 			NIC: tcpip.NICID(req.InterfaceIndex),
 			// TODO(igudger): Change AddMembership to use the standard
 			// any address representation.
@@ -2237,7 +2241,7 @@ func setSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name in
 			return err
 		}
 
-		return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.RemoveMembershipOption{
+		return syserr.TranslateNetstackError(ep.SetSockOpt(&tcpip.RemoveMembershipOption{
 			NIC: tcpip.NICID(req.InterfaceIndex),
 			// TODO(igudger): Change DropMembership to use the standard
 			// any address representation.
@@ -2251,7 +2255,7 @@ func setSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name in
 			return err
 		}
 
-		return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.MulticastInterfaceOption{
+		return syserr.TranslateNetstackError(ep.SetSockOpt(&tcpip.MulticastInterfaceOption{
 			NIC:           tcpip.NICID(req.InterfaceIndex),
 			InterfaceAddr: bytesToIPAddress(req.InterfaceAddr[:]),
 		}))
@@ -2375,8 +2379,7 @@ func setSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name in
 		t.Kernel().EmitUnimplementedEvent(t)
 	}
 
-	// Default to the old behavior; hand off to network stack.
-	return syserr.TranslateNetstackError(ep.SetSockOpt(struct{}{}))
+	return nil
 }
 
 // emitUnimplementedEventTCP emits unimplemented event if name is valid. This
diff --git a/pkg/sentry/socket/unix/transport/unix.go b/pkg/sentry/socket/unix/transport/unix.go
index 4bf06d4dc1..cc9d650fb4 100644
--- a/pkg/sentry/socket/unix/transport/unix.go
+++ b/pkg/sentry/socket/unix/transport/unix.go
@@ -172,9 +172,8 @@ type Endpoint interface {
 	// connected.
 	GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error)
 
-	// SetSockOpt sets a socket option. opt should be one of the tcpip.*Option
-	// types.
-	SetSockOpt(opt interface{}) *tcpip.Error
+	// SetSockOpt sets a socket option.
+	SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error
 
 	// SetSockOptBool sets a socket option for simple cases when a value has
 	// the int type.
@@ -184,9 +183,8 @@ type Endpoint interface {
 	// the int type.
 	SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error
 
-	// GetSockOpt gets a socket option. opt should be a pointer to one of the
-	// tcpip.*Option types.
-	GetSockOpt(opt interface{}) *tcpip.Error
+	// GetSockOpt gets a socket option.
+	GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error
 
 	// GetSockOptBool gets a socket option for simple cases when a return
 	// value has the int type.
@@ -841,7 +839,7 @@ func (e *baseEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMess
 }
 
 // SetSockOpt sets a socket option. Currently not supported.
-func (e *baseEndpoint) SetSockOpt(opt interface{}) *tcpip.Error {
+func (e *baseEndpoint) SetSockOpt(tcpip.SettableSocketOption) *tcpip.Error {
 	return nil
 }
 
@@ -943,7 +941,7 @@ func (e *baseEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
 }
 
 // GetSockOpt implements tcpip.Endpoint.GetSockOpt.
-func (e *baseEndpoint) GetSockOpt(opt interface{}) *tcpip.Error {
+func (e *baseEndpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error {
 	switch opt.(type) {
 	case *tcpip.LingerOption:
 		return nil
diff --git a/pkg/tcpip/stack/transport_demuxer_test.go b/pkg/tcpip/stack/transport_demuxer_test.go
index 1339edc2df..4d6d62eec8 100644
--- a/pkg/tcpip/stack/transport_demuxer_test.go
+++ b/pkg/tcpip/stack/transport_demuxer_test.go
@@ -312,8 +312,8 @@ func TestBindToDeviceDistribution(t *testing.T) {
 							t.Fatalf("SetSockOptBool(ReusePortOption, %t) on endpoint %d failed: %s", endpoint.reuse, i, err)
 						}
 						bindToDeviceOption := tcpip.BindToDeviceOption(endpoint.bindToDevice)
-						if err := ep.SetSockOpt(bindToDeviceOption); err != nil {
-							t.Fatalf("SetSockOpt(%#v) on endpoint %d failed: %s", bindToDeviceOption, i, err)
+						if err := ep.SetSockOpt(&bindToDeviceOption); err != nil {
+							t.Fatalf("SetSockOpt(&%T(%d)) on endpoint %d failed: %s", bindToDeviceOption, bindToDeviceOption, i, err)
 						}
 
 						var dstAddr tcpip.Address
diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go
index 7869bb98b4..a1458c899e 100644
--- a/pkg/tcpip/stack/transport_test.go
+++ b/pkg/tcpip/stack/transport_test.go
@@ -105,7 +105,7 @@ func (*fakeTransportEndpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcp
 }
 
 // SetSockOpt sets a socket option. Currently not supported.
-func (*fakeTransportEndpoint) SetSockOpt(interface{}) *tcpip.Error {
+func (*fakeTransportEndpoint) SetSockOpt(tcpip.SettableSocketOption) *tcpip.Error {
 	return tcpip.ErrInvalidEndpointState
 }
 
@@ -130,7 +130,7 @@ func (*fakeTransportEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.E
 }
 
 // GetSockOpt implements tcpip.Endpoint.GetSockOpt.
-func (*fakeTransportEndpoint) GetSockOpt(opt interface{}) *tcpip.Error {
+func (*fakeTransportEndpoint) GetSockOpt(tcpip.GettableSocketOption) *tcpip.Error {
 	return tcpip.ErrInvalidEndpointState
 }
 
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index cae9436082..cd72d4f021 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -578,8 +578,8 @@ type Endpoint interface {
 	// if waiter.EventIn is set, the endpoint is immediately readable.
 	Readiness(mask waiter.EventMask) waiter.EventMask
 
-	// SetSockOpt sets a socket option. opt should be one of the *Option types.
-	SetSockOpt(opt interface{}) *Error
+	// SetSockOpt sets a socket option.
+	SetSockOpt(opt SettableSocketOption) *Error
 
 	// SetSockOptBool sets a socket option, for simple cases where a value
 	// has the bool type.
@@ -589,9 +589,8 @@ type Endpoint interface {
 	// has the int type.
 	SetSockOptInt(opt SockOptInt, v int) *Error
 
-	// GetSockOpt gets a socket option. opt should be a pointer to one of the
-	// *Option types.
-	GetSockOpt(opt interface{}) *Error
+	// GetSockOpt gets a socket option.
+	GetSockOpt(opt GettableSocketOption) *Error
 
 	// GetSockOptBool gets a socket option for simple cases where a return
 	// value has the bool type.
@@ -842,10 +841,37 @@ const (
 	PMTUDiscoveryProbe
 )
 
+// DefaultTTLOption is used by stack.(*Stack).NetworkProtocolOption to specify
+// a default TTL.
+type DefaultTTLOption uint8
+
+// AvailableCongestionControlOption is used to query the supported congestion
+// control algorithms.
+type AvailableCongestionControlOption string
+
+// ModerateReceiveBufferOption is used by buffer moderation.
+type ModerateReceiveBufferOption bool
+
+// GettableSocketOption is a marker interface for socket options that may be
+// queried.
+type GettableSocketOption interface {
+	isGettableSocketOption()
+}
+
+// SettableSocketOption is a marker interface for socket options that may be
+// configured.
+type SettableSocketOption interface {
+	isSettableSocketOption()
+}
+
 // BindToDeviceOption is used by SetSockOpt/GetSockOpt to specify that sockets
 // should bind only on a specific NIC.
 type BindToDeviceOption NICID
 
+func (*BindToDeviceOption) isGettableSocketOption() {}
+
+func (*BindToDeviceOption) isSettableSocketOption() {}
+
 // TCPInfoOption is used by GetSockOpt to expose TCP statistics.
 //
 // TODO(b/64800844): Add and populate stat fields.
@@ -854,68 +880,111 @@ type TCPInfoOption struct {
 	RTTVar time.Duration
 }
 
+func (*TCPInfoOption) isGettableSocketOption() {}
+
 // KeepaliveIdleOption is used by SetSockOpt/GetSockOpt to specify the time a
 // connection must remain idle before the first TCP keepalive packet is sent.
 // Once this time is reached, KeepaliveIntervalOption is used instead.
 type KeepaliveIdleOption time.Duration
 
+func (*KeepaliveIdleOption) isGettableSocketOption() {}
+
+func (*KeepaliveIdleOption) isSettableSocketOption() {}
+
 // KeepaliveIntervalOption is used by SetSockOpt/GetSockOpt to specify the
 // interval between sending TCP keepalive packets.
 type KeepaliveIntervalOption time.Duration
 
+func (*KeepaliveIntervalOption) isGettableSocketOption() {}
+
+func (*KeepaliveIntervalOption) isSettableSocketOption() {}
+
 // TCPUserTimeoutOption is used by SetSockOpt/GetSockOpt to specify a user
 // specified timeout for a given TCP connection.
 // See: RFC5482 for details.
 type TCPUserTimeoutOption time.Duration
 
+func (*TCPUserTimeoutOption) isGettableSocketOption() {}
+
+func (*TCPUserTimeoutOption) isSettableSocketOption() {}
+
 // CongestionControlOption is used by SetSockOpt/GetSockOpt to set/get
 // the current congestion control algorithm.
 type CongestionControlOption string
 
-// AvailableCongestionControlOption is used to query the supported congestion
-// control algorithms.
-type AvailableCongestionControlOption string
+func (*CongestionControlOption) isGettableSocketOption() {}
 
-// ModerateReceiveBufferOption is used by buffer moderation.
-type ModerateReceiveBufferOption bool
+func (*CongestionControlOption) isSettableSocketOption() {}
 
 // TCPLingerTimeoutOption is used by SetSockOpt/GetSockOpt to set/get the
 // maximum duration for which a socket lingers in the TCP_FIN_WAIT_2 state
 // before being marked closed.
 type TCPLingerTimeoutOption time.Duration
 
+func (*TCPLingerTimeoutOption) isGettableSocketOption() {}
+
+func (*TCPLingerTimeoutOption) isSettableSocketOption() {}
+
 // TCPTimeWaitTimeoutOption is used by SetSockOpt/GetSockOpt to set/get the
 // maximum duration for which a socket lingers in the TIME_WAIT state
 // before being marked closed.
 type TCPTimeWaitTimeoutOption time.Duration
 
+func (*TCPTimeWaitTimeoutOption) isGettableSocketOption() {}
+
+func (*TCPTimeWaitTimeoutOption) isSettableSocketOption() {}
+
 // TCPDeferAcceptOption is used by SetSockOpt/GetSockOpt to allow a
 // accept to return a completed connection only when there is data to be
 // read. This usually means the listening socket will drop the final ACK
 // for a handshake till the specified timeout until a segment with data arrives.
 type TCPDeferAcceptOption time.Duration
 
+func (*TCPDeferAcceptOption) isGettableSocketOption() {}
+
+func (*TCPDeferAcceptOption) isSettableSocketOption() {}
+
 // TCPMinRTOOption is use by SetSockOpt/GetSockOpt to allow overriding
 // default MinRTO used by the Stack.
 type TCPMinRTOOption time.Duration
 
+func (*TCPMinRTOOption) isGettableSocketOption() {}
+
+func (*TCPMinRTOOption) isSettableSocketOption() {}
+
 // TCPMaxRTOOption is use by SetSockOpt/GetSockOpt to allow overriding
 // default MaxRTO used by the Stack.
 type TCPMaxRTOOption time.Duration
 
+func (*TCPMaxRTOOption) isGettableSocketOption() {}
+
+func (*TCPMaxRTOOption) isSettableSocketOption() {}
+
 // TCPMaxRetriesOption is used by SetSockOpt/GetSockOpt to set/get the
 // maximum number of retransmits after which we time out the connection.
 type TCPMaxRetriesOption uint64
 
+func (*TCPMaxRetriesOption) isGettableSocketOption() {}
+
+func (*TCPMaxRetriesOption) isSettableSocketOption() {}
+
 // TCPSynRcvdCountThresholdOption is used by SetSockOpt/GetSockOpt to specify
 // the number of endpoints that can be in SYN-RCVD state before the stack
 // switches to using SYN cookies.
 type TCPSynRcvdCountThresholdOption uint64
 
+func (*TCPSynRcvdCountThresholdOption) isGettableSocketOption() {}
+
+func (*TCPSynRcvdCountThresholdOption) isSettableSocketOption() {}
+
 // TCPSynRetriesOption is used by SetSockOpt/GetSockOpt to specify stack-wide
 // default for number of times SYN is retransmitted before aborting a connect.
 type TCPSynRetriesOption uint8
 
+func (*TCPSynRetriesOption) isGettableSocketOption() {}
+
+func (*TCPSynRetriesOption) isSettableSocketOption() {}
+
 // MulticastInterfaceOption is used by SetSockOpt/GetSockOpt to specify a
 // default interface for multicast.
 type MulticastInterfaceOption struct {
@@ -923,45 +992,57 @@ type MulticastInterfaceOption struct {
 	InterfaceAddr Address
 }
 
-// MembershipOption is used by SetSockOpt/GetSockOpt as an argument to
-// AddMembershipOption and RemoveMembershipOption.
+func (*MulticastInterfaceOption) isGettableSocketOption() {}
+
+func (*MulticastInterfaceOption) isSettableSocketOption() {}
+
+// MembershipOption is used to identify a multicast membership on an interface.
 type MembershipOption struct {
 	NIC           NICID
 	InterfaceAddr Address
 	MulticastAddr Address
 }
 
-// AddMembershipOption is used by SetSockOpt/GetSockOpt to join a multicast
-// group identified by the given multicast address, on the interface matching
-// the given interface address.
+// AddMembershipOption identifies a multicast group to join on some interface.
 type AddMembershipOption MembershipOption
 
-// RemoveMembershipOption is used by SetSockOpt/GetSockOpt to leave a multicast
-// group identified by the given multicast address, on the interface matching
-// the given interface address.
+func (*AddMembershipOption) isSettableSocketOption() {}
+
+// RemoveMembershipOption identifies a multicast group to leave on some
+// interface.
 type RemoveMembershipOption MembershipOption
 
+func (*RemoveMembershipOption) isSettableSocketOption() {}
+
 // OutOfBandInlineOption is used by SetSockOpt/GetSockOpt to specify whether
 // TCP out-of-band data is delivered along with the normal in-band data.
 type OutOfBandInlineOption int
 
-// DefaultTTLOption is used by stack.(*Stack).NetworkProtocolOption to specify
-// a default TTL.
-type DefaultTTLOption uint8
+func (*OutOfBandInlineOption) isGettableSocketOption() {}
+
+func (*OutOfBandInlineOption) isSettableSocketOption() {}
 
 // SocketDetachFilterOption is used by SetSockOpt to detach a previously attached
 // classic BPF filter on a given endpoint.
 type SocketDetachFilterOption int
 
+func (*SocketDetachFilterOption) isSettableSocketOption() {}
+
 // OriginalDestinationOption is used to get the original destination address
 // and port of a redirected packet.
 type OriginalDestinationOption FullAddress
 
+func (*OriginalDestinationOption) isGettableSocketOption() {}
+
 // TCPTimeWaitReuseOption is used stack.(*Stack).TransportProtocolOption to
 // specify if the stack can reuse the port bound by an endpoint in TIME-WAIT for
 // new connections when it is safe from protocol viewpoint.
 type TCPTimeWaitReuseOption uint8
 
+func (*TCPTimeWaitReuseOption) isGettableSocketOption() {}
+
+func (*TCPTimeWaitReuseOption) isSettableSocketOption() {}
+
 const (
 	// TCPTimeWaitReuseDisabled indicates reuse of port bound by endponts in TIME-WAIT cannot
 	// be reused for new connections.
@@ -986,6 +1067,10 @@ type LingerOption struct {
 	Timeout time.Duration
 }
 
+func (*LingerOption) isGettableSocketOption() {}
+
+func (*LingerOption) isSettableSocketOption() {}
+
 // IPPacketInfo is the message structure for IP_PKTINFO.
 //
 // +stateify savable
diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go
index c545c8367c..346ca4bdaa 100644
--- a/pkg/tcpip/transport/icmp/endpoint.go
+++ b/pkg/tcpip/transport/icmp/endpoint.go
@@ -343,9 +343,9 @@ func (e *endpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) {
 }
 
 // SetSockOpt sets a socket option.
-func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
+func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error {
 	switch opt.(type) {
-	case tcpip.SocketDetachFilterOption:
+	case *tcpip.SocketDetachFilterOption:
 		return nil
 	}
 	return nil
@@ -415,7 +415,7 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
 }
 
 // GetSockOpt implements tcpip.Endpoint.GetSockOpt.
-func (*endpoint) GetSockOpt(interface{}) *tcpip.Error {
+func (*endpoint) GetSockOpt(tcpip.GettableSocketOption) *tcpip.Error {
 	return tcpip.ErrUnknownProtocolOption
 }
 
diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go
index 95dc8ed578..81093e9ca6 100644
--- a/pkg/tcpip/transport/packet/endpoint.go
+++ b/pkg/tcpip/transport/packet/endpoint.go
@@ -297,9 +297,9 @@ func (ep *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
 // SetSockOpt implements tcpip.Endpoint.SetSockOpt. Packet sockets cannot be
 // used with SetSockOpt, and this function always returns
 // tcpip.ErrNotSupported.
-func (ep *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
+func (ep *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error {
 	switch opt.(type) {
-	case tcpip.SocketDetachFilterOption:
+	case *tcpip.SocketDetachFilterOption:
 		return nil
 
 	default:
@@ -366,7 +366,7 @@ func (ep *endpoint) LastError() *tcpip.Error {
 }
 
 // GetSockOpt implements tcpip.Endpoint.GetSockOpt.
-func (*endpoint) GetSockOpt(interface{}) *tcpip.Error {
+func (*endpoint) GetSockOpt(tcpip.GettableSocketOption) *tcpip.Error {
 	return tcpip.ErrNotSupported
 }
 
diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go
index 2087bcfa81..71feeb748a 100644
--- a/pkg/tcpip/transport/raw/endpoint.go
+++ b/pkg/tcpip/transport/raw/endpoint.go
@@ -510,9 +510,9 @@ func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
 }
 
 // SetSockOpt implements tcpip.Endpoint.SetSockOpt.
-func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
+func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error {
 	switch opt.(type) {
-	case tcpip.SocketDetachFilterOption:
+	case *tcpip.SocketDetachFilterOption:
 		return nil
 
 	default:
@@ -577,7 +577,7 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
 }
 
 // GetSockOpt implements tcpip.Endpoint.GetSockOpt.
-func (*endpoint) GetSockOpt(interface{}) *tcpip.Error {
+func (*endpoint) GetSockOpt(tcpip.GettableSocketOption) *tcpip.Error {
 	return tcpip.ErrUnknownProtocolOption
 }
 
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 8a5e993b51..c5d9eba5dd 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -1736,10 +1736,10 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
 }
 
 // SetSockOpt sets a socket option.
-func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
+func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error {
 	switch v := opt.(type) {
-	case tcpip.BindToDeviceOption:
-		id := tcpip.NICID(v)
+	case *tcpip.BindToDeviceOption:
+		id := tcpip.NICID(*v)
 		if id != 0 && !e.stack.HasNIC(id) {
 			return tcpip.ErrUnknownDevice
 		}
@@ -1747,27 +1747,27 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
 		e.bindToDevice = id
 		e.UnlockUser()
 
-	case tcpip.KeepaliveIdleOption:
+	case *tcpip.KeepaliveIdleOption:
 		e.keepalive.Lock()
-		e.keepalive.idle = time.Duration(v)
+		e.keepalive.idle = time.Duration(*v)
 		e.keepalive.Unlock()
 		e.notifyProtocolGoroutine(notifyKeepaliveChanged)
 
-	case tcpip.KeepaliveIntervalOption:
+	case *tcpip.KeepaliveIntervalOption:
 		e.keepalive.Lock()
-		e.keepalive.interval = time.Duration(v)
+		e.keepalive.interval = time.Duration(*v)
 		e.keepalive.Unlock()
 		e.notifyProtocolGoroutine(notifyKeepaliveChanged)
 
-	case tcpip.OutOfBandInlineOption:
+	case *tcpip.OutOfBandInlineOption:
 		// We don't currently support disabling this option.
 
-	case tcpip.TCPUserTimeoutOption:
+	case *tcpip.TCPUserTimeoutOption:
 		e.LockUser()
-		e.userTimeout = time.Duration(v)
+		e.userTimeout = time.Duration(*v)
 		e.UnlockUser()
 
-	case tcpip.CongestionControlOption:
+	case *tcpip.CongestionControlOption:
 		// Query the available cc algorithms in the stack and
 		// validate that the specified algorithm is actually
 		// supported in the stack.
@@ -1777,10 +1777,10 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
 		}
 		availCC := strings.Split(string(avail), " ")
 		for _, cc := range availCC {
-			if v == tcpip.CongestionControlOption(cc) {
+			if *v == tcpip.CongestionControlOption(cc) {
 				e.LockUser()
 				state := e.EndpointState()
-				e.cc = v
+				e.cc = *v
 				switch state {
 				case StateEstablished:
 					if e.EndpointState() == state {
@@ -1796,43 +1796,43 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
 		// control algorithm is specified.
 		return tcpip.ErrNoSuchFile
 
-	case tcpip.TCPLingerTimeoutOption:
+	case *tcpip.TCPLingerTimeoutOption:
 		e.LockUser()
 
 		switch {
-		case v < 0:
+		case *v < 0:
 			// Same as effectively disabling TCPLinger timeout.
-			v = -1
-		case v == 0:
+			*v = -1
+		case *v == 0:
 			// Same as the stack default.
 			var stackLingerTimeout tcpip.TCPLingerTimeoutOption
 			if err := e.stack.TransportProtocolOption(ProtocolNumber, &stackLingerTimeout); err != nil {
 				panic(fmt.Sprintf("e.stack.TransportProtocolOption(%d, %+v) = %v", ProtocolNumber, &stackLingerTimeout, err))
 			}
-			v = stackLingerTimeout
-		case v > tcpip.TCPLingerTimeoutOption(MaxTCPLingerTimeout):
+			*v = stackLingerTimeout
+		case *v > tcpip.TCPLingerTimeoutOption(MaxTCPLingerTimeout):
 			// Cap it to Stack's default TCP_LINGER2 timeout.
-			v = tcpip.TCPLingerTimeoutOption(MaxTCPLingerTimeout)
+			*v = tcpip.TCPLingerTimeoutOption(MaxTCPLingerTimeout)
 		default:
 		}
 
-		e.tcpLingerTimeout = time.Duration(v)
+		e.tcpLingerTimeout = time.Duration(*v)
 		e.UnlockUser()
 
-	case tcpip.TCPDeferAcceptOption:
+	case *tcpip.TCPDeferAcceptOption:
 		e.LockUser()
-		if time.Duration(v) > MaxRTO {
-			v = tcpip.TCPDeferAcceptOption(MaxRTO)
+		if time.Duration(*v) > MaxRTO {
+			*v = tcpip.TCPDeferAcceptOption(MaxRTO)
 		}
-		e.deferAccept = time.Duration(v)
+		e.deferAccept = time.Duration(*v)
 		e.UnlockUser()
 
-	case tcpip.SocketDetachFilterOption:
+	case *tcpip.SocketDetachFilterOption:
 		return nil
 
-	case tcpip.LingerOption:
+	case *tcpip.LingerOption:
 		e.LockUser()
-		e.linger = v
+		e.linger = *v
 		e.UnlockUser()
 
 	default:
@@ -1993,7 +1993,7 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
 }
 
 // GetSockOpt implements tcpip.Endpoint.GetSockOpt.
-func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
+func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error {
 	switch o := opt.(type) {
 	case *tcpip.BindToDeviceOption:
 		e.LockUser()
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index 3d3034d509..adb32e4288 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -1349,7 +1349,9 @@ func TestConnectBindToDevice(t *testing.T) {
 
 			c.Create(-1)
 			bindToDevice := tcpip.BindToDeviceOption(test.device)
-			c.EP.SetSockOpt(bindToDevice)
+			if err := c.EP.SetSockOpt(&bindToDevice); err != nil {
+				t.Fatalf("c.EP.SetSockOpt(&%T(%d)): %s", bindToDevice, bindToDevice, err)
+			}
 			// Start connection attempt.
 			waitEntry, _ := waiter.NewChannelEntry(nil)
 			c.WQ.EventRegister(&waitEntry, waiter.EventOut)
@@ -4321,16 +4323,15 @@ func TestBindToDeviceOption(t *testing.T) {
 		t.Run(testAction.name, func(t *testing.T) {
 			if testAction.setBindToDevice != nil {
 				bindToDevice := tcpip.BindToDeviceOption(*testAction.setBindToDevice)
-				if gotErr, wantErr := ep.SetSockOpt(bindToDevice), testAction.setBindToDeviceError; gotErr != wantErr {
-					t.Errorf("SetSockOpt(%#v) got %v, want %v", bindToDevice, gotErr, wantErr)
+				if gotErr, wantErr := ep.SetSockOpt(&bindToDevice), testAction.setBindToDeviceError; gotErr != wantErr {
+					t.Errorf("got SetSockOpt(&%T(%d)) = %s, want = %s", bindToDevice, bindToDevice, gotErr, wantErr)
 				}
 			}
 			bindToDevice := tcpip.BindToDeviceOption(88888)
 			if err := ep.GetSockOpt(&bindToDevice); err != nil {
-				t.Errorf("GetSockOpt got %s, want %v", err, nil)
-			}
-			if got, want := bindToDevice, testAction.getBindToDevice; got != want {
-				t.Errorf("bindToDevice got %d, want %d", got, want)
+				t.Errorf("GetSockOpt(&%T): %s", bindToDevice, err)
+			} else if bindToDevice != testAction.getBindToDevice {
+				t.Errorf("got bindToDevice = %d, want %d", bindToDevice, testAction.getBindToDevice)
 			}
 		})
 	}
@@ -4806,20 +4807,20 @@ func TestEndpointSetCongestionControl(t *testing.T) {
 
 				var oldCC tcpip.CongestionControlOption
 				if err := c.EP.GetSockOpt(&oldCC); err != nil {
-					t.Fatalf("c.EP.SockOpt(%v) = %s", &oldCC, err)
+					t.Fatalf("c.EP.GetSockOpt(&%T) = %s", oldCC, err)
 				}
 
 				if connected {
 					c.Connect(789 /* iss */, 32768 /* rcvWnd */, nil)
 				}
 
-				if err := c.EP.SetSockOpt(tc.cc); err != tc.err {
-					t.Fatalf("c.EP.SetSockOpt(%v) = %s, want %s", tc.cc, err, tc.err)
+				if err := c.EP.SetSockOpt(&tc.cc); err != tc.err {
+					t.Fatalf("got c.EP.SetSockOpt(&%#v) = %s, want %s", tc.cc, err, tc.err)
 				}
 
 				var cc tcpip.CongestionControlOption
 				if err := c.EP.GetSockOpt(&cc); err != nil {
-					t.Fatalf("c.EP.SockOpt(%v) = %s", &cc, err)
+					t.Fatalf("c.EP.GetSockOpt(&%T): %s", cc, err)
 				}
 
 				got, want := cc, oldCC
@@ -4831,7 +4832,7 @@ func TestEndpointSetCongestionControl(t *testing.T) {
 					want = tc.cc
 				}
 				if got != want {
-					t.Fatalf("got congestion control: %v, want: %v", got, want)
+					t.Fatalf("got congestion control = %+v, want = %+v", got, want)
 				}
 			})
 		}
@@ -4852,11 +4853,22 @@ func TestKeepalive(t *testing.T) {
 
 	c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
 
+	const keepAliveIdle = 100 * time.Millisecond
 	const keepAliveInterval = 3 * time.Second
-	c.EP.SetSockOpt(tcpip.KeepaliveIdleOption(100 * time.Millisecond))
-	c.EP.SetSockOpt(tcpip.KeepaliveIntervalOption(keepAliveInterval))
+	keepAliveIdleOpt := tcpip.KeepaliveIdleOption(keepAliveIdle)
+	if err := c.EP.SetSockOpt(&keepAliveIdleOpt); err != nil {
+		t.Fatalf("c.EP.SetSockOpt(&%T(%s)): %s", keepAliveIdleOpt, keepAliveIdle, err)
+	}
+	keepAliveIntervalOpt := tcpip.KeepaliveIntervalOption(keepAliveInterval)
+	if err := c.EP.SetSockOpt(&keepAliveIntervalOpt); err != nil {
+		t.Fatalf("c.EP.SetSockOpt(&%T(%s)): %s", keepAliveIntervalOpt, keepAliveInterval, err)
+	}
-	c.EP.SetSockOptInt(tcpip.KeepaliveCountOption, 5)
-	c.EP.SetSockOptBool(tcpip.KeepaliveEnabledOption, true)
+	if err := c.EP.SetSockOptInt(tcpip.KeepaliveCountOption, 5); err != nil {
+		t.Fatalf("c.EP.SetSockOptInt(tcpip.KeepaliveCountOption, 5): %s", err)
+	}
+	if err := c.EP.SetSockOptBool(tcpip.KeepaliveEnabledOption, true); err != nil {
+		t.Fatalf("c.EP.SetSockOptBool(tcpip.KeepaliveEnabledOption, true): %s", err)
+	}
 
 	// 5 unacked keepalives are sent. ACK each one, and check that the
 	// connection stays alive after 5.
@@ -6216,15 +6229,17 @@ func TestTCPLingerTimeout(t *testing.T) {
 	}
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
-			if err := c.EP.SetSockOpt(tcpip.TCPLingerTimeoutOption(tc.tcpLingerTimeout)); err != nil {
-				t.Fatalf("SetSockOpt(%s) = %s", tc.tcpLingerTimeout, err)
+			v := tcpip.TCPLingerTimeoutOption(tc.tcpLingerTimeout)
+			if err := c.EP.SetSockOpt(&v); err != nil {
+				t.Fatalf("SetSockOpt(&%T(%s)) = %s", v, tc.tcpLingerTimeout, err)
 			}
-			var v tcpip.TCPLingerTimeoutOption
+
+			v = 0
 			if err := c.EP.GetSockOpt(&v); err != nil {
-				t.Fatalf("GetSockOpt(tcpip.TCPLingerTimeoutOption) = %s", err)
+				t.Fatalf("GetSockOpt(&%T) = %s", v, err)
 			}
 			if got, want := time.Duration(v), tc.want; got != want {
-				t.Fatalf("unexpected linger timeout got: %s, want: %s", got, want)
+				t.Fatalf("got linger timeout = %s, want = %s", got, want)
 			}
 		})
 	}
@@ -6941,7 +6956,10 @@ func TestTCPUserTimeout(t *testing.T) {
 	// expired.
 	initRTO := 1 * time.Second
 	userTimeout := initRTO / 2
-	c.EP.SetSockOpt(tcpip.TCPUserTimeoutOption(userTimeout))
+	v := tcpip.TCPUserTimeoutOption(userTimeout)
+	if err := c.EP.SetSockOpt(&v); err != nil {
+		t.Fatalf("c.EP.SetSockOpt(&%T(%s)): %s", v, userTimeout, err)
+	}
 
 	// Send some data and wait before ACKing it.
 	view := buffer.NewView(3)
@@ -7015,18 +7033,31 @@ func TestKeepaliveWithUserTimeout(t *testing.T) {
 
 	origEstablishedTimedout := c.Stack().Stats().TCP.EstablishedTimedout.Value()
 
+	const keepAliveIdle = 100 * time.Millisecond
 	const keepAliveInterval = 3 * time.Second
-	c.EP.SetSockOpt(tcpip.KeepaliveIdleOption(100 * time.Millisecond))
-	c.EP.SetSockOpt(tcpip.KeepaliveIntervalOption(keepAliveInterval))
-	c.EP.SetSockOptInt(tcpip.KeepaliveCountOption, 10)
-	c.EP.SetSockOptBool(tcpip.KeepaliveEnabledOption, true)
+	keepAliveIdleOption := tcpip.KeepaliveIdleOption(keepAliveIdle)
+	if err := c.EP.SetSockOpt(&keepAliveIdleOption); err != nil {
+		t.Fatalf("c.EP.SetSockOpt(&%T(%s)): %s", keepAliveIdleOption, keepAliveIdle, err)
+	}
+	keepAliveIntervalOption := tcpip.KeepaliveIntervalOption(keepAliveInterval)
+	if err := c.EP.SetSockOpt(&keepAliveIntervalOption); err != nil {
+		t.Fatalf("c.EP.SetSockOpt(&%T(%s)): %s", keepAliveIntervalOption, keepAliveInterval, err)
+	}
+	if err := c.EP.SetSockOptInt(tcpip.KeepaliveCountOption, 10); err != nil {
+		t.Fatalf("c.EP.SetSockOptInt(tcpip.KeepaliveCountOption, 10): %s", err)
+	}
+	if err := c.EP.SetSockOptBool(tcpip.KeepaliveEnabledOption, true); err != nil {
+		t.Fatalf("c.EP.SetSockOptBool(tcpip.KeepaliveEnabledOption, true): %s", err)
+	}
 
 	// Set userTimeout to be the duration to be 1 keepalive
 	// probes. Which means that after the first probe is sent
 	// the second one should cause the connection to be
 	// closed due to userTimeout being hit.
-	userTimeout := 1 * keepAliveInterval
-	c.EP.SetSockOpt(tcpip.TCPUserTimeoutOption(userTimeout))
+	userTimeout := tcpip.TCPUserTimeoutOption(keepAliveInterval)
+	if err := c.EP.SetSockOpt(&userTimeout); err != nil {
+		t.Fatalf("c.EP.SetSockOpt(&%T(%s)): %s", userTimeout, keepAliveInterval, err)
+	}
 
 	// Check that the connection is still alive.
 	if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock {
@@ -7233,8 +7264,9 @@ func TestTCPDeferAccept(t *testing.T) {
 	}
 
 	const tcpDeferAccept = 1 * time.Second
-	if err := c.EP.SetSockOpt(tcpip.TCPDeferAcceptOption(tcpDeferAccept)); err != nil {
-		t.Fatalf("c.EP.SetSockOpt(TCPDeferAcceptOption(%s) failed: %s", tcpDeferAccept, err)
+	tcpDeferAcceptOption := tcpip.TCPDeferAcceptOption(tcpDeferAccept)
+	if err := c.EP.SetSockOpt(&tcpDeferAcceptOption); err != nil {
+		t.Fatalf("c.EP.SetSockOpt(&%T(%s)): %s", tcpDeferAcceptOption, tcpDeferAccept, err)
 	}
 
 	irs, iss := executeHandshake(t, c, context.TestPort, false /* synCookiesInUse */)
@@ -7290,8 +7322,9 @@ func TestTCPDeferAcceptTimeout(t *testing.T) {
 	}
 
 	const tcpDeferAccept = 1 * time.Second
-	if err := c.EP.SetSockOpt(tcpip.TCPDeferAcceptOption(tcpDeferAccept)); err != nil {
-		t.Fatalf("c.EP.SetSockOpt(TCPDeferAcceptOption(%s) failed: %s", tcpDeferAccept, err)
+	tcpDeferAcceptOpt := tcpip.TCPDeferAcceptOption(tcpDeferAccept)
+	if err := c.EP.SetSockOpt(&tcpDeferAcceptOpt); err != nil {
+		t.Fatalf("c.EP.SetSockOpt(&%T(%s)) failed: %s", tcpDeferAcceptOpt, tcpDeferAccept, err)
 	}
 
 	irs, iss := executeHandshake(t, c, context.TestPort, false /* synCookiesInUse */)
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index 1d5ebe3f2b..c74bc4d946 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -683,9 +683,9 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
 }
 
 // SetSockOpt implements tcpip.Endpoint.SetSockOpt.
-func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
+func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error {
 	switch v := opt.(type) {
-	case tcpip.MulticastInterfaceOption:
+	case *tcpip.MulticastInterfaceOption:
 		e.mu.Lock()
 		defer e.mu.Unlock()
 
@@ -721,7 +721,7 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
 		e.multicastNICID = nic
 		e.multicastAddr = addr
 
-	case tcpip.AddMembershipOption:
+	case *tcpip.AddMembershipOption:
 		if !header.IsV4MulticastAddress(v.MulticastAddr) && !header.IsV6MulticastAddress(v.MulticastAddr) {
 			return tcpip.ErrInvalidOptionValue
 		}
@@ -764,7 +764,7 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
 
 		e.multicastMemberships = append(e.multicastMemberships, memToInsert)
 
-	case tcpip.RemoveMembershipOption:
+	case *tcpip.RemoveMembershipOption:
 		if !header.IsV4MulticastAddress(v.MulticastAddr) && !header.IsV6MulticastAddress(v.MulticastAddr) {
 			return tcpip.ErrInvalidOptionValue
 		}
@@ -808,8 +808,8 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
 		e.multicastMemberships[memToRemoveIndex] = e.multicastMemberships[len(e.multicastMemberships)-1]
 		e.multicastMemberships = e.multicastMemberships[:len(e.multicastMemberships)-1]
 
-	case tcpip.BindToDeviceOption:
-		id := tcpip.NICID(v)
+	case *tcpip.BindToDeviceOption:
+		id := tcpip.NICID(*v)
 		if id != 0 && !e.stack.HasNIC(id) {
 			return tcpip.ErrUnknownDevice
 		}
@@ -817,7 +817,7 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
 		e.bindToDevice = id
 		e.mu.Unlock()
 
-	case tcpip.SocketDetachFilterOption:
+	case *tcpip.SocketDetachFilterOption:
 		return nil
 	}
 	return nil
@@ -960,7 +960,7 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
 }
 
 // GetSockOpt implements tcpip.Endpoint.GetSockOpt.
-func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
+func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error {
 	switch o := opt.(type) {
 	case *tcpip.MulticastInterfaceOption:
 		e.mu.Lock()
diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go
index bd1c8ac318..0cbc045d85 100644
--- a/pkg/tcpip/transport/udp/udp_test.go
+++ b/pkg/tcpip/transport/udp/udp_test.go
@@ -539,7 +539,7 @@ func TestBindToDeviceOption(t *testing.T) {
 
 	opts := stack.NICOptions{Name: "my_device"}
 	if err := s.CreateNICWithOptions(321, loopback.New(), opts); err != nil {
-		t.Errorf("CreateNICWithOptions(_, _, %+v) failed: %v", opts, err)
+		t.Errorf("CreateNICWithOptions(_, _, %+v) failed: %s", opts, err)
 	}
 
 	// nicIDPtr is used instead of taking the address of NICID literals, which is
@@ -563,16 +563,15 @@ func TestBindToDeviceOption(t *testing.T) {
 		t.Run(testAction.name, func(t *testing.T) {
 			if testAction.setBindToDevice != nil {
 				bindToDevice := tcpip.BindToDeviceOption(*testAction.setBindToDevice)
-				if gotErr, wantErr := ep.SetSockOpt(bindToDevice), testAction.setBindToDeviceError; gotErr != wantErr {
-					t.Errorf("SetSockOpt(%v) got %v, want %v", bindToDevice, gotErr, wantErr)
+				if gotErr, wantErr := ep.SetSockOpt(&bindToDevice), testAction.setBindToDeviceError; gotErr != wantErr {
+					t.Errorf("got SetSockOpt(&%T(%d)) = %s, want = %s", bindToDevice, bindToDevice, gotErr, wantErr)
 				}
 			}
 			bindToDevice := tcpip.BindToDeviceOption(88888)
 			if err := ep.GetSockOpt(&bindToDevice); err != nil {
-				t.Errorf("GetSockOpt got %v, want %v", err, nil)
-			}
-			if got, want := bindToDevice, testAction.getBindToDevice; got != want {
-				t.Errorf("bindToDevice got %d, want %d", got, want)
+				t.Errorf("GetSockOpt(&%T): %s", bindToDevice, err)
+			} else if bindToDevice != testAction.getBindToDevice {
+				t.Errorf("got bindToDevice = %d, want = %d", bindToDevice, testAction.getBindToDevice)
 			}
 		})
 	}
@@ -628,12 +627,12 @@ func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expe
 	// Check the peer address.
 	h := flow.header4Tuple(incoming)
 	if addr.Addr != h.srcAddr.Addr {
-		c.t.Fatalf("unexpected remote address: got %s, want %v", addr.Addr, h.srcAddr)
+		c.t.Fatalf("got address = %s, want = %s", addr.Addr, h.srcAddr.Addr)
 	}
 
 	// Check the payload.
 	if !bytes.Equal(payload, v) {
-		c.t.Fatalf("bad payload: got %x, want %x", v, payload)
+		c.t.Fatalf("got payload = %x, want = %x", v, payload)
 	}
 
 	// Run any checkers against the ControlMessages.
@@ -694,7 +693,7 @@ func TestBindReservedPort(t *testing.T) {
 		}
 		defer ep.Close()
 		if got, want := ep.Bind(addr), tcpip.ErrPortInUse; got != want {
-			t.Fatalf("got ep.Bind(...) = %v, want = %v", got, want)
+			t.Fatalf("got ep.Bind(...) = %s, want = %s", got, want)
 		}
 	}
 
@@ -707,7 +706,7 @@ func TestBindReservedPort(t *testing.T) {
 		// We can't bind ipv4-any on the port reserved by the connected endpoint
 		// above, since the endpoint is dual-stack.
 		if got, want := ep.Bind(tcpip.FullAddress{Port: addr.Port}), tcpip.ErrPortInUse; got != want {
-			t.Fatalf("got ep.Bind(...) = %v, want = %v", got, want)
+			t.Fatalf("got ep.Bind(...) = %s, want = %s", got, want)
 		}
 		// We can bind an ipv4 address on this port, though.
 		if err := ep.Bind(tcpip.FullAddress{Addr: stackAddr, Port: addr.Port}); err != nil {
@@ -830,7 +829,7 @@ func TestV4ReadSelfSource(t *testing.T) {
 			}
 
 			if _, _, err := c.ep.Read(nil); err != tt.wantErr {
-				t.Errorf("c.ep.Read() got error %v, want %v", err, tt.wantErr)
+				t.Errorf("got c.ep.Read(nil) = %s, want = %s", err, tt.wantErr)
 			}
 		})
 	}
@@ -871,8 +870,8 @@ func TestReadOnBoundToMulticast(t *testing.T) {
 
 			// Join multicast group.
 			ifoptSet := tcpip.AddMembershipOption{NIC: 1, MulticastAddr: mcastAddr}
-			if err := c.ep.SetSockOpt(ifoptSet); err != nil {
-				c.t.Fatal("SetSockOpt failed:", err)
+			if err := c.ep.SetSockOpt(&ifoptSet); err != nil {
+				c.t.Fatalf("SetSockOpt(&%#v): %s", ifoptSet, err)
 			}
 
 			// Check that we receive multicast packets but not unicast or broadcast
@@ -1403,8 +1402,8 @@ func TestReadIPPacketInfo(t *testing.T) {
 
 			if test.flow.isMulticast() {
 				ifoptSet := tcpip.AddMembershipOption{NIC: 1, MulticastAddr: test.flow.getMcastAddr()}
-				if err := c.ep.SetSockOpt(ifoptSet); err != nil {
-					c.t.Fatalf("SetSockOpt(%+v): %s:", ifoptSet, err)
+				if err := c.ep.SetSockOpt(&ifoptSet); err != nil {
+					c.t.Fatalf("SetSockOpt(&%#v): %s:", ifoptSet, err)
 				}
 			}
 
@@ -1547,7 +1546,7 @@ func TestSetTOS(t *testing.T) {
 			}
 			// Test for expected default value.
 			if v != 0 {
-				c.t.Errorf("got GetSockOpt(IPv4TOSOption) = 0x%x, want = 0x%x", v, 0)
+				c.t.Errorf("got GetSockOptInt(IPv4TOSOption) = 0x%x, want = 0x%x", v, 0)
 			}
 
 			if err := c.ep.SetSockOptInt(tcpip.IPv4TOSOption, tos); err != nil {
@@ -1708,19 +1707,17 @@ func TestMulticastInterfaceOption(t *testing.T) {
 								}
 							}
 
-							if err := c.ep.SetSockOpt(ifoptSet); err != nil {
-								c.t.Fatalf("SetSockOpt failed: %s", err)
+							if err := c.ep.SetSockOpt(&ifoptSet); err != nil {
+								c.t.Fatalf("SetSockOpt(&%#v): %s", ifoptSet, err)
 							}
 
 							// Verify multicast interface addr and NIC were set correctly.
 							// Note that NIC must be 1 since this is our outgoing interface.
-							ifoptWant := tcpip.MulticastInterfaceOption{NIC: 1, InterfaceAddr: ifoptSet.InterfaceAddr}
 							var ifoptGot tcpip.MulticastInterfaceOption
 							if err := c.ep.GetSockOpt(&ifoptGot); err != nil {
-								c.t.Fatalf("GetSockOpt failed: %s", err)
-							}
-							if ifoptGot != ifoptWant {
-								c.t.Errorf("got GetSockOpt() = %#v, want = %#v", ifoptGot, ifoptWant)
+								c.t.Fatalf("GetSockOpt(&%T): %s", ifoptGot, err)
+							} else if ifoptWant := (tcpip.MulticastInterfaceOption{NIC: 1, InterfaceAddr: ifoptSet.InterfaceAddr}); ifoptGot != ifoptWant {
+								c.t.Errorf("got multicast interface option = %#v, want = %#v", ifoptGot, ifoptWant)
 							}
 						})
 					}

From e9b5fda2f1d44a50d67ae3c30400f9b05048fc9d Mon Sep 17 00:00:00 2001
From: Ayush Ranjan <ayushranjan@google.com>
Date: Thu, 27 Aug 2020 16:28:36 -0700
Subject: [PATCH 102/211] [go-marshal] Support for usermem.IOOpts.

PiperOrigin-RevId: 328839759
---
 pkg/sentry/kernel/task_usermem.go | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/pkg/sentry/kernel/task_usermem.go b/pkg/sentry/kernel/task_usermem.go
index 4550b9f89a..0cb86e3900 100644
--- a/pkg/sentry/kernel/task_usermem.go
+++ b/pkg/sentry/kernel/task_usermem.go
@@ -301,3 +301,30 @@ func (t *Task) IovecsIOSequence(addr usermem.Addr, iovcnt int, opts usermem.IOOp
 		Opts:  opts,
 	}, nil
 }
+
+// CopyContextWithOpts wraps a task to allow copying memory to and from the
+// task memory with user specified usermem.IOOpts.
+type CopyContextWithOpts struct {
+	*Task
+	opts usermem.IOOpts
+}
+
+// AsCopyContextWithOpts wraps the task and returns it as CopyContextWithOpts.
+func (t *Task) AsCopyContextWithOpts(opts usermem.IOOpts) *CopyContextWithOpts {
+	return &CopyContextWithOpts{t, opts}
+}
+
+// CopyInString copies a string in from the task's memory.
+func (t *CopyContextWithOpts) CopyInString(addr usermem.Addr, maxLen int) (string, error) {
+	return usermem.CopyStringIn(t, t.MemoryManager(), addr, maxLen, t.opts)
+}
+
+// CopyInBytes copies task memory into dst from an IO context.
+func (t *CopyContextWithOpts) CopyInBytes(addr usermem.Addr, dst []byte) (int, error) {
+	return t.MemoryManager().CopyIn(t, addr, dst, t.opts)
+}
+
+// CopyOutBytes copies src into task memory from an IO context.
+func (t *CopyContextWithOpts) CopyOutBytes(addr usermem.Addr, src []byte) (int, error) {
+	return t.MemoryManager().CopyOut(t, addr, src, t.opts)
+}

From 05166f14c93323d6279987ae3fe9a803ad188ade Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin@google.com>
Date: Thu, 27 Aug 2020 16:50:11 -0700
Subject: [PATCH 103/211] unix: return ECONNREFUSED if a socket file exists but
 a socket isn't bound to it

PiperOrigin-RevId: 328843560
---
 pkg/sentry/fsimpl/gofer/filesystem.go |  4 +++-
 pkg/sentry/fsimpl/tmpfs/filesystem.go |  3 +++
 test/syscalls/linux/mknod.cc          | 19 +++++++++++++++++++
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index 4d581fc299..5d0f487db6 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -1512,7 +1512,9 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath
 				path:   opts.Addr,
 			}, nil
 		}
-		return d.endpoint, nil
+		if d.endpoint != nil {
+			return d.endpoint, nil
+		}
 	}
 	return nil, syserror.ECONNREFUSED
 }
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index eddfeab768..e0de04e05c 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -783,6 +783,9 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath
 	}
 	switch impl := d.inode.impl.(type) {
 	case *socketFile:
+		if impl.ep == nil {
+			return nil, syserror.ECONNREFUSED
+		}
 		return impl.ep, nil
 	default:
 		return nil, syserror.ECONNREFUSED
diff --git a/test/syscalls/linux/mknod.cc b/test/syscalls/linux/mknod.cc
index 05dfb375a5..2ba8c11b8c 100644
--- a/test/syscalls/linux/mknod.cc
+++ b/test/syscalls/linux/mknod.cc
@@ -14,6 +14,7 @@
 
 #include <errno.h>
 #include <fcntl.h>
+#include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/un.h>
@@ -103,6 +104,24 @@ TEST(MknodTest, UnimplementedTypesReturnError) {
   ASSERT_THAT(mknod(path.c_str(), S_IFBLK, 0), SyscallFailsWithErrno(EPERM));
 }
 
+TEST(MknodTest, Socket) {
+  ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds());
+
+  SKIP_IF(IsRunningOnGvisor() && IsRunningWithVFS1());
+
+  ASSERT_THAT(mknod("./file0", S_IFSOCK | S_IRUSR | S_IWUSR, 0),
+              SyscallSucceeds());
+
+  int sk;
+  ASSERT_THAT(sk = socket(AF_UNIX, SOCK_SEQPACKET, 0), SyscallSucceeds());
+  FileDescriptor fd(sk);
+
+  struct sockaddr_un addr = {.sun_family = AF_UNIX};
+  absl::SNPrintF(addr.sun_path, sizeof(addr.sun_path), "./file0");
+  ASSERT_THAT(connect(sk, (struct sockaddr *)&addr, sizeof(addr)),
+              SyscallFailsWithErrno(ECONNREFUSED));
+}
+
 TEST(MknodTest, Fifo) {
   const std::string fifo = NewTempAbsPath();
   ASSERT_THAT(mknod(fifo.c_str(), S_IFIFO | S_IRUSR | S_IWUSR, 0),

From bb089f9c9075a78e8bde7ff946bac77badc08894 Mon Sep 17 00:00:00 2001
From: Dean Deng <deandeng@google.com>
Date: Thu, 27 Aug 2020 16:52:21 -0700
Subject: [PATCH 104/211] Fix vfs2 pipe behavior when splicing to a non-pipe.

Fixes *.sh Java runtime tests, where splice()-ing from a pipe to /dev/zero
would not actually empty the pipe.

There was no guarantee that the data would actually be consumed on a splice
operation unless the output file's implementation of Write/PWrite actually
called VFSPipeFD.CopyIn. Now, whatever bytes are "written" are consumed
regardless of whether CopyIn is called or not.

Furthermore, the number of bytes in the IOSequence for reads is now capped at
the amount of data actually available. Before, splicing to /dev/zero would
always return the requested splice size without taking the actual available
data into account.

This change also refactors the case where an input file is spliced into an
output pipe so that it follows a similar pattern, which is arguably cleaner
anyway.

Updates #3576.

PiperOrigin-RevId: 328843954
---
 pkg/buffer/BUILD                         |  2 +
 pkg/sentry/kernel/pipe/pipe.go           | 14 ++++--
 pkg/sentry/kernel/pipe/vfs.go            | 63 ++++++++++++++++++-----
 pkg/sentry/syscalls/linux/vfs2/splice.go | 35 ++++++-------
 test/syscalls/linux/sendfile.cc          | 36 ++++++++++++-
 test/syscalls/linux/splice.cc            | 64 +++++++++++++++++++++++-
 6 files changed, 174 insertions(+), 40 deletions(-)

diff --git a/pkg/buffer/BUILD b/pkg/buffer/BUILD
index dcd0862984..b03d46d18b 100644
--- a/pkg/buffer/BUILD
+++ b/pkg/buffer/BUILD
@@ -26,8 +26,10 @@ go_library(
     ],
     visibility = ["//visibility:public"],
     deps = [
+        "//pkg/context",
         "//pkg/log",
         "//pkg/safemem",
+        "//pkg/usermem",
     ],
 )
 
diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go
index 297e8f28fb..c410c96aaf 100644
--- a/pkg/sentry/kernel/pipe/pipe.go
+++ b/pkg/sentry/kernel/pipe/pipe.go
@@ -200,17 +200,17 @@ type readOps struct {
 //
 // Precondition: this pipe must have readers.
 func (p *Pipe) read(ctx context.Context, ops readOps) (int64, error) {
-	// Don't block for a zero-length read even if the pipe is empty.
-	if ops.left() == 0 {
-		return 0, nil
-	}
-
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	return p.readLocked(ctx, ops)
 }
 
 func (p *Pipe) readLocked(ctx context.Context, ops readOps) (int64, error) {
+	// Don't block for a zero-length read even if the pipe is empty.
+	if ops.left() == 0 {
+		return 0, nil
+	}
+
 	// Is the pipe empty?
 	if p.view.Size() == 0 {
 		if !p.HasWriters() {
@@ -388,6 +388,10 @@ func (p *Pipe) rwReadiness() waiter.EventMask {
 func (p *Pipe) queued() int64 {
 	p.mu.Lock()
 	defer p.mu.Unlock()
+	return p.queuedLocked()
+}
+
+func (p *Pipe) queuedLocked() int64 {
 	return p.view.Size()
 }
 
diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go
index 28f998e45d..f223d59e19 100644
--- a/pkg/sentry/kernel/pipe/vfs.go
+++ b/pkg/sentry/kernel/pipe/vfs.go
@@ -244,19 +244,57 @@ func (fd *VFSPipeFD) SetPipeSize(size int64) (int64, error) {
 	return fd.pipe.SetFifoSize(size)
 }
 
-// IOSequence returns a useremm.IOSequence that reads up to count bytes from,
-// or writes up to count bytes to, fd.
-func (fd *VFSPipeFD) IOSequence(count int64) usermem.IOSequence {
-	return usermem.IOSequence{
+// SpliceToNonPipe performs a splice operation from fd to a non-pipe file.
+func (fd *VFSPipeFD) SpliceToNonPipe(ctx context.Context, out *vfs.FileDescription, off, count int64) (int64, error) {
+	fd.pipe.mu.Lock()
+	defer fd.pipe.mu.Unlock()
+
+	// Cap the sequence at number of bytes actually available.
+	v := fd.pipe.queuedLocked()
+	if v < count {
+		count = v
+	}
+	src := usermem.IOSequence{
 		IO:    fd,
 		Addrs: usermem.AddrRangeSeqOf(usermem.AddrRange{0, usermem.Addr(count)}),
 	}
+
+	var (
+		n   int64
+		err error
+	)
+	if off == -1 {
+		n, err = out.Write(ctx, src, vfs.WriteOptions{})
+	} else {
+		n, err = out.PWrite(ctx, src, off, vfs.WriteOptions{})
+	}
+	if n > 0 {
+		fd.pipe.view.TrimFront(n)
+	}
+	return n, err
+}
+
+// SpliceFromNonPipe performs a splice operation from a non-pipe file to fd.
+func (fd *VFSPipeFD) SpliceFromNonPipe(ctx context.Context, in *vfs.FileDescription, off, count int64) (int64, error) {
+	fd.pipe.mu.Lock()
+	defer fd.pipe.mu.Unlock()
+
+	dst := usermem.IOSequence{
+		IO:    fd,
+		Addrs: usermem.AddrRangeSeqOf(usermem.AddrRange{0, usermem.Addr(count)}),
+	}
+
+	if off == -1 {
+		return in.Read(ctx, dst, vfs.ReadOptions{})
+	}
+	return in.PRead(ctx, dst, off, vfs.ReadOptions{})
 }
 
-// CopyIn implements usermem.IO.CopyIn.
+// CopyIn implements usermem.IO.CopyIn. Note that it is the caller's
+// responsibility to trim fd.pipe.view after the read is completed.
 func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr usermem.Addr, dst []byte, opts usermem.IOOpts) (int, error) {
 	origCount := int64(len(dst))
-	n, err := fd.pipe.read(ctx, readOps{
+	n, err := fd.pipe.readLocked(ctx, readOps{
 		left: func() int64 {
 			return int64(len(dst))
 		},
@@ -265,7 +303,6 @@ func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr usermem.Addr, dst []byte,
 		},
 		read: func(view *buffer.View) (int64, error) {
 			n, err := view.ReadAt(dst, 0)
-			view.TrimFront(int64(n))
 			return int64(n), err
 		},
 	})
@@ -281,7 +318,7 @@ func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr usermem.Addr, dst []byte,
 // CopyOut implements usermem.IO.CopyOut.
 func (fd *VFSPipeFD) CopyOut(ctx context.Context, addr usermem.Addr, src []byte, opts usermem.IOOpts) (int, error) {
 	origCount := int64(len(src))
-	n, err := fd.pipe.write(ctx, writeOps{
+	n, err := fd.pipe.writeLocked(ctx, writeOps{
 		left: func() int64 {
 			return int64(len(src))
 		},
@@ -305,7 +342,7 @@ func (fd *VFSPipeFD) CopyOut(ctx context.Context, addr usermem.Addr, src []byte,
 // ZeroOut implements usermem.IO.ZeroOut.
 func (fd *VFSPipeFD) ZeroOut(ctx context.Context, addr usermem.Addr, toZero int64, opts usermem.IOOpts) (int64, error) {
 	origCount := toZero
-	n, err := fd.pipe.write(ctx, writeOps{
+	n, err := fd.pipe.writeLocked(ctx, writeOps{
 		left: func() int64 {
 			return toZero
 		},
@@ -326,14 +363,15 @@ func (fd *VFSPipeFD) ZeroOut(ctx context.Context, addr usermem.Addr, toZero int6
 	return n, err
 }
 
-// CopyInTo implements usermem.IO.CopyInTo.
+// CopyInTo implements usermem.IO.CopyInTo. Note that it is the caller's
+// responsibility to trim fd.pipe.view after the read is completed.
 func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars usermem.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) {
 	count := ars.NumBytes()
 	if count == 0 {
 		return 0, nil
 	}
 	origCount := count
-	n, err := fd.pipe.read(ctx, readOps{
+	n, err := fd.pipe.readLocked(ctx, readOps{
 		left: func() int64 {
 			return count
 		},
@@ -342,7 +380,6 @@ func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars usermem.AddrRangeSeq, dst
 		},
 		read: func(view *buffer.View) (int64, error) {
 			n, err := view.ReadToSafememWriter(dst, uint64(count))
-			view.TrimFront(int64(n))
 			return int64(n), err
 		},
 	})
@@ -362,7 +399,7 @@ func (fd *VFSPipeFD) CopyOutFrom(ctx context.Context, ars usermem.AddrRangeSeq,
 		return 0, nil
 	}
 	origCount := count
-	n, err := fd.pipe.write(ctx, writeOps{
+	n, err := fd.pipe.writeLocked(ctx, writeOps{
 		left: func() int64 {
 			return count
 		},
diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go
index 75bfa2c79e..1924113934 100644
--- a/pkg/sentry/syscalls/linux/vfs2/splice.go
+++ b/pkg/sentry/syscalls/linux/vfs2/splice.go
@@ -131,18 +131,14 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
 		case inIsPipe && outIsPipe:
 			n, err = pipe.Splice(t, outPipeFD, inPipeFD, count)
 		case inIsPipe:
+			n, err = inPipeFD.SpliceToNonPipe(t, outFile, outOffset, count)
 			if outOffset != -1 {
-				n, err = outFile.PWrite(t, inPipeFD.IOSequence(count), outOffset, vfs.WriteOptions{})
 				outOffset += n
-			} else {
-				n, err = outFile.Write(t, inPipeFD.IOSequence(count), vfs.WriteOptions{})
 			}
 		case outIsPipe:
+			n, err = outPipeFD.SpliceFromNonPipe(t, inFile, inOffset, count)
 			if inOffset != -1 {
-				n, err = inFile.PRead(t, outPipeFD.IOSequence(count), inOffset, vfs.ReadOptions{})
 				inOffset += n
-			} else {
-				n, err = inFile.Read(t, outPipeFD.IOSequence(count), vfs.ReadOptions{})
 			}
 		default:
 			panic("not possible")
@@ -341,17 +337,15 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
 	if outIsPipe {
 		for n < count {
 			var spliceN int64
-			if offset != -1 {
-				spliceN, err = inFile.PRead(t, outPipeFD.IOSequence(count), offset, vfs.ReadOptions{})
-				offset += spliceN
-			} else {
-				spliceN, err = inFile.Read(t, outPipeFD.IOSequence(count), vfs.ReadOptions{})
-			}
+			spliceN, err = outPipeFD.SpliceFromNonPipe(t, inFile, offset, count)
 			if spliceN == 0 && err == io.EOF {
 				// We reached the end of the file. Eat the error and exit the loop.
 				err = nil
 				break
 			}
+			if offset != -1 {
+				offset += spliceN
+			}
 			n += spliceN
 			if err == syserror.ErrWouldBlock && !nonBlock {
 				err = dw.waitForBoth(t)
@@ -371,19 +365,18 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
 			} else {
 				readN, err = inFile.Read(t, usermem.BytesIOSequence(buf), vfs.ReadOptions{})
 			}
-			if readN == 0 && err == io.EOF {
-				// We reached the end of the file. Eat the error and exit the loop.
-				err = nil
+			if readN == 0 && err != nil {
+				if err == io.EOF {
+					// We reached the end of the file. Eat the error before exiting the loop.
+					err = nil
+				}
 				break
 			}
 			n += readN
-			if err != nil {
-				break
-			}
 
 			// Write all of the bytes that we read. This may need
 			// multiple write calls to complete.
-			wbuf := buf[:n]
+			wbuf := buf[:readN]
 			for len(wbuf) > 0 {
 				var writeN int64
 				writeN, err = outFile.Write(t, usermem.BytesIOSequence(wbuf), vfs.WriteOptions{})
@@ -398,6 +391,10 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
 					notWritten := int64(len(wbuf))
 					n -= notWritten
 					if offset != -1 {
+						// TODO(gvisor.dev/issue/3779): The inFile offset will be incorrect if we
+						// roll back, because it has already been advanced by the full amount.
+						// Merely seeking on inFile does not work, because there may be concurrent
+						// file operations.
 						offset -= notWritten
 					}
 					break
diff --git a/test/syscalls/linux/sendfile.cc b/test/syscalls/linux/sendfile.cc
index 64123e9043..e65387f599 100644
--- a/test/syscalls/linux/sendfile.cc
+++ b/test/syscalls/linux/sendfile.cc
@@ -198,7 +198,39 @@ TEST(SendFileTest, SendAndUpdateFileOffset) {
   EXPECT_EQ(absl::string_view(kData, kHalfDataSize),
             absl::string_view(actual, bytes_sent));
 
-  // Verify that the input file offset has been updated
+  // Verify that the input file offset has been updated.
+  ASSERT_THAT(read(inf.get(), &actual, kDataSize - bytes_sent),
+              SyscallSucceedsWithValue(kHalfDataSize));
+  EXPECT_EQ(
+      absl::string_view(kData + kDataSize - bytes_sent, kDataSize - bytes_sent),
+      absl::string_view(actual, kHalfDataSize));
+}
+
+TEST(SendFileTest, SendToDevZeroAndUpdateFileOffset) {
+  // Create temp files.
+  // Test input string length must be > 2 AND even.
+  constexpr char kData[] = "The slings and arrows of outrageous fortune,";
+  constexpr int kDataSize = sizeof(kData) - 1;
+  constexpr int kHalfDataSize = kDataSize / 2;
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+
+  // Open the input file as read only.
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+  // Open /dev/zero as write only.
+  const FileDescriptor outf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_WRONLY));
+
+  // Send data and verify that sendfile returns the correct value.
+  int bytes_sent;
+  EXPECT_THAT(
+      bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kHalfDataSize),
+      SyscallSucceedsWithValue(kHalfDataSize));
+
+  char actual[kHalfDataSize];
+  // Verify that the input file offset has been updated.
   ASSERT_THAT(read(inf.get(), &actual, kDataSize - bytes_sent),
               SyscallSucceedsWithValue(kHalfDataSize));
   EXPECT_EQ(
@@ -250,7 +282,7 @@ TEST(SendFileTest, SendAndUpdateFileOffsetFromNonzeroStartingPoint) {
   EXPECT_EQ(absl::string_view(kData + kQuarterDataSize, kHalfDataSize),
             absl::string_view(actual, bytes_sent));
 
-  // Verify that the input file offset has been updated
+  // Verify that the input file offset has been updated.
   ASSERT_THAT(read(inf.get(), &actual, kQuarterDataSize),
               SyscallSucceedsWithValue(kQuarterDataSize));
 
diff --git a/test/syscalls/linux/splice.cc b/test/syscalls/linux/splice.cc
index 08fc4b1b7a..be3fb48402 100644
--- a/test/syscalls/linux/splice.cc
+++ b/test/syscalls/linux/splice.cc
@@ -342,7 +342,7 @@ TEST(SpliceTest, FromPipe) {
   ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()),
               SyscallSucceedsWithValue(kPageSize));
 
-  // Open the input file.
+  // Open the output file.
   const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
   const FileDescriptor out_fd =
       ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDWR));
@@ -364,6 +364,40 @@ TEST(SpliceTest, FromPipe) {
   EXPECT_EQ(memcmp(rbuf.data(), buf.data(), buf.size()), 0);
 }
 
+TEST(SpliceTest, FromPipeMultiple) {
+  // Create a new pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  const FileDescriptor rfd(fds[0]);
+  const FileDescriptor wfd(fds[1]);
+
+  std::string buf = "abcABC123";
+  ASSERT_THAT(write(wfd.get(), buf.c_str(), buf.size()),
+              SyscallSucceedsWithValue(buf.size()));
+
+  // Open the output file.
+  const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor out_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDWR));
+
+  // Splice from the pipe to the output file over several calls.
+  EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), nullptr, 3, 0),
+              SyscallSucceedsWithValue(3));
+  EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), nullptr, 3, 0),
+              SyscallSucceedsWithValue(3));
+  EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), nullptr, 3, 0),
+              SyscallSucceedsWithValue(3));
+
+  // Reset cursor to zero so that we can check the contents.
+  ASSERT_THAT(lseek(out_fd.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+
+  // Contents should be equal.
+  std::vector<char> rbuf(buf.size());
+  ASSERT_THAT(read(out_fd.get(), rbuf.data(), rbuf.size()),
+              SyscallSucceedsWithValue(rbuf.size()));
+  EXPECT_EQ(memcmp(rbuf.data(), buf.c_str(), buf.size()), 0);
+}
+
 TEST(SpliceTest, FromPipeOffset) {
   // Create a new pipe.
   int fds[2];
@@ -693,6 +727,34 @@ TEST(SpliceTest, FromPipeMaxFileSize) {
   EXPECT_EQ(memcmp(rbuf.data(), buf.data(), buf.size()), 0);
 }
 
+TEST(SpliceTest, FromPipeToDevZero) {
+  // Create a new pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  const FileDescriptor rfd(fds[0]);
+  FileDescriptor wfd(fds[1]);
+
+  // Fill with some random data.
+  std::vector<char> buf(kPageSize);
+  RandomizeBuffer(buf.data(), buf.size());
+  ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+
+  const FileDescriptor zero =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_WRONLY));
+
+  // Close the write end to prevent blocking below.
+  wfd.reset();
+
+  // Splice to /dev/zero. The first call should empty the pipe, and the return
+  // value should not exceed the number of bytes available for reading.
+  EXPECT_THAT(
+      splice(rfd.get(), nullptr, zero.get(), nullptr, kPageSize + 123, 0),
+      SyscallSucceedsWithValue(kPageSize));
+  EXPECT_THAT(splice(rfd.get(), nullptr, zero.get(), nullptr, 1, 0),
+              SyscallSucceedsWithValue(0));
+}
+
 }  // namespace
 
 }  // namespace testing

From cd6374cad39d4aea4a97b425de681b16e05851d3 Mon Sep 17 00:00:00 2001
From: Ayush Ranjan <ayushranjan@google.com>
Date: Thu, 27 Aug 2020 19:25:23 -0700
Subject: [PATCH 105/211] [go-marshal] Enable auto-marshalling for tundev.

PiperOrigin-RevId: 328863725
---
 pkg/sentry/devices/tundev/tundev.go | 17 +++++++----------
 pkg/sentry/fs/dev/net_tun.go        | 17 +++++++----------
 2 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/pkg/sentry/devices/tundev/tundev.go b/pkg/sentry/devices/tundev/tundev.go
index a40625e197..0b701a2899 100644
--- a/pkg/sentry/devices/tundev/tundev.go
+++ b/pkg/sentry/devices/tundev/tundev.go
@@ -64,12 +64,13 @@ func (fd *tunFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArg
 	request := args[1].Uint()
 	data := args[2].Pointer()
 
+	t := kernel.TaskFromContext(ctx)
+	if t == nil {
+		panic("Ioctl should be called from a task context")
+	}
+
 	switch request {
 	case linux.TUNSETIFF:
-		t := kernel.TaskFromContext(ctx)
-		if t == nil {
-			panic("Ioctl should be called from a task context")
-		}
 		if !t.HasCapability(linux.CAP_NET_ADMIN) {
 			return 0, syserror.EPERM
 		}
@@ -79,9 +80,7 @@ func (fd *tunFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArg
 		}
 
 		var req linux.IFReq
-		if _, err := usermem.CopyObjectIn(ctx, uio, data, &req, usermem.IOOpts{
-			AddressSpaceActive: true,
-		}); err != nil {
+		if _, err := req.CopyIn(t, data); err != nil {
 			return 0, err
 		}
 		flags := usermem.ByteOrder.Uint16(req.Data[:])
@@ -97,9 +96,7 @@ func (fd *tunFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArg
 		flags := fd.device.Flags() | linux.IFF_NOFILTER
 		usermem.ByteOrder.PutUint16(req.Data[:], flags)
 
-		_, err := usermem.CopyObjectOut(ctx, uio, data, &req, usermem.IOOpts{
-			AddressSpaceActive: true,
-		})
+		_, err := req.CopyOut(t, data)
 		return 0, err
 
 	default:
diff --git a/pkg/sentry/fs/dev/net_tun.go b/pkg/sentry/fs/dev/net_tun.go
index ec474e5545..5f8c9b5a22 100644
--- a/pkg/sentry/fs/dev/net_tun.go
+++ b/pkg/sentry/fs/dev/net_tun.go
@@ -89,12 +89,13 @@ func (fops *netTunFileOperations) Ioctl(ctx context.Context, file *fs.File, io u
 	request := args[1].Uint()
 	data := args[2].Pointer()
 
+	t := kernel.TaskFromContext(ctx)
+	if t == nil {
+		panic("Ioctl should be called from a task context")
+	}
+
 	switch request {
 	case linux.TUNSETIFF:
-		t := kernel.TaskFromContext(ctx)
-		if t == nil {
-			panic("Ioctl should be called from a task context")
-		}
 		if !t.HasCapability(linux.CAP_NET_ADMIN) {
 			return 0, syserror.EPERM
 		}
@@ -104,9 +105,7 @@ func (fops *netTunFileOperations) Ioctl(ctx context.Context, file *fs.File, io u
 		}
 
 		var req linux.IFReq
-		if _, err := usermem.CopyObjectIn(ctx, io, data, &req, usermem.IOOpts{
-			AddressSpaceActive: true,
-		}); err != nil {
+		if _, err := req.CopyIn(t, data); err != nil {
 			return 0, err
 		}
 		flags := usermem.ByteOrder.Uint16(req.Data[:])
@@ -122,9 +121,7 @@ func (fops *netTunFileOperations) Ioctl(ctx context.Context, file *fs.File, io u
 		flags := fops.device.Flags() | linux.IFF_NOFILTER
 		usermem.ByteOrder.PutUint16(req.Data[:], flags)
 
-		_, err := usermem.CopyObjectOut(ctx, io, data, &req, usermem.IOOpts{
-			AddressSpaceActive: true,
-		})
+		_, err := req.CopyOut(t, data)
 		return 0, err
 
 	default:

From c77a532936f245b0525703eb7e72a6cdf62c00b0 Mon Sep 17 00:00:00 2001
From: Ghanan Gowripalan <ghanan@google.com>
Date: Fri, 28 Aug 2020 05:06:50 -0700
Subject: [PATCH 106/211] Use a single NetworkEndpoint per address

This change was already done as of
https://github.com/google/gvisor/commit/1736b2208f but
https://github.com/google/gvisor/commit/a174aa7597 conflicted with that
change and it was missed in reviews.

This change fixes the conflict.

PiperOrigin-RevId: 328920372
---
 pkg/tcpip/stack/nic.go | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index 0c811efdb0..8e700990dd 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -137,6 +137,7 @@ func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, ctx NICC
 	nic.mu.ndp.initializeTempAddrState()
 
 	// Check for Neighbor Unreachability Detection support.
+	var nud NUDHandler
 	if ep.Capabilities()&CapabilityResolutionRequired != 0 && len(stack.linkAddrResolvers) != 0 && stack.useNeighborCache {
 		rng := rand.New(rand.NewSource(stack.clock.NowNanoseconds()))
 		nic.neigh = &neighborCache{
@@ -144,16 +145,24 @@ func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, ctx NICC
 			state: NewNUDState(stack.nudConfigs, rng),
 			cache: make(map[tcpip.Address]*neighborEntry, neighborCacheSize),
 		}
+
+		// An interface value that holds a nil pointer but non-nil type is not the
+		// same as the nil interface. Because of this, nud must only be assigned if
+		// nic.neigh is non-nil since a nil reference to a neighborCache is not
+		// valid.
+		//
+		// See https://golang.org/doc/faq#nil_error for more information.
+		nud = nic.neigh
 	}
 
-	// Register supported packet endpoint protocols.
+	// Register supported packet and network endpoint protocols.
 	for _, netProto := range header.Ethertypes {
 		nic.mu.packetEPs[netProto] = []PacketEndpoint{}
 	}
 	for _, netProto := range stack.networkProtocols {
 		netNum := netProto.Number()
 		nic.mu.packetEPs[netNum] = nil
-		nic.networkEndpoints[netNum] = netProto.NewEndpoint(id, stack, nic.neigh, nic, ep, stack)
+		nic.networkEndpoints[netNum] = netProto.NewEndpoint(id, stack, nud, nic, ep, stack)
 	}
 
 	nic.linkEP.Attach(nic)
@@ -819,24 +828,11 @@ func (n *NIC) addAddressLocked(protocolAddress tcpip.ProtocolAddress, peb Primar
 		}
 	}
 
-	netProto, ok := n.stack.networkProtocols[protocolAddress.Protocol]
+	ep, ok := n.networkEndpoints[protocolAddress.Protocol]
 	if !ok {
 		return nil, tcpip.ErrUnknownProtocol
 	}
 
-	var nud NUDHandler
-	if n.neigh != nil {
-		// An interface value that holds a nil concrete value is itself non-nil.
-		// For this reason, n.neigh cannot be passed directly to NewEndpoint so
-		// NetworkEndpoints don't confuse it for non-nil.
-		//
-		// See https://golang.org/doc/faq#nil_error for more information.
-		nud = n.neigh
-	}
-
-	// Create the new network endpoint.
-	ep := netProto.NewEndpoint(n.id, n.stack, nud, n, n.linkEP, n.stack)
-
 	isIPv6Unicast := protocolAddress.Protocol == header.IPv6ProtocolNumber && header.IsV6UnicastAddress(protocolAddress.AddressWithPrefix.Address)
 
 	// If the address is an IPv6 address and it is a permanent address,

From e50be6f7bab47c271e718dabae027c9c3590e4b9 Mon Sep 17 00:00:00 2001
From: Tamir Duberstein <tamird@google.com>
Date: Fri, 28 Aug 2020 05:31:46 -0700
Subject: [PATCH 107/211] Add test demonstrating accept bug

Updates #3780.

PiperOrigin-RevId: 328922573
---
 test/syscalls/linux/socket_inet_loopback.cc | 76 +++++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc
index 67893033cc..425084228f 100644
--- a/test/syscalls/linux/socket_inet_loopback.cc
+++ b/test/syscalls/linux/socket_inet_loopback.cc
@@ -1111,6 +1111,82 @@ TEST_P(SocketInetLoopbackTest, AcceptedInheritsTCPUserTimeout) {
   EXPECT_EQ(get, kUserTimeout);
 }
 
+TEST_P(SocketInetLoopbackTest, TCPAcceptAfterReset) {
+  auto const& param = GetParam();
+  TestAddress const& listener = param.listener;
+  TestAddress const& connector = param.connector;
+
+  // Create the listening socket.
+  const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+  sockaddr_storage listen_addr = listener.addr;
+  ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
+                   listener.addr_len),
+              SyscallSucceeds());
+  ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds());
+
+  // Get the port bound by the listening socket.
+  {
+    socklen_t addrlen = listener.addr_len;
+    ASSERT_THAT(
+        getsockname(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
+                    &addrlen),
+        SyscallSucceeds());
+  }
+
+  const uint16_t port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+
+  // Connect to the listening socket.
+  FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+
+  sockaddr_storage conn_addr = connector.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+  ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(),
+                                  reinterpret_cast<sockaddr*>(&conn_addr),
+                                  connector.addr_len),
+              SyscallSucceeds());
+
+  // Trigger a RST by setting a zero linger timeout and closing the socket.
+  struct linger opt = {
+      .l_onoff = 1,
+      .l_linger = 0,
+  };
+  ASSERT_THAT(
+      setsockopt(conn_fd.get(), SOL_SOCKET, SO_LINGER, &opt, sizeof(opt)),
+      SyscallSucceeds());
+  ASSERT_THAT(close(conn_fd.release()), SyscallSucceeds());
+
+  // TODO(gvisor.dev/issue/3780): Remove this.
+  if (IsRunningOnGvisor()) {
+    // Wait for the RST to be observed.
+    absl::SleepFor(absl::Milliseconds(100));
+  }
+
+  sockaddr_storage accept_addr;
+  socklen_t addrlen = sizeof(accept_addr);
+
+  // TODO(gvisor.dev/issue/3780): Remove this.
+  if (IsRunningOnGvisor()) {
+    ASSERT_THAT(accept(listen_fd.get(),
+                       reinterpret_cast<sockaddr*>(&accept_addr), &addrlen),
+                SyscallFailsWithErrno(ENOTCONN));
+    return;
+  }
+
+  conn_fd = ASSERT_NO_ERRNO_AND_VALUE(Accept(
+      listen_fd.get(), reinterpret_cast<sockaddr*>(&accept_addr), &addrlen));
+  ASSERT_EQ(addrlen, listener.addr_len);
+
+  int err;
+  socklen_t optlen = sizeof(err);
+  ASSERT_THAT(getsockopt(conn_fd.get(), SOL_SOCKET, SO_ERROR, &err, &optlen),
+              SyscallSucceeds());
+  ASSERT_EQ(err, ECONNRESET);
+  ASSERT_EQ(optlen, sizeof(err));
+}
+
 // TODO(gvisor.dev/issue/1688): Partially completed passive endpoints are not
 // saved. Enable S/R once issue is fixed.
 TEST_P(SocketInetLoopbackTest, TCPDeferAccept_NoRandomSave) {

From c9842f21ce4a9308dba983fd712cc688b26237d5 Mon Sep 17 00:00:00 2001
From: Kevin Krakauer <krakauer@google.com>
Date: Fri, 28 Aug 2020 10:33:44 -0700
Subject: [PATCH 108/211] fix panic when calling SO_ORIGINAL_DST without
 initializing iptables

Reported-by: syzbot+074ec22c42305725b79f@syzkaller.appspotmail.com
PiperOrigin-RevId: 328963899
---
 pkg/tcpip/stack/iptables.go     |  5 +++++
 test/syscalls/linux/iptables.cc | 13 +++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go
index 30aa41db23..0e33cbe922 100644
--- a/pkg/tcpip/stack/iptables.go
+++ b/pkg/tcpip/stack/iptables.go
@@ -427,5 +427,10 @@ func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx
 // OriginalDst returns the original destination of redirected connections. It
 // returns an error if the connection doesn't exist or isn't redirected.
 func (it *IPTables) OriginalDst(epID TransportEndpointID) (tcpip.Address, uint16, *tcpip.Error) {
+	it.mu.RLock()
+	defer it.mu.RUnlock()
+	if !it.modified {
+		return "", 0, tcpip.ErrNotConnected
+	}
 	return it.connections.originalDst(epID)
 }
diff --git a/test/syscalls/linux/iptables.cc b/test/syscalls/linux/iptables.cc
index f1af8f097c..83b6a164af 100644
--- a/test/syscalls/linux/iptables.cc
+++ b/test/syscalls/linux/iptables.cc
@@ -104,6 +104,19 @@ TEST(IPTablesBasic, GetEntriesErrorPrecedence) {
       SyscallFailsWithErrno(EINVAL));
 }
 
+TEST(IPTablesBasic, OriginalDstErrors) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  int sock;
+  ASSERT_THAT(sock = socket(AF_INET, SOCK_STREAM, 0), SyscallSucceeds());
+
+  // Sockets not affected by NAT should fail to find an original destination.
+  struct sockaddr_in addr = {};
+  socklen_t addr_len = sizeof(addr);
+  EXPECT_THAT(getsockopt(sock, SOL_IP, SO_ORIGINAL_DST, &addr, &addr_len),
+              SyscallFailsWithErrno(ENOTCONN));
+}
+
 // Fixture for iptables tests.
 class IPTablesTest : public ::testing::Test {
  protected:

From 4346e36ba286338f6615eb9b22425808cf186775 Mon Sep 17 00:00:00 2001
From: Dean Deng <deandeng@google.com>
Date: Fri, 28 Aug 2020 11:26:25 -0700
Subject: [PATCH 109/211] Fix EOF handling for splice.

Also, add corresponding EOF tests for splice/sendfile.

Discovered by syzkaller.

PiperOrigin-RevId: 328975990
---
 pkg/sentry/syscalls/linux/vfs2/splice.go |  7 ++++++-
 test/syscalls/linux/sendfile.cc          | 16 ++++++++++++++++
 test/syscalls/linux/splice.cc            | 17 +++++++++++++++++
 3 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go
index 1924113934..68ce947789 100644
--- a/pkg/sentry/syscalls/linux/vfs2/splice.go
+++ b/pkg/sentry/syscalls/linux/vfs2/splice.go
@@ -141,9 +141,14 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
 				inOffset += n
 			}
 		default:
-			panic("not possible")
+			panic("at least one end of splice must be a pipe")
 		}
 
+		if n == 0 && err == io.EOF {
+			// We reached the end of the file. Eat the error and exit the loop.
+			err = nil
+			break
+		}
 		if n != 0 || err != syserror.ErrWouldBlock || nonBlock {
 			break
 		}
diff --git a/test/syscalls/linux/sendfile.cc b/test/syscalls/linux/sendfile.cc
index e65387f599..a8bfb01f17 100644
--- a/test/syscalls/linux/sendfile.cc
+++ b/test/syscalls/linux/sendfile.cc
@@ -533,6 +533,22 @@ TEST(SendFileTest, SendPipeWouldBlock) {
               SyscallFailsWithErrno(EWOULDBLOCK));
 }
 
+TEST(SendFileTest, SendPipeEOF) {
+  // Create and open an empty input file.
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+  // Set up the output pipe.
+  int fds[2];
+  ASSERT_THAT(pipe2(fds, O_NONBLOCK), SyscallSucceeds());
+  const FileDescriptor rfd(fds[0]);
+  const FileDescriptor wfd(fds[1]);
+
+  EXPECT_THAT(sendfile(wfd.get(), inf.get(), nullptr, 123),
+              SyscallSucceedsWithValue(0));
+}
+
 TEST(SendFileTest, SendPipeBlocks) {
   // Create temp file.
   constexpr char kData[] =
diff --git a/test/syscalls/linux/splice.cc b/test/syscalls/linux/splice.cc
index be3fb48402..a1d2b9b119 100644
--- a/test/syscalls/linux/splice.cc
+++ b/test/syscalls/linux/splice.cc
@@ -298,6 +298,23 @@ TEST(SpliceTest, ToPipe) {
   EXPECT_EQ(memcmp(rbuf.data(), buf.data(), buf.size()), 0);
 }
 
+TEST(SpliceTest, ToPipeEOF) {
+  // Create and open an empty input file.
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor in_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+  // Create a new pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  const FileDescriptor rfd(fds[0]);
+  const FileDescriptor wfd(fds[1]);
+
+  // Splice from the empty file to the pipe.
+  EXPECT_THAT(splice(in_fd.get(), nullptr, wfd.get(), nullptr, 123, 0),
+              SyscallSucceedsWithValue(0));
+}
+
 TEST(SpliceTest, ToPipeOffset) {
   // Open the input file.
   const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());

From 91e81aaf69ac5fc4cd7b677139c6a23801eabb02 Mon Sep 17 00:00:00 2001
From: Ghanan Gowripalan <ghanan@google.com>
Date: Fri, 28 Aug 2020 11:47:58 -0700
Subject: [PATCH 110/211] Improve type safety for network protocol options

The existing implementation for NetworkProtocol.{Set}Option takes
arguments of an empty interface type which all types (implicitly)
implement; any type may be passed to the functions.

This change introduces marker interfaces for network protocol options
that may be set or queried which network protocol option types implement
to ensure that invalid types are caught at compile time. Different
interfaces are used to allow the compiler to enforce read-only or
set-only socket options.

PiperOrigin-RevId: 328980359
---
 pkg/tcpip/network/arp/arp.go      |  4 +-
 pkg/tcpip/network/ipv4/ipv4.go    |  8 ++--
 pkg/tcpip/network/ipv6/ipv6.go    |  8 ++--
 pkg/tcpip/stack/forwarder_test.go | 10 ++---
 pkg/tcpip/stack/nic_test.go       |  4 +-
 pkg/tcpip/stack/registration.go   |  4 +-
 pkg/tcpip/stack/stack.go          |  4 +-
 pkg/tcpip/stack/stack_test.go     | 68 ++++++++-----------------------
 pkg/tcpip/tcpip.go                | 16 ++++++++
 runsc/boot/loader.go              |  9 +++-
 10 files changed, 61 insertions(+), 74 deletions(-)

diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go
index cbbe5b77f2..7aaee08c43 100644
--- a/pkg/tcpip/network/arp/arp.go
+++ b/pkg/tcpip/network/arp/arp.go
@@ -217,12 +217,12 @@ func (*protocol) ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bo
 }
 
 // SetOption implements stack.NetworkProtocol.SetOption.
-func (*protocol) SetOption(option interface{}) *tcpip.Error {
+func (*protocol) SetOption(tcpip.SettableNetworkProtocolOption) *tcpip.Error {
 	return tcpip.ErrUnknownProtocolOption
 }
 
 // Option implements stack.NetworkProtocol.Option.
-func (*protocol) Option(option interface{}) *tcpip.Error {
+func (*protocol) Option(tcpip.GettableNetworkProtocolOption) *tcpip.Error {
 	return tcpip.ErrUnknownProtocolOption
 }
 
diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go
index 55ca94268c..fa4ae20124 100644
--- a/pkg/tcpip/network/ipv4/ipv4.go
+++ b/pkg/tcpip/network/ipv4/ipv4.go
@@ -486,10 +486,10 @@ func (*protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) {
 }
 
 // SetOption implements NetworkProtocol.SetOption.
-func (p *protocol) SetOption(option interface{}) *tcpip.Error {
+func (p *protocol) SetOption(option tcpip.SettableNetworkProtocolOption) *tcpip.Error {
 	switch v := option.(type) {
-	case tcpip.DefaultTTLOption:
-		p.SetDefaultTTL(uint8(v))
+	case *tcpip.DefaultTTLOption:
+		p.SetDefaultTTL(uint8(*v))
 		return nil
 	default:
 		return tcpip.ErrUnknownProtocolOption
@@ -497,7 +497,7 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error {
 }
 
 // Option implements NetworkProtocol.Option.
-func (p *protocol) Option(option interface{}) *tcpip.Error {
+func (p *protocol) Option(option tcpip.GettableNetworkProtocolOption) *tcpip.Error {
 	switch v := option.(type) {
 	case *tcpip.DefaultTTLOption:
 		*v = tcpip.DefaultTTLOption(p.DefaultTTL())
diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index 36fbbebf09..af3cd91c64 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -469,10 +469,10 @@ func (p *protocol) NewEndpoint(nicID tcpip.NICID, linkAddrCache stack.LinkAddres
 }
 
 // SetOption implements NetworkProtocol.SetOption.
-func (p *protocol) SetOption(option interface{}) *tcpip.Error {
+func (p *protocol) SetOption(option tcpip.SettableNetworkProtocolOption) *tcpip.Error {
 	switch v := option.(type) {
-	case tcpip.DefaultTTLOption:
-		p.SetDefaultTTL(uint8(v))
+	case *tcpip.DefaultTTLOption:
+		p.SetDefaultTTL(uint8(*v))
 		return nil
 	default:
 		return tcpip.ErrUnknownProtocolOption
@@ -480,7 +480,7 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error {
 }
 
 // Option implements NetworkProtocol.Option.
-func (p *protocol) Option(option interface{}) *tcpip.Error {
+func (p *protocol) Option(option tcpip.GettableNetworkProtocolOption) *tcpip.Error {
 	switch v := option.(type) {
 	case *tcpip.DefaultTTLOption:
 		*v = tcpip.DefaultTTLOption(p.DefaultTTL())
diff --git a/pkg/tcpip/stack/forwarder_test.go b/pkg/tcpip/stack/forwarder_test.go
index 91165ebc7e..54759091ab 100644
--- a/pkg/tcpip/stack/forwarder_test.go
+++ b/pkg/tcpip/stack/forwarder_test.go
@@ -154,17 +154,17 @@ func (f *fwdTestNetworkProtocol) NewEndpoint(nicID tcpip.NICID, _ LinkAddressCac
 	}
 }
 
-func (f *fwdTestNetworkProtocol) SetOption(option interface{}) *tcpip.Error {
+func (*fwdTestNetworkProtocol) SetOption(tcpip.SettableNetworkProtocolOption) *tcpip.Error {
 	return tcpip.ErrUnknownProtocolOption
 }
 
-func (f *fwdTestNetworkProtocol) Option(option interface{}) *tcpip.Error {
+func (*fwdTestNetworkProtocol) Option(tcpip.GettableNetworkProtocolOption) *tcpip.Error {
 	return tcpip.ErrUnknownProtocolOption
 }
 
-func (f *fwdTestNetworkProtocol) Close() {}
+func (*fwdTestNetworkProtocol) Close() {}
 
-func (f *fwdTestNetworkProtocol) Wait() {}
+func (*fwdTestNetworkProtocol) Wait() {}
 
 func (f *fwdTestNetworkProtocol) LinkAddressRequest(addr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, linkEP LinkEndpoint) *tcpip.Error {
 	if f.onLinkAddressResolved != nil {
@@ -182,7 +182,7 @@ func (f *fwdTestNetworkProtocol) ResolveStaticAddress(addr tcpip.Address) (tcpip
 	return "", false
 }
 
-func (f *fwdTestNetworkProtocol) LinkAddressProtocol() tcpip.NetworkProtocolNumber {
+func (*fwdTestNetworkProtocol) LinkAddressProtocol() tcpip.NetworkProtocolNumber {
 	return fwdTestNetNumber
 }
 
diff --git a/pkg/tcpip/stack/nic_test.go b/pkg/tcpip/stack/nic_test.go
index 1e065b5c1f..dd64742976 100644
--- a/pkg/tcpip/stack/nic_test.go
+++ b/pkg/tcpip/stack/nic_test.go
@@ -201,12 +201,12 @@ func (p *testIPv6Protocol) NewEndpoint(nicID tcpip.NICID, _ LinkAddressCache, _
 }
 
 // SetOption implements NetworkProtocol.SetOption.
-func (*testIPv6Protocol) SetOption(interface{}) *tcpip.Error {
+func (*testIPv6Protocol) SetOption(tcpip.SettableNetworkProtocolOption) *tcpip.Error {
 	return nil
 }
 
 // Option implements NetworkProtocol.Option.
-func (*testIPv6Protocol) Option(interface{}) *tcpip.Error {
+func (*testIPv6Protocol) Option(tcpip.GettableNetworkProtocolOption) *tcpip.Error {
 	return nil
 }
 
diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go
index 21ac38583d..2d88fa1f78 100644
--- a/pkg/tcpip/stack/registration.go
+++ b/pkg/tcpip/stack/registration.go
@@ -303,12 +303,12 @@ type NetworkProtocol interface {
 	// SetOption allows enabling/disabling protocol specific features.
 	// SetOption returns an error if the option is not supported or the
 	// provided option value is invalid.
-	SetOption(option interface{}) *tcpip.Error
+	SetOption(option tcpip.SettableNetworkProtocolOption) *tcpip.Error
 
 	// Option allows retrieving protocol specific option values.
 	// Option returns an error if the option is not supported or the
 	// provided option value is invalid.
-	Option(option interface{}) *tcpip.Error
+	Option(option tcpip.GettableNetworkProtocolOption) *tcpip.Error
 
 	// Close requests that any worker goroutines owned by the protocol
 	// stop.
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 7f5ed9e83d..c86ee1c132 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -785,7 +785,7 @@ func (s *Stack) UniqueID() uint64 {
 // options. This method returns an error if the protocol is not supported or
 // option is not supported by the protocol implementation or the provided value
 // is incorrect.
-func (s *Stack) SetNetworkProtocolOption(network tcpip.NetworkProtocolNumber, option interface{}) *tcpip.Error {
+func (s *Stack) SetNetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.SettableNetworkProtocolOption) *tcpip.Error {
 	netProto, ok := s.networkProtocols[network]
 	if !ok {
 		return tcpip.ErrUnknownProtocol
@@ -802,7 +802,7 @@ func (s *Stack) SetNetworkProtocolOption(network tcpip.NetworkProtocolNumber, op
 // if err != nil {
 //   ...
 // }
-func (s *Stack) NetworkProtocolOption(network tcpip.NetworkProtocolNumber, option interface{}) *tcpip.Error {
+func (s *Stack) NetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.GettableNetworkProtocolOption) *tcpip.Error {
 	netProto, ok := s.networkProtocols[network]
 	if !ok {
 		return tcpip.ErrUnknownProtocol
diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go
index 1deeccb898..60b54c244d 100644
--- a/pkg/tcpip/stack/stack_test.go
+++ b/pkg/tcpip/stack/stack_test.go
@@ -158,23 +158,13 @@ func (*fakeNetworkEndpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack
 
 func (*fakeNetworkEndpoint) Close() {}
 
-type fakeNetGoodOption bool
-
-type fakeNetBadOption bool
-
-type fakeNetInvalidValueOption int
-
-type fakeNetOptions struct {
-	good bool
-}
-
 // fakeNetworkProtocol is a network-layer protocol descriptor. It aggregates the
 // number of packets sent and received via endpoints of this protocol. The index
 // where packets are added is given by the packet's destination address MOD 10.
 type fakeNetworkProtocol struct {
 	packetCount     [10]int
 	sendPacketCount [10]int
-	opts            fakeNetOptions
+	defaultTTL      uint8
 }
 
 func (f *fakeNetworkProtocol) Number() tcpip.NetworkProtocolNumber {
@@ -206,22 +196,20 @@ func (f *fakeNetworkProtocol) NewEndpoint(nicID tcpip.NICID, _ stack.LinkAddress
 	}
 }
 
-func (f *fakeNetworkProtocol) SetOption(option interface{}) *tcpip.Error {
+func (f *fakeNetworkProtocol) SetOption(option tcpip.SettableNetworkProtocolOption) *tcpip.Error {
 	switch v := option.(type) {
-	case fakeNetGoodOption:
-		f.opts.good = bool(v)
+	case *tcpip.DefaultTTLOption:
+		f.defaultTTL = uint8(*v)
 		return nil
-	case fakeNetInvalidValueOption:
-		return tcpip.ErrInvalidOptionValue
 	default:
 		return tcpip.ErrUnknownProtocolOption
 	}
 }
 
-func (f *fakeNetworkProtocol) Option(option interface{}) *tcpip.Error {
+func (f *fakeNetworkProtocol) Option(option tcpip.GettableNetworkProtocolOption) *tcpip.Error {
 	switch v := option.(type) {
-	case *fakeNetGoodOption:
-		*v = fakeNetGoodOption(f.opts.good)
+	case *tcpip.DefaultTTLOption:
+		*v = tcpip.DefaultTTLOption(f.defaultTTL)
 		return nil
 	default:
 		return tcpip.ErrUnknownProtocolOption
@@ -1640,46 +1628,24 @@ func TestMulticastOrIPv6LinkLocalNeedsNoRoute(t *testing.T) {
 	}
 }
 
-func TestNetworkOptions(t *testing.T) {
+func TestNetworkOption(t *testing.T) {
 	s := stack.New(stack.Options{
 		NetworkProtocols:   []stack.NetworkProtocol{fakeNetFactory()},
 		TransportProtocols: []stack.TransportProtocol{},
 	})
 
-	// Try an unsupported network protocol.
-	if err := s.SetNetworkProtocolOption(tcpip.NetworkProtocolNumber(99999), fakeNetGoodOption(false)); err != tcpip.ErrUnknownProtocol {
-		t.Fatalf("SetNetworkProtocolOption(fakeNet2, blah, false) = %v, want = tcpip.ErrUnknownProtocol", err)
+	opt := tcpip.DefaultTTLOption(5)
+	if err := s.SetNetworkProtocolOption(fakeNetNumber, &opt); err != nil {
+		t.Fatalf("s.SetNetworkProtocolOption(%d, &%T(%d)): %s", fakeNetNumber, opt, opt, err)
 	}
 
-	testCases := []struct {
-		option   interface{}
-		wantErr  *tcpip.Error
-		verifier func(t *testing.T, p stack.NetworkProtocol)
-	}{
-		{fakeNetGoodOption(true), nil, func(t *testing.T, p stack.NetworkProtocol) {
-			t.Helper()
-			fakeNet := p.(*fakeNetworkProtocol)
-			if fakeNet.opts.good != true {
-				t.Fatalf("fakeNet.opts.good = false, want = true")
-			}
-			var v fakeNetGoodOption
-			if err := s.NetworkProtocolOption(fakeNetNumber, &v); err != nil {
-				t.Fatalf("s.NetworkProtocolOption(fakeNetNumber, &v) = %v, want = nil, where v is option %T", v, err)
-			}
-			if v != true {
-				t.Fatalf("s.NetworkProtocolOption(fakeNetNumber, &v) returned v = %v, want = true", v)
-			}
-		}},
-		{fakeNetBadOption(true), tcpip.ErrUnknownProtocolOption, nil},
-		{fakeNetInvalidValueOption(1), tcpip.ErrInvalidOptionValue, nil},
+	var optGot tcpip.DefaultTTLOption
+	if err := s.NetworkProtocolOption(fakeNetNumber, &optGot); err != nil {
+		t.Fatalf("s.NetworkProtocolOption(%d, &%T): %s", fakeNetNumber, optGot, err)
 	}
-	for _, tc := range testCases {
-		if got := s.SetNetworkProtocolOption(fakeNetNumber, tc.option); got != tc.wantErr {
-			t.Errorf("s.SetNetworkProtocolOption(fakeNet, %v) = %v, want = %v", tc.option, got, tc.wantErr)
-		}
-		if tc.verifier != nil {
-			tc.verifier(t, s.NetworkProtocolInstance(fakeNetNumber))
-		}
+
+	if opt != optGot {
+		t.Errorf("got optGot = %d, want = %d", optGot, opt)
 	}
 }
 
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index cd72d4f021..47a8d7c869 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -841,10 +841,26 @@ const (
 	PMTUDiscoveryProbe
 )
 
+// GettableNetworkProtocolOption is a marker interface for network protocol
+// options that may be queried.
+type GettableNetworkProtocolOption interface {
+	isGettableNetworkProtocolOption()
+}
+
+// SettableNetworkProtocolOption is a marker interface for network protocol
+// options that may be set.
+type SettableNetworkProtocolOption interface {
+	isSettableNetworkProtocolOption()
+}
+
 // DefaultTTLOption is used by stack.(*Stack).NetworkProtocolOption to specify
 // a default TTL.
 type DefaultTTLOption uint8
 
+func (*DefaultTTLOption) isGettableNetworkProtocolOption() {}
+
+func (*DefaultTTLOption) isSettableNetworkProtocolOption() {}
+
 // AvailableCongestionControlOption is used to query the supported congestion
 // control algorithms.
 type AvailableCongestionControlOption string
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index e8ea5093b6..c3c754046c 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -1066,8 +1066,13 @@ func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (in
 	}
 
 	// Set default TTLs as required by socket/netstack.
-	s.Stack.SetNetworkProtocolOption(ipv4.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL))
-	s.Stack.SetNetworkProtocolOption(ipv6.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL))
+	opt := tcpip.DefaultTTLOption(netstack.DefaultTTL)
+	if err := s.Stack.SetNetworkProtocolOption(ipv4.ProtocolNumber, &opt); err != nil {
+		return nil, fmt.Errorf("SetNetworkProtocolOption(%d, &%T(%d)): %s", ipv4.ProtocolNumber, opt, opt, err)
+	}
+	if err := s.Stack.SetNetworkProtocolOption(ipv6.ProtocolNumber, &opt); err != nil {
+		return nil, fmt.Errorf("SetNetworkProtocolOption(%d, &%T(%d)): %s", ipv6.ProtocolNumber, opt, opt, err)
+	}
 
 	// Enable Receive Buffer Auto-Tuning.
 	if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil {

From 8d75fc4883ca8c10fb615203993d56d33a9e36b6 Mon Sep 17 00:00:00 2001
From: Rahat Mahmood <rahat@google.com>
Date: Fri, 28 Aug 2020 14:29:16 -0700
Subject: [PATCH 111/211] Implement StatFS for various VFS2 filesystems.

This mainly involved enabling kernfs' client filesystems to provide a
StatFS implementation.

Fixes #3411, #3515.

PiperOrigin-RevId: 329009864
---
 pkg/abi/linux/fs.go                           |  1 +
 pkg/sentry/fsimpl/devpts/devpts.go            | 10 +++++++++-
 pkg/sentry/fsimpl/devpts/master.go            |  1 +
 pkg/sentry/fsimpl/devpts/slave.go             |  1 +
 pkg/sentry/fsimpl/fuse/fusefs.go              |  8 +++++++-
 pkg/sentry/fsimpl/host/host.go                |  1 +
 .../fsimpl/kernfs/dynamic_bytes_file.go       |  1 +
 pkg/sentry/fsimpl/kernfs/filesystem.go        |  5 ++---
 pkg/sentry/fsimpl/kernfs/inode_impl_util.go   | 16 +++++++++++++---
 pkg/sentry/fsimpl/kernfs/kernfs.go            |  5 +++++
 pkg/sentry/fsimpl/kernfs/kernfs_test.go       |  8 +++++---
 pkg/sentry/fsimpl/kernfs/symlink.go           |  1 +
 pkg/sentry/fsimpl/pipefs/pipefs.go            |  8 +++++---
 pkg/sentry/fsimpl/proc/filesystem.go          |  7 +++++++
 pkg/sentry/fsimpl/proc/subtasks.go            |  9 +++++----
 pkg/sentry/fsimpl/proc/task.go                |  7 ++++---
 pkg/sentry/fsimpl/proc/task_fds.go            | 19 +++++++++++--------
 pkg/sentry/fsimpl/proc/task_files.go          |  2 ++
 pkg/sentry/fsimpl/proc/tasks.go               |  9 +++++----
 pkg/sentry/fsimpl/proc/tasks_files.go         |  2 ++
 pkg/sentry/fsimpl/sockfs/sockfs.go            |  9 +++++++--
 pkg/sentry/fsimpl/sys/kcov.go                 |  3 ++-
 pkg/sentry/fsimpl/sys/sys.go                  | 14 ++++++++++++++
 pkg/sentry/vfs/filesystem_impl_util.go        | 13 +++++++++++++
 test/syscalls/BUILD                           |  1 +
 test/syscalls/linux/pipe.cc                   | 12 ++++++++++++
 test/syscalls/linux/proc.cc                   | 14 ++++++++++++++
 test/syscalls/linux/socket.cc                 | 17 +++++++++++++++++
 test/syscalls/linux/statfs.cc                 | 16 ++++++++++------
 29 files changed, 178 insertions(+), 42 deletions(-)

diff --git a/pkg/abi/linux/fs.go b/pkg/abi/linux/fs.go
index 2b1ef0d4e7..0d921ed6f4 100644
--- a/pkg/abi/linux/fs.go
+++ b/pkg/abi/linux/fs.go
@@ -29,6 +29,7 @@ const (
 	SYSFS_MAGIC           = 0x62656572
 	TMPFS_MAGIC           = 0x01021994
 	V9FS_MAGIC            = 0x01021997
+	FUSE_SUPER_MAGIC      = 0x65735546
 )
 
 // Filesystem path limits, from uapi/linux/limits.h.
diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go
index 0eaff90873..57580f4d47 100644
--- a/pkg/sentry/fsimpl/devpts/devpts.go
+++ b/pkg/sentry/fsimpl/devpts/devpts.go
@@ -111,12 +111,13 @@ func (fs *filesystem) Release(ctx context.Context) {
 
 // rootInode is the root directory inode for the devpts mounts.
 type rootInode struct {
-	rootInodeRefs
+	implStatFS
 	kernfs.AlwaysValid
 	kernfs.InodeAttrs
 	kernfs.InodeDirectoryNoNewChildren
 	kernfs.InodeNotSymlink
 	kernfs.OrderedChildren
+	rootInodeRefs
 
 	locks vfs.FileLocks
 
@@ -240,3 +241,10 @@ func (i *rootInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback,
 func (i *rootInode) DecRef(context.Context) {
 	i.rootInodeRefs.DecRef(i.Destroy)
 }
+
+type implStatFS struct{}
+
+// StatFS implements kernfs.Inode.StatFS.
+func (*implStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error) {
+	return vfs.GenericStatFS(linux.DEVPTS_SUPER_MAGIC), nil
+}
diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go
index 3bb397f715..60feb1993f 100644
--- a/pkg/sentry/fsimpl/devpts/master.go
+++ b/pkg/sentry/fsimpl/devpts/master.go
@@ -30,6 +30,7 @@ import (
 
 // masterInode is the inode for the master end of the Terminal.
 type masterInode struct {
+	implStatFS
 	kernfs.InodeAttrs
 	kernfs.InodeNoopRefCount
 	kernfs.InodeNotDirectory
diff --git a/pkg/sentry/fsimpl/devpts/slave.go b/pkg/sentry/fsimpl/devpts/slave.go
index 32e4e19085..a9da7af648 100644
--- a/pkg/sentry/fsimpl/devpts/slave.go
+++ b/pkg/sentry/fsimpl/devpts/slave.go
@@ -29,6 +29,7 @@ import (
 
 // slaveInode is the inode for the slave end of the Terminal.
 type slaveInode struct {
+	implStatFS
 	kernfs.InodeAttrs
 	kernfs.InodeNoopRefCount
 	kernfs.InodeNotDirectory
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index 9717c0e15b..810819ae41 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -200,9 +200,9 @@ func (fs *filesystem) Release(ctx context.Context) {
 type inode struct {
 	inodeRefs
 	kernfs.InodeAttrs
+	kernfs.InodeDirectoryNoNewChildren
 	kernfs.InodeNoDynamicLookup
 	kernfs.InodeNotSymlink
-	kernfs.InodeDirectoryNoNewChildren
 	kernfs.OrderedChildren
 
 	locks vfs.FileLocks
@@ -331,3 +331,9 @@ func (i *inode) Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptio
 func (i *inode) DecRef(context.Context) {
 	i.inodeRefs.DecRef(i.Destroy)
 }
+
+// StatFS implements kernfs.Inode.StatFS.
+func (i *inode) StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error) {
+	// TODO(gvisor.dev/issue/3413): Complete the implementation of statfs.
+	return vfs.GenericStatFS(linux.FUSE_SUPER_MAGIC), nil
+}
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index 2d3821f333..7561f821cb 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -186,6 +186,7 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe
 
 // inode implements kernfs.Inode.
 type inode struct {
+	kernfs.InodeNoStatFS
 	kernfs.InodeNotDirectory
 	kernfs.InodeNotSymlink
 
diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
index 12adf727a5..1ee0896201 100644
--- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
+++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
@@ -35,6 +35,7 @@ import (
 // +stateify savable
 type DynamicBytesFile struct {
 	InodeAttrs
+	InodeNoStatFS
 	InodeNoopRefCount
 	InodeNotDirectory
 	InodeNotSymlink
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index e5d6b5c35c..0e30116899 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -721,14 +721,13 @@ func (fs *Filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
 // StatFSAt implements vfs.FilesystemImpl.StatFSAt.
 func (fs *Filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) {
 	fs.mu.RLock()
-	_, _, err := fs.walkExistingLocked(ctx, rp)
+	_, inode, err := fs.walkExistingLocked(ctx, rp)
 	fs.mu.RUnlock()
 	fs.processDeferredDecRefs(ctx)
 	if err != nil {
 		return linux.Statfs{}, err
 	}
-	// TODO(gvisor.dev/issue/1193): actually implement statfs.
-	return linux.Statfs{}, syserror.ENOSYS
+	return inode.StatFS(ctx, fs.VFSFilesystem())
 }
 
 // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index f442a56060..c0b863ba4d 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -546,12 +546,13 @@ func (InodeSymlink) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.D
 //
 // +stateify savable
 type StaticDirectory struct {
-	StaticDirectoryRefs
-	InodeNotSymlink
-	InodeDirectoryNoNewChildren
 	InodeAttrs
+	InodeDirectoryNoNewChildren
 	InodeNoDynamicLookup
+	InodeNoStatFS
+	InodeNotSymlink
 	OrderedChildren
+	StaticDirectoryRefs
 
 	locks  vfs.FileLocks
 	fdOpts GenericDirectoryFDOptions
@@ -609,3 +610,12 @@ type AlwaysValid struct{}
 func (*AlwaysValid) Valid(context.Context) bool {
 	return true
 }
+
+// InodeNoStatFS partially implements the Inode interface, where the client
+// filesystem doesn't support statfs(2).
+type InodeNoStatFS struct{}
+
+// StatFS implements Inode.StatFS.
+func (*InodeNoStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error) {
+	return linux.Statfs{}, syserror.ENOSYS
+}
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index ca36858005..88fcd54aa6 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -320,6 +320,11 @@ type Inode interface {
 	// Precondition: rp.Done(). vfsd.Impl() must be the kernfs Dentry containing
 	// the inode on which Open() is being called.
 	Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error)
+
+	// StatFS returns filesystem statistics for the client filesystem. This
+	// corresponds to vfs.FilesystemImpl.StatFSAt. If the client filesystem
+	// doesn't support statfs(2), this should return ENOSYS.
+	StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error)
 }
 
 type inodeRefs interface {
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
index e376d1736a..675587c6b9 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
@@ -98,9 +98,10 @@ func (*attrs) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.S
 type readonlyDir struct {
 	readonlyDirRefs
 	attrs
-	kernfs.InodeNotSymlink
-	kernfs.InodeNoDynamicLookup
 	kernfs.InodeDirectoryNoNewChildren
+	kernfs.InodeNoDynamicLookup
+	kernfs.InodeNoStatFS
+	kernfs.InodeNotSymlink
 	kernfs.OrderedChildren
 
 	locks vfs.FileLocks
@@ -137,9 +138,10 @@ func (d *readonlyDir) DecRef(context.Context) {
 type dir struct {
 	dirRefs
 	attrs
-	kernfs.InodeNotSymlink
 	kernfs.InodeNoDynamicLookup
+	kernfs.InodeNotSymlink
 	kernfs.OrderedChildren
+	kernfs.InodeNoStatFS
 
 	locks vfs.FileLocks
 
diff --git a/pkg/sentry/fsimpl/kernfs/symlink.go b/pkg/sentry/fsimpl/kernfs/symlink.go
index 2ab3f53fdf..64731a3e4a 100644
--- a/pkg/sentry/fsimpl/kernfs/symlink.go
+++ b/pkg/sentry/fsimpl/kernfs/symlink.go
@@ -28,6 +28,7 @@ type StaticSymlink struct {
 	InodeAttrs
 	InodeNoopRefCount
 	InodeSymlink
+	InodeNoStatFS
 
 	target string
 }
diff --git a/pkg/sentry/fsimpl/pipefs/pipefs.go b/pkg/sentry/fsimpl/pipefs/pipefs.go
index 2ca793db93..7053ad6db2 100644
--- a/pkg/sentry/fsimpl/pipefs/pipefs.go
+++ b/pkg/sentry/fsimpl/pipefs/pipefs.go
@@ -143,14 +143,16 @@ func (i *inode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth.
 	return syserror.EPERM
 }
 
-// TODO(gvisor.dev/issue/1193): kernfs does not provide a way to implement
-// statfs, from which we should indicate PIPEFS_MAGIC.
-
 // Open implements kernfs.Inode.Open.
 func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
 	return i.pipe.Open(ctx, rp.Mount(), vfsd, opts.Flags, &i.locks)
 }
 
+// StatFS implements kernfs.Inode.StatFS.
+func (i *inode) StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error) {
+	return vfs.GenericStatFS(linux.PIPEFS_MAGIC), nil
+}
+
 // NewConnectedPipeFDs returns a pair of FileDescriptions representing the read
 // and write ends of a newly-created pipe, as for pipe(2) and pipe2(2).
 //
diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go
index c350ec1271..03b5941b90 100644
--- a/pkg/sentry/fsimpl/proc/filesystem.go
+++ b/pkg/sentry/fsimpl/proc/filesystem.go
@@ -121,3 +121,10 @@ func newStaticDir(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64
 type InternalData struct {
 	Cgroups map[string]string
 }
+
+type implStatFS struct{}
+
+// StatFS implements kernfs.Inode.StatFS.
+func (*implStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error) {
+	return vfs.GenericStatFS(linux.PROC_SUPER_MAGIC), nil
+}
diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go
index 01c0efb3a4..d57d94dbc0 100644
--- a/pkg/sentry/fsimpl/proc/subtasks.go
+++ b/pkg/sentry/fsimpl/proc/subtasks.go
@@ -31,12 +31,13 @@ import (
 //
 // +stateify savable
 type subtasksInode struct {
-	subtasksInodeRefs
-	kernfs.InodeNotSymlink
-	kernfs.InodeDirectoryNoNewChildren
+	implStatFS
+	kernfs.AlwaysValid
 	kernfs.InodeAttrs
+	kernfs.InodeDirectoryNoNewChildren
+	kernfs.InodeNotSymlink
 	kernfs.OrderedChildren
-	kernfs.AlwaysValid
+	subtasksInodeRefs
 
 	locks vfs.FileLocks
 
diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go
index 66b557abd7..dbdb5d929c 100644
--- a/pkg/sentry/fsimpl/proc/task.go
+++ b/pkg/sentry/fsimpl/proc/task.go
@@ -32,12 +32,13 @@ import (
 //
 // +stateify savable
 type taskInode struct {
-	taskInodeRefs
-	kernfs.InodeNotSymlink
+	implStatFS
+	kernfs.InodeAttrs
 	kernfs.InodeDirectoryNoNewChildren
 	kernfs.InodeNoDynamicLookup
-	kernfs.InodeAttrs
+	kernfs.InodeNotSymlink
 	kernfs.OrderedChildren
+	taskInodeRefs
 
 	locks vfs.FileLocks
 
diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go
index 0527b2de8f..3f0d784613 100644
--- a/pkg/sentry/fsimpl/proc/task_fds.go
+++ b/pkg/sentry/fsimpl/proc/task_fds.go
@@ -100,13 +100,14 @@ func (i *fdDir) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, off
 //
 // +stateify savable
 type fdDirInode struct {
+	fdDir
 	fdDirInodeRefs
-	kernfs.InodeNotSymlink
-	kernfs.InodeDirectoryNoNewChildren
+	implStatFS
+	kernfs.AlwaysValid
 	kernfs.InodeAttrs
+	kernfs.InodeDirectoryNoNewChildren
+	kernfs.InodeNotSymlink
 	kernfs.OrderedChildren
-	kernfs.AlwaysValid
-	fdDir
 }
 
 var _ kernfs.Inode = (*fdDirInode)(nil)
@@ -185,6 +186,7 @@ func (i *fdDirInode) DecRef(context.Context) {
 //
 // +stateify savable
 type fdSymlink struct {
+	implStatFS
 	kernfs.InodeAttrs
 	kernfs.InodeNoopRefCount
 	kernfs.InodeSymlink
@@ -233,13 +235,14 @@ func (s *fdSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDen
 //
 // +stateify savable
 type fdInfoDirInode struct {
+	fdDir
 	fdInfoDirInodeRefs
-	kernfs.InodeNotSymlink
-	kernfs.InodeDirectoryNoNewChildren
+	implStatFS
+	kernfs.AlwaysValid
 	kernfs.InodeAttrs
+	kernfs.InodeDirectoryNoNewChildren
+	kernfs.InodeNotSymlink
 	kernfs.OrderedChildren
-	kernfs.AlwaysValid
-	fdDir
 }
 
 var _ kernfs.Inode = (*fdInfoDirInode)(nil)
diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go
index 830b789496..356036b9b9 100644
--- a/pkg/sentry/fsimpl/proc/task_files.go
+++ b/pkg/sentry/fsimpl/proc/task_files.go
@@ -648,6 +648,7 @@ func (o *oomScoreAdj) Write(ctx context.Context, src usermem.IOSequence, offset
 //
 // +stateify savable
 type exeSymlink struct {
+	implStatFS
 	kernfs.InodeAttrs
 	kernfs.InodeNoopRefCount
 	kernfs.InodeSymlink
@@ -832,6 +833,7 @@ func (s *namespaceSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.Vir
 // namespaceInode is a synthetic inode created to represent a namespace in
 // /proc/[pid]/ns/*.
 type namespaceInode struct {
+	implStatFS
 	kernfs.InodeAttrs
 	kernfs.InodeNoopRefCount
 	kernfs.InodeNotDirectory
diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go
index 863c4467ef..3ea00ab874 100644
--- a/pkg/sentry/fsimpl/proc/tasks.go
+++ b/pkg/sentry/fsimpl/proc/tasks.go
@@ -37,12 +37,13 @@ const (
 //
 // +stateify savable
 type tasksInode struct {
-	tasksInodeRefs
-	kernfs.InodeNotSymlink
-	kernfs.InodeDirectoryNoNewChildren
+	implStatFS
+	kernfs.AlwaysValid
 	kernfs.InodeAttrs
+	kernfs.InodeDirectoryNoNewChildren
+	kernfs.InodeNotSymlink
 	kernfs.OrderedChildren
-	kernfs.AlwaysValid
+	tasksInodeRefs
 
 	locks vfs.FileLocks
 
diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go
index 7d8983aa5b..8c41729e4b 100644
--- a/pkg/sentry/fsimpl/proc/tasks_files.go
+++ b/pkg/sentry/fsimpl/proc/tasks_files.go
@@ -32,6 +32,7 @@ import (
 )
 
 type selfSymlink struct {
+	implStatFS
 	kernfs.InodeAttrs
 	kernfs.InodeNoopRefCount
 	kernfs.InodeSymlink
@@ -74,6 +75,7 @@ func (*selfSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials,
 }
 
 type threadSelfSymlink struct {
+	implStatFS
 	kernfs.InodeAttrs
 	kernfs.InodeNoopRefCount
 	kernfs.InodeSymlink
diff --git a/pkg/sentry/fsimpl/sockfs/sockfs.go b/pkg/sentry/fsimpl/sockfs/sockfs.go
index c61818ff6f..94a9985686 100644
--- a/pkg/sentry/fsimpl/sockfs/sockfs.go
+++ b/pkg/sentry/fsimpl/sockfs/sockfs.go
@@ -81,10 +81,10 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe
 
 // inode implements kernfs.Inode.
 type inode struct {
-	kernfs.InodeNotDirectory
-	kernfs.InodeNotSymlink
 	kernfs.InodeAttrs
 	kernfs.InodeNoopRefCount
+	kernfs.InodeNotDirectory
+	kernfs.InodeNotSymlink
 }
 
 // Open implements kernfs.Inode.Open.
@@ -92,6 +92,11 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr
 	return nil, syserror.ENXIO
 }
 
+// StatFS implements kernfs.Inode.StatFS.
+func (i *inode) StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error) {
+	return vfs.GenericStatFS(linux.SOCKFS_MAGIC), nil
+}
+
 // NewDentry constructs and returns a sockfs dentry.
 //
 // Preconditions: mnt.Filesystem() must have been returned by NewFilesystem().
diff --git a/pkg/sentry/fsimpl/sys/kcov.go b/pkg/sentry/fsimpl/sys/kcov.go
index 92710d8775..73f3d3309f 100644
--- a/pkg/sentry/fsimpl/sys/kcov.go
+++ b/pkg/sentry/fsimpl/sys/kcov.go
@@ -39,8 +39,9 @@ func (fs *filesystem) newKcovFile(ctx context.Context, creds *auth.Credentials)
 type kcovInode struct {
 	kernfs.InodeAttrs
 	kernfs.InodeNoopRefCount
-	kernfs.InodeNotSymlink
 	kernfs.InodeNotDirectory
+	kernfs.InodeNotSymlink
+	implStatFS
 }
 
 func (i *kcovInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go
index ea30a4ec2f..39952d2d01 100644
--- a/pkg/sentry/fsimpl/sys/sys.go
+++ b/pkg/sentry/fsimpl/sys/sys.go
@@ -163,9 +163,16 @@ func (d *dir) DecRef(context.Context) {
 	d.dirRefs.DecRef(d.Destroy)
 }
 
+// StatFS implements kernfs.Inode.StatFS.
+func (d *dir) StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error) {
+	return vfs.GenericStatFS(linux.SYSFS_MAGIC), nil
+}
+
 // cpuFile implements kernfs.Inode.
 type cpuFile struct {
+	implStatFS
 	kernfs.DynamicBytesFile
+
 	maxCores uint
 }
 
@@ -182,3 +189,10 @@ func (fs *filesystem) newCPUFile(creds *auth.Credentials, maxCores uint, mode li
 	d.Init(c)
 	return d
 }
+
+type implStatFS struct{}
+
+// StatFS implements kernfs.Inode.StatFS.
+func (*implStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error) {
+	return vfs.GenericStatFS(linux.SYSFS_MAGIC), nil
+}
diff --git a/pkg/sentry/vfs/filesystem_impl_util.go b/pkg/sentry/vfs/filesystem_impl_util.go
index 465e610e06..2620cf9751 100644
--- a/pkg/sentry/vfs/filesystem_impl_util.go
+++ b/pkg/sentry/vfs/filesystem_impl_util.go
@@ -16,6 +16,9 @@ package vfs
 
 import (
 	"strings"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/usermem"
 )
 
 // GenericParseMountOptions parses a comma-separated list of options of the
@@ -41,3 +44,13 @@ func GenericParseMountOptions(str string) map[string]string {
 	}
 	return m
 }
+
+// GenericStatFS returns a statfs struct filled with the common fields for a
+// general filesystem. This is analogous to Linux's fs/libfs.c:simple_statfs().
+func GenericStatFS(fsMagic uint64) linux.Statfs {
+	return linux.Statfs{
+		Type:       fsMagic,
+		BlockSize:  usermem.PageSize,
+		NameLength: linux.NAME_MAX,
+	}
+}
diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD
index 65e8299c3e..f949bc0e3b 100644
--- a/test/syscalls/BUILD
+++ b/test/syscalls/BUILD
@@ -803,6 +803,7 @@ syscall_test(
 syscall_test(
     add_overlay = True,
     test = "//test/syscalls/linux:statfs_test",
+    use_tmpfs = True,  # Test specifically relies on TEST_TMPDIR to be tmpfs.
 )
 
 syscall_test(
diff --git a/test/syscalls/linux/pipe.cc b/test/syscalls/linux/pipe.cc
index 34291850d6..c097c9187d 100644
--- a/test/syscalls/linux/pipe.cc
+++ b/test/syscalls/linux/pipe.cc
@@ -13,7 +13,9 @@
 // limitations under the License.
 
 #include <fcntl.h> /* Obtain O_* constant definitions */
+#include <linux/magic.h>
 #include <sys/ioctl.h>
+#include <sys/statfs.h>
 #include <sys/uio.h>
 #include <unistd.h>
 
@@ -198,6 +200,16 @@ TEST_P(PipeTest, NonBlocking) {
               SyscallFailsWithErrno(EWOULDBLOCK));
 }
 
+TEST(PipeTest, StatFS) {
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  struct statfs st;
+  EXPECT_THAT(fstatfs(fds[0], &st), SyscallSucceeds());
+  EXPECT_EQ(st.f_type, PIPEFS_MAGIC);
+  EXPECT_EQ(st.f_bsize, getpagesize());
+  EXPECT_EQ(st.f_namelen, NAME_MAX);
+}
+
 TEST(Pipe2Test, CloExec) {
   int fds[2];
   ASSERT_THAT(pipe2(fds, O_CLOEXEC), SyscallSucceeds());
diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc
index d6b875dbff..b73189e551 100644
--- a/test/syscalls/linux/proc.cc
+++ b/test/syscalls/linux/proc.cc
@@ -16,6 +16,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
+#include <linux/magic.h>
 #include <sched.h>
 #include <signal.h>
 #include <stddef.h>
@@ -26,6 +27,7 @@
 #include <sys/mman.h>
 #include <sys/prctl.h>
 #include <sys/stat.h>
+#include <sys/statfs.h>
 #include <sys/utsname.h>
 #include <syscall.h>
 #include <unistd.h>
@@ -2159,6 +2161,18 @@ TEST(Proc, PidTidIOAccounting) {
   noop.Join();
 }
 
+TEST(Proc, Statfs) {
+  struct statfs st;
+  EXPECT_THAT(statfs("/proc", &st), SyscallSucceeds());
+  if (IsRunningWithVFS1()) {
+    EXPECT_EQ(st.f_type, ANON_INODE_FS_MAGIC);
+  } else {
+    EXPECT_EQ(st.f_type, PROC_SUPER_MAGIC);
+  }
+  EXPECT_EQ(st.f_bsize, getpagesize());
+  EXPECT_EQ(st.f_namelen, NAME_MAX);
+}
+
 }  // namespace
 }  // namespace testing
 }  // namespace gvisor
diff --git a/test/syscalls/linux/socket.cc b/test/syscalls/linux/socket.cc
index c20cd3fccc..e680d3dd71 100644
--- a/test/syscalls/linux/socket.cc
+++ b/test/syscalls/linux/socket.cc
@@ -14,6 +14,7 @@
 
 #include <sys/socket.h>
 #include <sys/stat.h>
+#include <sys/statfs.h>
 #include <sys/types.h>
 #include <unistd.h>
 
@@ -26,6 +27,9 @@
 namespace gvisor {
 namespace testing {
 
+// From linux/magic.h, but we can't depend on linux headers here.
+#define SOCKFS_MAGIC 0x534F434B
+
 TEST(SocketTest, UnixSocketPairProtocol) {
   int socks[2];
   ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, PF_UNIX, socks),
@@ -94,6 +98,19 @@ TEST(SocketTest, UnixSocketStat) {
   }
 }
 
+TEST(SocketTest, UnixSocketStatFS) {
+  SKIP_IF(IsRunningWithVFS1());
+
+  FileDescriptor bound =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_STREAM, PF_UNIX));
+
+  struct statfs st;
+  EXPECT_THAT(fstatfs(bound.get(), &st), SyscallSucceeds());
+  EXPECT_EQ(st.f_type, SOCKFS_MAGIC);
+  EXPECT_EQ(st.f_bsize, getpagesize());
+  EXPECT_EQ(st.f_namelen, NAME_MAX);
+}
+
 using SocketOpenTest = ::testing::TestWithParam<int>;
 
 // UDS cannot be opened.
diff --git a/test/syscalls/linux/statfs.cc b/test/syscalls/linux/statfs.cc
index aca51d30f5..49f2f156c7 100644
--- a/test/syscalls/linux/statfs.cc
+++ b/test/syscalls/linux/statfs.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include <fcntl.h>
+#include <linux/magic.h>
 #include <sys/statfs.h>
 #include <unistd.h>
 
@@ -26,6 +27,10 @@ namespace testing {
 
 namespace {
 
+// From linux/magic.h. For some reason, not defined in the headers for some
+// build environments.
+#define OVERLAYFS_SUPER_MAGIC 0x794c7630
+
 TEST(StatfsTest, CannotStatBadPath) {
   auto temp_file = NewTempAbsPathInDir("/tmp");
 
@@ -38,19 +43,18 @@ TEST(StatfsTest, InternalTmpfs) {
 
   struct statfs st;
   EXPECT_THAT(statfs(temp_file.path().c_str(), &st), SyscallSucceeds());
+  // Note: We could be an overlay or goferfs on some configurations.
+  EXPECT_TRUE(st.f_type == TMPFS_MAGIC || st.f_type == OVERLAYFS_SUPER_MAGIC ||
+              st.f_type == V9FS_MAGIC);
 }
 
 TEST(StatfsTest, InternalDevShm) {
   struct statfs st;
   EXPECT_THAT(statfs("/dev/shm", &st), SyscallSucceeds());
-}
-
-TEST(StatfsTest, NameLen) {
-  struct statfs st;
-  EXPECT_THAT(statfs("/dev/shm", &st), SyscallSucceeds());
 
   // This assumes that /dev/shm is tmpfs.
-  EXPECT_EQ(st.f_namelen, NAME_MAX);
+  // Note: We could be an overlay on some configurations.
+  EXPECT_TRUE(st.f_type == TMPFS_MAGIC || st.f_type == OVERLAYFS_SUPER_MAGIC);
 }
 
 TEST(FstatfsTest, CannotStatBadFd) {

From aaae7109d23cc9a97aea27efcf6f541a594eddf4 Mon Sep 17 00:00:00 2001
From: Ghanan Gowripalan <ghanan@google.com>
Date: Fri, 28 Aug 2020 14:37:53 -0700
Subject: [PATCH 112/211] Don't bind loopback to all IPs in an IPv6 subnet

An earlier change considered the loopback bound to all addresses in an
assigned subnet. This should have only been done for IPv4 to maintain
compatibility with Linux:

```
$ ip addr show dev lo
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group ...
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
$ ping 2001:db8::1
PING 2001:db8::1(2001:db8::1) 56 data bytes
^C
--- 2001:db8::1 ping statistics ---
4 packets transmitted, 0 received, 100% packet loss, time 3062ms

$ ping 2001:db8::2
PING 2001:db8::2(2001:db8::2) 56 data bytes
^C
--- 2001:db8::2 ping statistics ---
3 packets transmitted, 0 received, 100% packet loss, time 2030ms

$ sudo ip addr add 2001:db8::1/64 dev lo
$ ping 2001:db8::1
PING 2001:db8::1(2001:db8::1) 56 data bytes
64 bytes from 2001:db8::1: icmp_seq=1 ttl=64 time=0.055 ms
64 bytes from 2001:db8::1: icmp_seq=2 ttl=64 time=0.074 ms
64 bytes from 2001:db8::1: icmp_seq=3 ttl=64 time=0.073 ms
64 bytes from 2001:db8::1: icmp_seq=4 ttl=64 time=0.071 ms
^C
--- 2001:db8::1 ping statistics ---
4 packets transmitted, 4 received, 0% packet loss, time 3075ms
rtt min/avg/max/mdev = 0.055/0.068/0.074/0.007 ms
$ ping 2001:db8::2
PING 2001:db8::2(2001:db8::2) 56 data bytes
From 2001:db8::1 icmp_seq=1 Destination unreachable: No route
From 2001:db8::1 icmp_seq=2 Destination unreachable: No route
From 2001:db8::1 icmp_seq=3 Destination unreachable: No route
From 2001:db8::1 icmp_seq=4 Destination unreachable: No route
^C
--- 2001:db8::2 ping statistics ---
4 packets transmitted, 0 received, +4 errors, 100% packet loss, time 3070ms
```

Test: integration_test.TestLoopbackAcceptAllInSubnet
PiperOrigin-RevId: 329011566
---
 pkg/tcpip/stack/nic.go                        |  6 +-
 pkg/tcpip/tests/integration/loopback_test.go  | 40 -------------
 test/syscalls/linux/BUILD                     | 19 +-----
 .../socket_ip_udp_unbound_netlink_util.cc     | 58 -------------------
 .../socket_ip_udp_unbound_netlink_util.h      | 34 -----------
 .../linux/socket_ipv4_udp_unbound_netlink.cc  | 32 +++++++++-
 .../linux/socket_ipv4_udp_unbound_netlink.h   |  4 +-
 .../linux/socket_ipv6_udp_unbound_netlink.cc  | 28 +++------
 .../linux/socket_ipv6_udp_unbound_netlink.h   |  4 +-
 9 files changed, 49 insertions(+), 176 deletions(-)
 delete mode 100644 test/syscalls/linux/socket_ip_udp_unbound_netlink_util.cc
 delete mode 100644 test/syscalls/linux/socket_ip_udp_unbound_netlink_util.h

diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index 8e700990dd..863ef6bee2 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -676,10 +676,10 @@ func (n *NIC) getRefOrCreateTemp(protocol tcpip.NetworkProtocolNumber, address t
 	}
 
 	// A usable reference was not found, create a temporary one if requested by
-	// the caller or if the address is found in the NIC's subnets and the NIC is
-	// a loopback interface.
+	// the caller or if the IPv4 address is found in the NIC's subnets and the NIC
+	// is a loopback interface.
 	createTempEP := spoofingOrPromiscuous
-	if !createTempEP && n.isLoopback() {
+	if !createTempEP && n.isLoopback() && protocol == header.IPv4ProtocolNumber {
 		for _, r := range n.mu.endpoints {
 			addr := r.addrWithPrefix()
 			subnet := addr.Subnet()
diff --git a/pkg/tcpip/tests/integration/loopback_test.go b/pkg/tcpip/tests/integration/loopback_test.go
index 3a2f758377..1b18023c56 100644
--- a/pkg/tcpip/tests/integration/loopback_test.go
+++ b/pkg/tcpip/tests/integration/loopback_test.go
@@ -109,52 +109,12 @@ func TestLoopbackAcceptAllInSubnet(t *testing.T) {
 			dstAddr:    ipv6Addr.Address,
 			expectRx:   true,
 		},
-		{
-			name:       "IPv6 bind to wildcard and send to assigned address",
-			addAddress: ipv6ProtocolAddress,
-			dstAddr:    ipv6Addr.Address,
-			expectRx:   true,
-		},
 		{
 			name:       "IPv6 bind to wildcard and send to other subnet-local address",
 			addAddress: ipv6ProtocolAddress,
 			dstAddr:    otherIPv6Address,
-			expectRx:   true,
-		},
-		{
-			name:       "IPv6 bind to wildcard send to other address",
-			addAddress: ipv6ProtocolAddress,
-			dstAddr:    remoteIPv6Addr,
-			expectRx:   false,
-		},
-		{
-			name:       "IPv6 bind to other subnet-local address and send to assigned address",
-			addAddress: ipv6ProtocolAddress,
-			bindAddr:   otherIPv6Address,
-			dstAddr:    ipv6Addr.Address,
-			expectRx:   false,
-		},
-		{
-			name:       "IPv6 bind and send to other subnet-local address",
-			addAddress: ipv6ProtocolAddress,
-			bindAddr:   otherIPv6Address,
-			dstAddr:    otherIPv6Address,
-			expectRx:   true,
-		},
-		{
-			name:       "IPv6 bind to assigned address and send to other subnet-local address",
-			addAddress: ipv6ProtocolAddress,
-			bindAddr:   ipv6Addr.Address,
-			dstAddr:    otherIPv6Address,
 			expectRx:   false,
 		},
-		{
-			name:       "IPv6 bind and send to assigned address",
-			addAddress: ipv6ProtocolAddress,
-			bindAddr:   ipv6Addr.Address,
-			dstAddr:    ipv6Addr.Address,
-			expectRx:   true,
-		},
 	}
 
 	for _, test := range tests {
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index 5a323d3319..fad3be7bf2 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -2416,21 +2416,6 @@ cc_library(
     alwayslink = 1,
 )
 
-cc_library(
-    name = "socket_ip_udp_unbound_netlink_test_utils",
-    testonly = 1,
-    srcs = [
-        "socket_ip_udp_unbound_netlink_util.cc",
-    ],
-    hdrs = [
-        "socket_ip_udp_unbound_netlink_util.h",
-    ],
-    deps = [
-        ":socket_test_util",
-    ],
-    alwayslink = 1,
-)
-
 cc_library(
     name = "socket_ipv4_udp_unbound_netlink_test_cases",
     testonly = 1,
@@ -2441,8 +2426,8 @@ cc_library(
         "socket_ipv4_udp_unbound_netlink.h",
     ],
     deps = [
-        ":socket_ip_udp_unbound_netlink_test_utils",
         ":socket_netlink_route_util",
+        ":socket_test_util",
         "//test/util:capability_util",
         gtest,
     ],
@@ -2459,8 +2444,8 @@ cc_library(
         "socket_ipv6_udp_unbound_netlink.h",
     ],
     deps = [
-        ":socket_ip_udp_unbound_netlink_test_utils",
         ":socket_netlink_route_util",
+        ":socket_test_util",
         "//test/util:capability_util",
         gtest,
     ],
diff --git a/test/syscalls/linux/socket_ip_udp_unbound_netlink_util.cc b/test/syscalls/linux/socket_ip_udp_unbound_netlink_util.cc
deleted file mode 100644
index 13ffafde72..0000000000
--- a/test/syscalls/linux/socket_ip_udp_unbound_netlink_util.cc
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "test/syscalls/linux/socket_ip_udp_unbound_netlink_util.h"
-
-namespace gvisor {
-namespace testing {
-
-const size_t kSendBufSize = 200;
-
-void IPUDPUnboundSocketNetlinkTest::TestSendRecv(TestAddress sender_addr,
-                                                 TestAddress receiver_addr) {
-  auto snd_sock = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
-  auto rcv_sock = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
-
-  EXPECT_THAT(
-      bind(snd_sock->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr),
-           sender_addr.addr_len),
-      SyscallSucceeds());
-
-  EXPECT_THAT(
-      bind(rcv_sock->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
-           receiver_addr.addr_len),
-      SyscallSucceeds());
-  socklen_t receiver_addr_len = receiver_addr.addr_len;
-  ASSERT_THAT(getsockname(rcv_sock->get(),
-                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
-                          &receiver_addr_len),
-              SyscallSucceeds());
-  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
-  char send_buf[kSendBufSize];
-  RandomizeBuffer(send_buf, kSendBufSize);
-  EXPECT_THAT(
-      RetryEINTR(sendto)(snd_sock->get(), send_buf, kSendBufSize, 0,
-                         reinterpret_cast<sockaddr*>(&receiver_addr.addr),
-                         receiver_addr.addr_len),
-      SyscallSucceedsWithValue(kSendBufSize));
-
-  // Check that we received the packet.
-  char recv_buf[kSendBufSize] = {};
-  ASSERT_THAT(RetryEINTR(recv)(rcv_sock->get(), recv_buf, kSendBufSize, 0),
-              SyscallSucceedsWithValue(kSendBufSize));
-  EXPECT_EQ(0, memcmp(send_buf, recv_buf, kSendBufSize));
-}
-
-}  // namespace testing
-}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_udp_unbound_netlink_util.h b/test/syscalls/linux/socket_ip_udp_unbound_netlink_util.h
deleted file mode 100644
index 157fb0939b..0000000000
--- a/test/syscalls/linux/socket_ip_udp_unbound_netlink_util.h
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_UDP_UNBOUND_NETLINK_UTIL_H_
-#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_UDP_UNBOUND_NETLINK_UTIL_H_
-
-#include "test/syscalls/linux/socket_test_util.h"
-
-namespace gvisor {
-namespace testing {
-
-// Test fixture for tests that apply to IP UDP sockets.
-class IPUDPUnboundSocketNetlinkTest : public SimpleSocketTest {
- public:
-  // TestSendRecv tests sending and receiving a UDP packet from |sender_addr| to
-  // |receiver_addr|.
-  void TestSendRecv(TestAddress sender_addr, TestAddress receiver_addr);
-};
-
-}  // namespace testing
-}  // namespace gvisor
-
-#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_UDP_UNBOUND_NETLINK_UTIL_H_
diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.cc
index 696fbb1898..79eb48afa5 100644
--- a/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.cc
+++ b/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.cc
@@ -23,6 +23,8 @@
 namespace gvisor {
 namespace testing {
 
+constexpr size_t kSendBufSize = 200;
+
 // Checks that the loopback interface considers itself bound to all IPs in an
 // associated subnet.
 TEST_P(IPv4UDPUnboundSocketNetlinkTest, JoinSubnet) {
@@ -35,6 +37,9 @@ TEST_P(IPv4UDPUnboundSocketNetlinkTest, JoinSubnet) {
   EXPECT_NO_ERRNO(LinkAddLocalAddr(loopback_link.index, AF_INET,
                                    /*prefixlen=*/24, &addr, sizeof(addr)));
 
+  auto snd_sock = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto rcv_sock = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
   // Send from an unassigned address but an address that is in the subnet
   // associated with the loopback interface.
   TestAddress sender_addr("V4NotAssignd1");
@@ -43,6 +48,10 @@ TEST_P(IPv4UDPUnboundSocketNetlinkTest, JoinSubnet) {
   EXPECT_EQ(1, inet_pton(AF_INET, "192.0.2.2",
                          &(reinterpret_cast<sockaddr_in*>(&sender_addr.addr)
                                ->sin_addr.s_addr)));
+  EXPECT_THAT(
+      bind(snd_sock->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr),
+           sender_addr.addr_len),
+      SyscallSucceeds());
 
   // Send the packet to an unassigned address but an address that is in the
   // subnet associated with the loopback interface.
@@ -52,8 +61,29 @@ TEST_P(IPv4UDPUnboundSocketNetlinkTest, JoinSubnet) {
   EXPECT_EQ(1, inet_pton(AF_INET, "192.0.2.254",
                          &(reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)
                                ->sin_addr.s_addr)));
+  EXPECT_THAT(
+      bind(rcv_sock->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(rcv_sock->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+  char send_buf[kSendBufSize];
+  RandomizeBuffer(send_buf, kSendBufSize);
+  EXPECT_THAT(
+      RetryEINTR(sendto)(snd_sock->get(), send_buf, kSendBufSize, 0,
+                         reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                         receiver_addr.addr_len),
+      SyscallSucceedsWithValue(kSendBufSize));
 
-  TestSendRecv(sender_addr, receiver_addr);
+  // Check that we received the packet.
+  char recv_buf[kSendBufSize] = {};
+  ASSERT_THAT(RetryEINTR(recv)(rcv_sock->get(), recv_buf, kSendBufSize, 0),
+              SyscallSucceedsWithValue(kSendBufSize));
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, kSendBufSize));
 }
 
 }  // namespace testing
diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.h b/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.h
index fcfb3318e2..73e7836d50 100644
--- a/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.h
+++ b/test/syscalls/linux/socket_ipv4_udp_unbound_netlink.h
@@ -15,13 +15,13 @@
 #ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_NETLINK_UTIL_H_
 #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_NETLINK_UTIL_H_
 
-#include "test/syscalls/linux/socket_ip_udp_unbound_netlink_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
 
 namespace gvisor {
 namespace testing {
 
 // Test fixture for tests that apply to IPv4 UDP sockets.
-using IPv4UDPUnboundSocketNetlinkTest = IPUDPUnboundSocketNetlinkTest;
+using IPv4UDPUnboundSocketNetlinkTest = SimpleSocketTest;
 
 }  // namespace testing
 }  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.cc b/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.cc
index 539a4ec551..2ee218231e 100644
--- a/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.cc
+++ b/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.cc
@@ -23,13 +23,10 @@
 namespace gvisor {
 namespace testing {
 
-// Checks that the loopback interface considers itself bound to all IPs in an
-// associated subnet.
+// Checks that the loopback interface does not consider itself bound to all IPs
+// in an associated subnet.
 TEST_P(IPv6UDPUnboundSocketNetlinkTest, JoinSubnet) {
-  // TODO(b/166440211): Only run this test on gvisor or remove if the loopback
-  // interface should not consider itself bound to all IPs in an IPv6 subnet.
-  SKIP_IF(!IsRunningOnGvisor() ||
-          !ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
 
   // Add an IP address to the loopback interface.
   Link loopback_link = ASSERT_NO_ERRNO_AND_VALUE(LoopbackLink());
@@ -38,25 +35,18 @@ TEST_P(IPv6UDPUnboundSocketNetlinkTest, JoinSubnet) {
   EXPECT_NO_ERRNO(LinkAddLocalAddr(loopback_link.index, AF_INET6,
                                    /*prefixlen=*/64, &addr, sizeof(addr)));
 
-  // Send from an unassigned address but an address that is in the subnet
-  // associated with the loopback interface.
+  // Binding to an unassigned address but an address that is in the subnet
+  // associated with the loopback interface should fail.
   TestAddress sender_addr("V6NotAssignd1");
   sender_addr.addr.ss_family = AF_INET6;
   sender_addr.addr_len = sizeof(sockaddr_in6);
   EXPECT_EQ(1, inet_pton(AF_INET6, "2001:db8::2",
                          reinterpret_cast<sockaddr_in6*>(&sender_addr.addr)
                              ->sin6_addr.s6_addr));
-
-  // Send the packet to an unassigned address but an address that is in the
-  // subnet associated with the loopback interface.
-  TestAddress receiver_addr("V6NotAssigned2");
-  receiver_addr.addr.ss_family = AF_INET6;
-  receiver_addr.addr_len = sizeof(sockaddr_in6);
-  EXPECT_EQ(1, inet_pton(AF_INET6, "2001:db8::ffff:ffff:ffff:ffff",
-                         reinterpret_cast<sockaddr_in6*>(&receiver_addr.addr)
-                             ->sin6_addr.s6_addr));
-
-  TestSendRecv(sender_addr, receiver_addr);
+  auto sock = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  EXPECT_THAT(bind(sock->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr),
+                   sender_addr.addr_len),
+              SyscallFailsWithErrno(EADDRNOTAVAIL));
 }
 
 }  // namespace testing
diff --git a/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.h b/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.h
index 6a2b0a5be4..88098be820 100644
--- a/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.h
+++ b/test/syscalls/linux/socket_ipv6_udp_unbound_netlink.h
@@ -15,13 +15,13 @@
 #ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV6_UDP_UNBOUND_NETLINK_UTIL_H_
 #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV6_UDP_UNBOUND_NETLINK_UTIL_H_
 
-#include "test/syscalls/linux/socket_ip_udp_unbound_netlink_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
 
 namespace gvisor {
 namespace testing {
 
 // Test fixture for tests that apply to IPv6 UDP sockets.
-using IPv6UDPUnboundSocketNetlinkTest = IPUDPUnboundSocketNetlinkTest;
+using IPv6UDPUnboundSocketNetlinkTest = SimpleSocketTest;
 
 }  // namespace testing
 }  // namespace gvisor

From 1444327cd1a5cf9a570cf3ff37c0814e54d31daa Mon Sep 17 00:00:00 2001
From: Tamir Duberstein <tamird@google.com>
Date: Fri, 28 Aug 2020 16:43:24 -0700
Subject: [PATCH 113/211] Include command output on error

Currently the logs produce

  TestOne: packetimpact_test.go:182: listing devices on ... container: process terminated with status: 126

which is not actionable; presumably the `ip` command output is interesting.

PiperOrigin-RevId: 329032105
---
 test/packetimpact/runner/packetimpact_test.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go
index e8c183977c..cb9bfd5b73 100644
--- a/test/packetimpact/runner/packetimpact_test.go
+++ b/test/packetimpact/runner/packetimpact_test.go
@@ -369,11 +369,11 @@ func createDockerNetwork(ctx context.Context, n *dockerutil.Network) error {
 func deviceByIP(ctx context.Context, d *dockerutil.Container, ip net.IP) (string, netdevs.DeviceInfo, error) {
 	out, err := d.Exec(ctx, dockerutil.ExecOpts{}, "ip", "addr", "show")
 	if err != nil {
-		return "", netdevs.DeviceInfo{}, fmt.Errorf("listing devices on %s container: %w", d.Name, err)
+		return "", netdevs.DeviceInfo{}, fmt.Errorf("listing devices on %s container: %w\n%s", d.Name, err, out)
 	}
 	devs, err := netdevs.ParseDevices(out)
 	if err != nil {
-		return "", netdevs.DeviceInfo{}, fmt.Errorf("parsing devices from %s container: %w", d.Name, err)
+		return "", netdevs.DeviceInfo{}, fmt.Errorf("parsing devices from %s container: %w\n%s", d.Name, err, out)
 	}
 	testDevice, deviceInfo, err := netdevs.FindDeviceByIP(ip, devs)
 	if err != nil {

From fef6124b9dfa1e5c86e7b7b8c20f039d24291992 Mon Sep 17 00:00:00 2001
From: Nicolas Lacasse <nlacasse@google.com>
Date: Fri, 28 Aug 2020 17:18:43 -0700
Subject: [PATCH 114/211] Fix kernfs.Dentry reference leak.

PiperOrigin-RevId: 329036994
---
 pkg/sentry/socket/netstack/netstack_vfs2.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go
index 59fa4c58f0..1f7d17f5fc 100644
--- a/pkg/sentry/socket/netstack/netstack_vfs2.go
+++ b/pkg/sentry/socket/netstack/netstack_vfs2.go
@@ -56,6 +56,7 @@ func NewVFS2(t *kernel.Task, family int, skType linux.SockType, protocol int, qu
 
 	mnt := t.Kernel().SocketMount()
 	d := sockfs.NewDentry(t.Credentials(), mnt)
+	defer d.DecRef(t)
 
 	s := &SocketVFS2{
 		socketOpsCommon: socketOpsCommon{

From 56a948dba6beca969d34b8581cd03acf21bd7a1d Mon Sep 17 00:00:00 2001
From: Ian Lewis <ianlewis@google.com>
Date: Fri, 28 Aug 2020 18:07:14 -0700
Subject: [PATCH 115/211] Add code search badge

PiperOrigin-RevId: 329042549
---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index ed9e0e92b2..0a79e2cff4 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,7 @@
 
 ![](https://github.com/google/gvisor/workflows/Build/badge.svg)
 [![gVisor chat](https://badges.gitter.im/gvisor/community.png)](https://gitter.im/gvisor/community)
+[![code search](https://img.shields.io/badge/code-search-blue)](https://cs.opensource.google/gvisor/gvisor)
 
 ## What is gVisor?
 

From bf4f4fceafcbd672f7dbbd4cdde0ff78fe216403 Mon Sep 17 00:00:00 2001
From: Rahat Mahmood <rahat@google.com>
Date: Mon, 31 Aug 2020 11:56:49 -0700
Subject: [PATCH 116/211] Run syscall tests in uts namespaces.

Some syscall tests, namely uname_test_*, modify the host and domain
names, which alters the execution environment and can have unintended
consequences for other tests. For example, modifying the hostname
causes some networking tests to fail DNS lookups. Run all syscall
tests in their own UTS namespaces to isolate these changes.

PiperOrigin-RevId: 329348127
---
 test/runner/runner.go | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/test/runner/runner.go b/test/runner/runner.go
index 5ac91310df..22d535f8dc 100644
--- a/test/runner/runner.go
+++ b/test/runner/runner.go
@@ -106,11 +106,14 @@ func runTestCaseNative(testBin string, tc gtest.TestCase, t *testing.T) {
 	cmd.Env = env
 	cmd.Stdout = os.Stdout
 	cmd.Stderr = os.Stderr
+	cmd.SysProcAttr = &syscall.SysProcAttr{}
+
+	if specutils.HasCapabilities(capability.CAP_SYS_ADMIN) {
+		cmd.SysProcAttr.Cloneflags |= syscall.CLONE_NEWUTS
+	}
 
 	if specutils.HasCapabilities(capability.CAP_NET_ADMIN) {
-		cmd.SysProcAttr = &syscall.SysProcAttr{
-			Cloneflags: syscall.CLONE_NEWNET,
-		}
+		cmd.SysProcAttr.Cloneflags |= syscall.CLONE_NEWNET
 	}
 
 	if err := cmd.Run(); err != nil {

From 661c6bbb180129f2a81484005571233df6da16d2 Mon Sep 17 00:00:00 2001
From: Ting-Yu Wang <anivia@google.com>
Date: Mon, 31 Aug 2020 12:01:46 -0700
Subject: [PATCH 117/211] stateify: Bring back struct field and type names in
 pretty print

PiperOrigin-RevId: 329349158
---
 pkg/state/pretty/pretty.go | 103 ++++++++++++++++++++++++-------------
 1 file changed, 66 insertions(+), 37 deletions(-)

diff --git a/pkg/state/pretty/pretty.go b/pkg/state/pretty/pretty.go
index 1375fcc38d..887f453a90 100644
--- a/pkg/state/pretty/pretty.go
+++ b/pkg/state/pretty/pretty.go
@@ -26,12 +26,17 @@ import (
 	"gvisor.dev/gvisor/pkg/state/wire"
 )
 
-func formatRef(x *wire.Ref, graph uint64, html bool) string {
+type printer struct {
+	html      bool
+	typeSpecs map[string]*wire.Type
+}
+
+func (p *printer) formatRef(x *wire.Ref, graph uint64) string {
 	baseRef := fmt.Sprintf("g%dr%d", graph, x.Root)
 	fullRef := baseRef
 	if len(x.Dots) > 0 {
 		// See wire.Ref; Type valid if Dots non-zero.
-		typ, _ := formatType(x.Type, graph, html)
+		typ, _ := p.formatType(x.Type, graph)
 		var buf strings.Builder
 		buf.WriteString("(*")
 		buf.WriteString(typ)
@@ -51,34 +56,40 @@ func formatRef(x *wire.Ref, graph uint64, html bool) string {
 		buf.WriteString(")")
 		fullRef = buf.String()
 	}
-	if html {
+	if p.html {
 		return fmt.Sprintf("<a href=\"#%s\">%s</a>", baseRef, fullRef)
 	}
 	return fullRef
 }
 
-func formatType(t wire.TypeSpec, graph uint64, html bool) (string, bool) {
+func (p *printer) formatType(t wire.TypeSpec, graph uint64) (string, bool) {
 	switch x := t.(type) {
 	case wire.TypeID:
-		base := fmt.Sprintf("g%dt%d", graph, x)
-		if html {
-			return fmt.Sprintf("<a href=\"#%s\">%s</a>", base, base), true
+		tag := fmt.Sprintf("g%dt%d", graph, x)
+		desc := tag
+		if spec, ok := p.typeSpecs[tag]; ok {
+			desc += fmt.Sprintf("=%s", spec.Name)
+		} else {
+			desc += "!missing-type-spec"
+		}
+		if p.html {
+			return fmt.Sprintf("<a href=\"#%s\">%s</a>", tag, desc), true
 		}
-		return fmt.Sprintf("%s", base), true
+		return desc, true
 	case wire.TypeSpecNil:
 		return "", false // Only nil type.
 	case *wire.TypeSpecPointer:
-		element, _ := formatType(x.Type, graph, html)
+		element, _ := p.formatType(x.Type, graph)
 		return fmt.Sprintf("(*%s)", element), true
 	case *wire.TypeSpecArray:
-		element, _ := formatType(x.Type, graph, html)
+		element, _ := p.formatType(x.Type, graph)
 		return fmt.Sprintf("[%d](%s)", x.Count, element), true
 	case *wire.TypeSpecSlice:
-		element, _ := formatType(x.Type, graph, html)
+		element, _ := p.formatType(x.Type, graph)
 		return fmt.Sprintf("([]%s)", element), true
 	case *wire.TypeSpecMap:
-		key, _ := formatType(x.Key, graph, html)
-		value, _ := formatType(x.Value, graph, html)
+		key, _ := p.formatType(x.Key, graph)
+		value, _ := p.formatType(x.Value, graph)
 		return fmt.Sprintf("(map[%s]%s)", key, value), true
 	default:
 		panic(fmt.Sprintf("unreachable: unknown type %T", t))
@@ -87,7 +98,7 @@ func formatType(t wire.TypeSpec, graph uint64, html bool) (string, bool) {
 
 // format formats a single object, for pretty-printing. It also returns whether
 // the value is a non-zero value.
-func format(graph uint64, depth int, encoded wire.Object, html bool) (string, bool) {
+func (p *printer) format(graph uint64, depth int, encoded wire.Object) (string, bool) {
 	switch x := encoded.(type) {
 	case wire.Nil:
 		return "nil", false
@@ -98,7 +109,7 @@ func format(graph uint64, depth int, encoded wire.Object, html bool) (string, bo
 	case *wire.Complex128:
 		return fmt.Sprintf("%f+%fi", real(*x), imag(*x)), *x != 0.0
 	case *wire.Ref:
-		return formatRef(x, graph, html), x.Root != 0
+		return p.formatRef(x, graph), x.Root != 0
 	case *wire.Type:
 		tabs := "\n" + strings.Repeat("\t", depth)
 		items := make([]string, 0, len(x.Fields)+2)
@@ -109,7 +120,7 @@ func format(graph uint64, depth int, encoded wire.Object, html bool) (string, bo
 		items = append(items, "}")
 		return strings.Join(items, tabs), true // No zero value.
 	case *wire.Slice:
-		return fmt.Sprintf("%s{len:%d,cap:%d}", formatRef(&x.Ref, graph, html), x.Length, x.Capacity), x.Capacity != 0
+		return fmt.Sprintf("%s{len:%d,cap:%d}", p.formatRef(&x.Ref, graph), x.Length, x.Capacity), x.Capacity != 0
 	case *wire.Array:
 		if len(x.Contents) == 0 {
 			return "[]", false
@@ -119,7 +130,7 @@ func format(graph uint64, depth int, encoded wire.Object, html bool) (string, bo
 		items = append(items, "[")
 		tabs := "\n" + strings.Repeat("\t", depth)
 		for i := 0; i < len(x.Contents); i++ {
-			item, ok := format(graph, depth+1, x.Contents[i], html)
+			item, ok := p.format(graph, depth+1, x.Contents[i])
 			if !ok {
 				zeros = append(zeros, fmt.Sprintf("\t%s,", item))
 				continue
@@ -136,7 +147,9 @@ func format(graph uint64, depth int, encoded wire.Object, html bool) (string, bo
 		items = append(items, "]")
 		return strings.Join(items, tabs), len(zeros) < len(x.Contents)
 	case *wire.Struct:
-		typ, _ := formatType(x.TypeID, graph, html)
+		tag := fmt.Sprintf("g%dt%d", graph, x.TypeID)
+		spec, _ := p.typeSpecs[tag]
+		typ, _ := p.formatType(x.TypeID, graph)
 		if x.Fields() == 0 {
 			return fmt.Sprintf("struct[%s]{}", typ), false
 		}
@@ -145,9 +158,15 @@ func format(graph uint64, depth int, encoded wire.Object, html bool) (string, bo
 		tabs := "\n" + strings.Repeat("\t", depth)
 		allZero := true
 		for i := 0; i < x.Fields(); i++ {
-			element, ok := format(graph, depth+1, *x.Field(i), html)
+			var name string
+			if spec != nil && i < len(spec.Fields) {
+				name = spec.Fields[i]
+			} else {
+				name = fmt.Sprintf("%d", i)
+			}
+			element, ok := p.format(graph, depth+1, *x.Field(i))
 			allZero = allZero && !ok
-			items = append(items, fmt.Sprintf("\t%d: %s,", i, element))
+			items = append(items, fmt.Sprintf("\t%s: %s,", name, element))
 		}
 		items = append(items, "}")
 		return strings.Join(items, tabs), !allZero
@@ -159,15 +178,15 @@ func format(graph uint64, depth int, encoded wire.Object, html bool) (string, bo
 		items = append(items, "map{")
 		tabs := "\n" + strings.Repeat("\t", depth)
 		for i := 0; i < len(x.Keys); i++ {
-			key, _ := format(graph, depth+1, x.Keys[i], html)
-			value, _ := format(graph, depth+1, x.Values[i], html)
+			key, _ := p.format(graph, depth+1, x.Keys[i])
+			value, _ := p.format(graph, depth+1, x.Values[i])
 			items = append(items, fmt.Sprintf("\t%s: %s,", key, value))
 		}
 		items = append(items, "}")
 		return strings.Join(items, tabs), true
 	case *wire.Interface:
-		typ, typOk := formatType(x.Type, graph, html)
-		element, elementOk := format(graph, depth+1, x.Value, html)
+		typ, typOk := p.formatType(x.Type, graph)
+		element, elementOk := p.format(graph, depth+1, x.Value)
 		return fmt.Sprintf("interface[%s]{%s}", typ, element), typOk || elementOk
 	default:
 		// Must be a primitive; use reflection.
@@ -176,11 +195,11 @@ func format(graph uint64, depth int, encoded wire.Object, html bool) (string, bo
 }
 
 // printStream is the basic print implementation.
-func printStream(w io.Writer, r wire.Reader, html bool) (err error) {
+func (p *printer) printStream(w io.Writer, r wire.Reader) (err error) {
 	// current graph ID.
 	var graph uint64
 
-	if html {
+	if p.html {
 		fmt.Fprintf(w, "<pre>")
 		defer fmt.Fprintf(w, "</pre>")
 	}
@@ -195,6 +214,8 @@ func printStream(w io.Writer, r wire.Reader, html bool) (err error) {
 		}
 	}()
 
+	p.typeSpecs = make(map[string]*wire.Type)
+
 	for {
 		// Find the first object to begin generation.
 		length, object, err := state.ReadHeader(r)
@@ -222,18 +243,19 @@ func printStream(w io.Writer, r wire.Reader, html bool) (err error) {
 		// loop in decode.go. But we don't register type information,
 		// etc. and just print the raw structures.
 		var (
-			oid uint64 = 1
-			tid uint64 = 1
+			tid     uint64 = 1
+			objects []wire.Object
 		)
-		for oid <= length {
+		for oid := uint64(1); oid <= length; {
 			// Unmarshal the object.
 			encoded := wire.Load(r)
 
 			// Is this a type?
-			if _, ok := encoded.(*wire.Type); ok {
-				str, _ := format(graph, 0, encoded, html)
+			if typ, ok := encoded.(*wire.Type); ok {
+				str, _ := p.format(graph, 0, encoded)
 				tag := fmt.Sprintf("g%dt%d", graph, tid)
-				if html {
+				p.typeSpecs[tag] = typ
+				if p.html {
 					// See below.
 					tag = fmt.Sprintf("<a name=\"%s\">%s</a><a href=\"#%s\">&#9875;</a>", tag, tag, tag)
 				}
@@ -244,17 +266,24 @@ func printStream(w io.Writer, r wire.Reader, html bool) (err error) {
 				continue
 			}
 
+			// Otherwise, it is a node.
+			objects = append(objects, encoded)
+			oid++
+		}
+
+		for i, encoded := range objects {
+			// oid starts at 1.
+			oid := i + 1
 			// Format the node.
-			str, _ := format(graph, 0, encoded, html)
+			str, _ := p.format(graph, 0, encoded)
 			tag := fmt.Sprintf("g%dr%d", graph, oid)
-			if html {
+			if p.html {
 				// Create a little tag with an anchor next to it for linking.
 				tag = fmt.Sprintf("<a name=\"%s\">%s</a><a href=\"#%s\">&#9875;</a>", tag, tag, tag)
 			}
 			if _, err := fmt.Fprintf(w, "%s = %s\n", tag, str); err != nil {
 				return err
 			}
-			oid++
 		}
 	}
 
@@ -263,10 +292,10 @@ func printStream(w io.Writer, r wire.Reader, html bool) (err error) {
 
 // PrintText reads the stream from r and prints text to w.
 func PrintText(w io.Writer, r wire.Reader) error {
-	return printStream(w, r, false /* html */)
+	return (&printer{}).printStream(w, r)
 }
 
 // PrintHTML reads the stream from r and prints html to w.
 func PrintHTML(w io.Writer, r wire.Reader) error {
-	return printStream(w, r, true /* html */)
+	return (&printer{html: true}).printStream(w, r)
 }

From 1b879d8276c39dca6a43b656df9224e21b8b80e1 Mon Sep 17 00:00:00 2001
From: gVisor bot <gvisor-bot@google.com>
Date: Mon, 31 Aug 2020 12:50:31 -0700
Subject: [PATCH 118/211] Implement walk in gvisor verity fs

Implement directory walking in the gVisor verity file system. For each
step, the child dentry is verified against its parent's verified root hash.

PiperOrigin-RevId: 329358747
---
 pkg/merkletree/merkletree.go           |   6 +
 pkg/sentry/fsimpl/verity/BUILD         |   1 +
 pkg/sentry/fsimpl/verity/filesystem.go | 405 ++++++++++++++++++++++++-
 pkg/sentry/fsimpl/verity/verity.go     |  19 ++
 4 files changed, 425 insertions(+), 6 deletions(-)

diff --git a/pkg/merkletree/merkletree.go b/pkg/merkletree/merkletree.go
index 1a0477c6a3..36832ec86a 100644
--- a/pkg/merkletree/merkletree.go
+++ b/pkg/merkletree/merkletree.go
@@ -29,6 +29,12 @@ const (
 	sha256DigestSize = 32
 )
 
+// DigestSize returns the size (in bytes) of a digest.
+// TODO(b/156980949): Allow config other hash methods (SHA384/SHA512).
+func DigestSize() int {
+	return sha256DigestSize
+}
+
 // Layout defines the scale of a Merkle tree.
 type Layout struct {
 	// blockSize is the size of a data block to be hashed.
diff --git a/pkg/sentry/fsimpl/verity/BUILD b/pkg/sentry/fsimpl/verity/BUILD
index 28d2a4bcba..326c4ed902 100644
--- a/pkg/sentry/fsimpl/verity/BUILD
+++ b/pkg/sentry/fsimpl/verity/BUILD
@@ -13,6 +13,7 @@ go_library(
         "//pkg/abi/linux",
         "//pkg/context",
         "//pkg/fspath",
+        "//pkg/merkletree",
         "//pkg/sentry/fs/lock",
         "//pkg/sentry/kernel/auth",
         "//pkg/sentry/socket/unix/transport",
diff --git a/pkg/sentry/fsimpl/verity/filesystem.go b/pkg/sentry/fsimpl/verity/filesystem.go
index 78c6074bd0..0e17dbddc4 100644
--- a/pkg/sentry/fsimpl/verity/filesystem.go
+++ b/pkg/sentry/fsimpl/verity/filesystem.go
@@ -15,9 +15,15 @@
 package verity
 
 import (
+	"bytes"
+	"fmt"
+	"io"
+	"strconv"
+
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/fspath"
+	"gvisor.dev/gvisor/pkg/merkletree"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
@@ -91,10 +97,366 @@ func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*de
 	putDentrySlice(*ds)
 }
 
-// resolveLocked resolves rp to an existing file.
-func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath, ds **[]*dentry) (*dentry, error) {
-	// TODO(b/159261227): Implement resolveLocked.
-	return nil, nil
+// stepLocked resolves rp.Component() to an existing file, starting from the
+// given directory.
+//
+// Dentries which may have a reference count of zero, and which therefore
+// should be dropped once traversal is complete, are appended to ds.
+//
+// Preconditions: fs.renameMu must be locked. d.dirMu must be locked.
+// !rp.Done().
+func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, mayFollowSymlinks bool, ds **[]*dentry) (*dentry, error) {
+	if !d.isDir() {
+		return nil, syserror.ENOTDIR
+	}
+
+	if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
+		return nil, err
+	}
+
+afterSymlink:
+	name := rp.Component()
+	if name == "." {
+		rp.Advance()
+		return d, nil
+	}
+	if name == ".." {
+		if isRoot, err := rp.CheckRoot(ctx, &d.vfsd); err != nil {
+			return nil, err
+		} else if isRoot || d.parent == nil {
+			rp.Advance()
+			return d, nil
+		}
+		if err := rp.CheckMount(ctx, &d.parent.vfsd); err != nil {
+			return nil, err
+		}
+		rp.Advance()
+		return d.parent, nil
+	}
+	child, err := fs.getChildLocked(ctx, d, name, ds)
+	if err != nil {
+		return nil, err
+	}
+	if err := rp.CheckMount(ctx, &child.vfsd); err != nil {
+		return nil, err
+	}
+	if child.isSymlink() && mayFollowSymlinks && rp.ShouldFollowSymlink() {
+		target, err := child.readlink(ctx)
+		if err != nil {
+			return nil, err
+		}
+		if err := rp.HandleSymlink(target); err != nil {
+			return nil, err
+		}
+		goto afterSymlink // don't check the current directory again
+	}
+	rp.Advance()
+	return child, nil
+}
+
+// verifyChild verifies the root hash of child against the already verified
+// root hash of the parent to ensure the child is expected.  verifyChild
+// triggers a sentry panic if unexpected modifications to the file system are
+// detected. In noCrashOnVerificationFailure mode it returns a syserror
+// instead.
+// Preconditions: fs.renameMu must be locked. d.dirMu must be locked.
+// TODO(b/166474175): Investigate all possible errors returned in this
+// function, and make sure we differentiate all errors that indicate unexpected
+// modifications to the file system from the ones that are not harmful.
+func (fs *filesystem) verifyChild(ctx context.Context, parent *dentry, child *dentry) (*dentry, error) {
+	vfsObj := fs.vfsfs.VirtualFilesystem()
+
+	// Get the path to the child dentry. This is only used to provide path
+	// information in failure case.
+	childPath, err := vfsObj.PathnameWithDeleted(ctx, child.fs.rootDentry.lowerVD, child.lowerVD)
+	if err != nil {
+		return nil, err
+	}
+
+	verityMu.RLock()
+	defer verityMu.RUnlock()
+	// Read the offset of the child from the extended attributes of the
+	// corresponding Merkle tree file.
+	// This is the offset of the root hash for child in its parent's Merkle
+	// tree file.
+	off, err := vfsObj.GetxattrAt(ctx, fs.creds, &vfs.PathOperation{
+		Root:  child.lowerMerkleVD,
+		Start: child.lowerMerkleVD,
+	}, &vfs.GetxattrOptions{
+		Name: merkleOffsetInParentXattr,
+		// Offset is a 32 bit integer.
+		Size: sizeOfInt32,
+	})
+
+	// The Merkle tree file for the child should have been created and
+	// contains the expected xattrs. If the file or the xattr does not
+	// exist, it indicates unexpected modifications to the file system.
+	if err == syserror.ENOENT || err == syserror.ENODATA {
+		if noCrashOnVerificationFailure {
+			return nil, err
+		}
+		panic(fmt.Sprintf("Failed to get xattr %s for %s: %v", merkleOffsetInParentXattr, childPath, err))
+	}
+	if err != nil {
+		return nil, err
+	}
+	// The offset xattr should be an integer. If it's not, it indicates
+	// unexpected modifications to the file system.
+	offset, err := strconv.Atoi(off)
+	if err != nil {
+		if noCrashOnVerificationFailure {
+			return nil, syserror.EINVAL
+		}
+		panic(fmt.Sprintf("Failed to convert xattr %s for %s to int: %v", merkleOffsetInParentXattr, childPath, err))
+	}
+
+	// Open parent Merkle tree file to read and verify child's root hash.
+	parentMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{
+		Root:  parent.lowerMerkleVD,
+		Start: parent.lowerMerkleVD,
+	}, &vfs.OpenOptions{
+		Flags: linux.O_RDONLY,
+	})
+
+	// The parent Merkle tree file should have been created. If it's
+	// missing, it indicates an unexpected modification to the file system.
+	if err == syserror.ENOENT {
+		if noCrashOnVerificationFailure {
+			return nil, err
+		}
+		panic(fmt.Sprintf("Failed to open parent Merkle file for %s: %v", childPath, err))
+	}
+	if err != nil {
+		return nil, err
+	}
+
+	// dataSize is the size of raw data for the Merkle tree. For a file,
+	// dataSize is the size of the whole file. For a directory, dataSize is
+	// the size of all its children's root hashes.
+	dataSize, err := parentMerkleFD.Getxattr(ctx, &vfs.GetxattrOptions{
+		Name: merkleSizeXattr,
+		Size: sizeOfInt32,
+	})
+
+	// The Merkle tree file for the child should have been created and
+	// contains the expected xattrs. If the file or the xattr does not
+	// exist, it indicates unexpected modifications to the file system.
+	if err == syserror.ENOENT || err == syserror.ENODATA {
+		if noCrashOnVerificationFailure {
+			return nil, err
+		}
+		panic(fmt.Sprintf("Failed to get xattr %s for %s: %v", merkleSizeXattr, childPath, err))
+	}
+	if err != nil {
+		return nil, err
+	}
+
+	// The dataSize xattr should be an integer. If it's not, it indicates
+	// unexpected modifications to the file system.
+	parentSize, err := strconv.Atoi(dataSize)
+	if err != nil {
+		if noCrashOnVerificationFailure {
+			return nil, syserror.EINVAL
+		}
+		panic(fmt.Sprintf("Failed to convert xattr %s for %s to int: %v", merkleSizeXattr, childPath, err))
+	}
+
+	fdReader := vfs.FileReadWriteSeeker{
+		FD:  parentMerkleFD,
+		Ctx: ctx,
+	}
+
+	// Since we are verifying against a directory Merkle tree, buf should
+	// contain the root hash of the children in the parent Merkle tree when
+	// Verify returns with success.
+	var buf bytes.Buffer
+	if err := merkletree.Verify(&buf, &fdReader, &fdReader, int64(parentSize), int64(offset), int64(merkletree.DigestSize()), parent.rootHash, true /* dataAndTreeInSameFile */); err != nil && err != io.EOF {
+		if noCrashOnVerificationFailure {
+			return nil, syserror.EIO
+		}
+		panic(fmt.Sprintf("Verification for %s failed: %v", childPath, err))
+	}
+
+	// Cache child root hash when it's verified the first time.
+	if len(child.rootHash) == 0 {
+		child.rootHash = buf.Bytes()
+	}
+	return child, nil
+}
+
+// Preconditions: fs.renameMu must be locked. d.dirMu must be locked.
+func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
+	if child, ok := parent.children[name]; ok {
+		// If enabling verification on files/directories is not allowed
+		// during runtime, all cached children are already verified. If
+		// runtime enable is allowed and the parent directory is
+		// enabled, we should verify the child root hash here because
+		// it may be cached before enabled.
+		if fs.allowRuntimeEnable && len(parent.rootHash) != 0 {
+			if _, err := fs.verifyChild(ctx, parent, child); err != nil {
+				return nil, err
+			}
+		}
+		return child, nil
+	}
+	child, err := fs.lookupAndVerifyLocked(ctx, parent, name)
+	if err != nil {
+		return nil, err
+	}
+	if parent.children == nil {
+		parent.children = make(map[string]*dentry)
+	}
+	parent.children[name] = child
+	// child's refcount is initially 0, so it may be dropped after traversal.
+	*ds = appendDentry(*ds, child)
+	return child, nil
+}
+
+// Preconditions: fs.renameMu must be locked. parent.dirMu must be locked.
+func (fs *filesystem) lookupAndVerifyLocked(ctx context.Context, parent *dentry, name string) (*dentry, error) {
+	vfsObj := fs.vfsfs.VirtualFilesystem()
+
+	childFilename := fspath.Parse(name)
+	childVD, childErr := vfsObj.GetDentryAt(ctx, fs.creds, &vfs.PathOperation{
+		Root:  parent.lowerVD,
+		Start: parent.lowerVD,
+		Path:  childFilename,
+	}, &vfs.GetDentryOptions{})
+
+	// We will handle ENOENT separately, as it may indicate unexpected
+	// modifications to the file system, and may cause a sentry panic.
+	if childErr != nil && childErr != syserror.ENOENT {
+		return nil, childErr
+	}
+
+	// The dentry needs to be cleaned up if any error occurs. IncRef will be
+	// called if a verity child dentry is successfully created.
+	if childErr == nil {
+		defer childVD.DecRef(ctx)
+	}
+
+	childMerkleFilename := merklePrefix + name
+	childMerkleVD, childMerkleErr := vfsObj.GetDentryAt(ctx, fs.creds, &vfs.PathOperation{
+		Root:  parent.lowerVD,
+		Start: parent.lowerVD,
+		Path:  fspath.Parse(childMerkleFilename),
+	}, &vfs.GetDentryOptions{})
+
+	// We will handle ENOENT separately, as it may indicate unexpected
+	// modifications to the file system, and may cause a sentry panic.
+	if childMerkleErr != nil && childMerkleErr != syserror.ENOENT {
+		return nil, childMerkleErr
+	}
+
+	// The dentry needs to be cleaned up if any error occurs. IncRef will be
+	// called if a verity child dentry is successfully created.
+	if childMerkleErr == nil {
+		defer childMerkleVD.DecRef(ctx)
+	}
+
+	// Get the path to the parent dentry. This is only used to provide path
+	// information in failure case.
+	parentPath, err := vfsObj.PathnameWithDeleted(ctx, parent.fs.rootDentry.lowerVD, parent.lowerVD)
+	if err != nil {
+		return nil, err
+	}
+
+	// TODO(b/166474175): Investigate all possible errors of childErr and
+	// childMerkleErr, and make sure we differentiate all errors that
+	// indicate unexpected modifications to the file system from the ones
+	// that are not harmful.
+	if childErr == syserror.ENOENT && childMerkleErr == nil {
+		// Failed to get child file/directory dentry. However the
+		// corresponding Merkle tree is found. This indicates an
+		// unexpected modification to the file system that
+		// removed/renamed the child.
+		if noCrashOnVerificationFailure {
+			return nil, childErr
+		}
+		panic(fmt.Sprintf("Target file %s is expected but missing", parentPath+"/"+name))
+	} else if childErr == nil && childMerkleErr == syserror.ENOENT {
+		// If in allowRuntimeEnable mode, and the Merkle tree file is
+		// not created yet, we create an empty Merkle tree file, so that
+		// if the file is enabled through ioctl, we have the Merkle tree
+		// file open and ready to use.
+		// This may cause empty and unused Merkle tree files in
+		// allowRuntimeEnable mode, if they are never enabled. This
+		// does not affect verification, as we rely on cached root hash
+		// to decide whether to perform verification, not the existence
+		// of the Merkle tree file. Also, those Merkle tree files are
+		// always hidden and cannot be accessed by verity fs users.
+		if fs.allowRuntimeEnable {
+			childMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{
+				Root:  parent.lowerVD,
+				Start: parent.lowerVD,
+				Path:  fspath.Parse(childMerkleFilename),
+			}, &vfs.OpenOptions{
+				Flags: linux.O_RDWR | linux.O_CREAT,
+			})
+			if err != nil {
+				return nil, err
+			}
+			childMerkleFD.DecRef(ctx)
+			childMerkleVD, err = vfsObj.GetDentryAt(ctx, fs.creds, &vfs.PathOperation{
+				Root:  parent.lowerVD,
+				Start: parent.lowerVD,
+				Path:  fspath.Parse(childMerkleFilename),
+			}, &vfs.GetDentryOptions{})
+			if err != nil {
+				return nil, err
+			}
+		} else {
+			// Runtime enable is not allowed, so a missing Merkle
+			// file indicates an unexpected modification to the file
+			// system that removed/renamed the Merkle tree file.
+			if noCrashOnVerificationFailure {
+				return nil, childMerkleErr
+			}
+			panic(fmt.Sprintf("Expected Merkle file for target %s but none found", parentPath+"/"+name))
+		}
+	}
+
+	mask := uint32(linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID)
+	stat, err := vfsObj.StatAt(ctx, fs.creds, &vfs.PathOperation{
+		Root:  childVD,
+		Start: childVD,
+	}, &vfs.StatOptions{
+		Mask: mask,
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	child := fs.newDentry()
+	child.lowerVD = childVD
+	child.lowerMerkleVD = childMerkleVD
+
+	// Increase the reference for both childVD and childMerkleVD as they are
+	// held by child. If this function fails and the child is destroyed, the
+	// references will be decreased in destroyLocked.
+	childVD.IncRef()
+	childMerkleVD.IncRef()
+
+	parent.IncRef()
+	child.parent = parent
+	child.name = name
+
+	// TODO(b/162788573): Verify child metadata.
+	child.mode = uint32(stat.Mode)
+	child.uid = stat.UID
+	child.gid = stat.GID
+
+	// Verify child root hash. This should always be performed unless in
+	// allowRuntimeEnable mode and the parent directory hasn't been enabled
+	// yet.
+	if !(fs.allowRuntimeEnable && len(parent.rootHash) == 0) {
+		if _, err := fs.verifyChild(ctx, parent, child); err != nil {
+			child.destroyLocked(ctx)
+			return nil, err
+		}
+	}
+
+	return child, nil
 }
 
 // walkParentDirLocked resolves all but the last path component of rp to an
@@ -104,8 +466,39 @@ func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath,
 //
 // Preconditions: fs.renameMu must be locked. !rp.Done().
 func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) {
-	// TODO(b/159261227): Implement walkParentDirLocked.
-	return nil, nil
+	for !rp.Final() {
+		d.dirMu.Lock()
+		next, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds)
+		d.dirMu.Unlock()
+		if err != nil {
+			return nil, err
+		}
+		d = next
+	}
+	if !d.isDir() {
+		return nil, syserror.ENOTDIR
+	}
+	return d, nil
+}
+
+// resolveLocked resolves rp to an existing file.
+//
+// Preconditions: fs.renameMu must be locked.
+func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath, ds **[]*dentry) (*dentry, error) {
+	d := rp.Start().Impl().(*dentry)
+	for !rp.Done() {
+		d.dirMu.Lock()
+		next, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds)
+		d.dirMu.Unlock()
+		if err != nil {
+			return nil, err
+		}
+		d = next
+	}
+	if rp.MustBeDir() && !d.isDir() {
+		return nil, syserror.ENOTDIR
+	}
+	return d, nil
 }
 
 // AccessAt implements vfs.Filesystem.Impl.AccessAt.
diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go
index 1c5b07aa5f..eedb5f4848 100644
--- a/pkg/sentry/fsimpl/verity/verity.go
+++ b/pkg/sentry/fsimpl/verity/verity.go
@@ -41,6 +41,18 @@ const Name = "verity"
 // tree file for "/foo" is "/.merkle.verity.foo".
 const merklePrefix = ".merkle.verity."
 
+// merkleOffsetInParentXattr is the extended attribute name specifying the
+// offset of child root hash in its parent's Merkle tree.
+const merkleOffsetInParentXattr = "user.merkle.offset"
+
+// merkleSizeXattr is the extended attribute name specifying the size of data
+// hashed by the corresponding Merkle tree. For a file, it's the size of the
+// whole file. For a directory, it's the size of all its children's root hashes.
+const merkleSizeXattr = "user.merkle.size"
+
+// sizeOfInt32 is the size in bytes for a 32 bit integer in extended attributes.
+const sizeOfInt32 = 4
+
 // noCrashOnVerificationFailure indicates whether the sandbox should panic
 // whenever verification fails. If true, an error is returned instead of
 // panicking. This should only be set for tests.
@@ -48,6 +60,11 @@ const merklePrefix = ".merkle.verity."
 // flag.
 var noCrashOnVerificationFailure bool
 
+// verityMu synchronizes enabling verity files, protects files or directories
+// from being enabled by different threads simultaneously. It also ensures that
+// verity does not access files that are being enabled.
+var verityMu sync.RWMutex
+
 // FilesystemType implements vfs.FilesystemType.
 type FilesystemType struct{}
 
@@ -215,6 +232,8 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
 	copy(d.rootHash, iopts.RootHash)
 	d.vfsd.Init(d)
 
+	fs.rootDentry = d
+
 	return &fs.vfsfs, &d.vfsd, nil
 }
 

From bdabd100d340d9ff88e65f31930e1f31800811f5 Mon Sep 17 00:00:00 2001
From: Tamir Duberstein <tamird@google.com>
Date: Mon, 31 Aug 2020 13:08:49 -0700
Subject: [PATCH 119/211] Remove __fuchsia__ defines

These mostly guard linux-only headers; check for linux instead.

PiperOrigin-RevId: 329362762
---
 test/syscalls/linux/BUILD                     |   23 +-
 test/syscalls/linux/packet_socket_raw.cc      |    6 -
 test/syscalls/linux/raw_socket.cc             |    6 -
 test/syscalls/linux/tcp_socket.cc             |   10 +-
 test/syscalls/linux/udp_socket.cc             | 1828 ++++++++++++++++-
 .../linux/udp_socket_errqueue_test_case.cc    |   57 -
 test/syscalls/linux/udp_socket_test_cases.cc  | 1781 ----------------
 test/syscalls/linux/udp_socket_test_cases.h   |   82 -
 test/util/fs_util.cc                          |    8 +-
 test/util/fs_util.h                           |    4 +-
 test/util/test_util_runfiles.cc               |    4 -
 11 files changed, 1843 insertions(+), 1966 deletions(-)
 delete mode 100644 test/syscalls/linux/udp_socket_errqueue_test_case.cc
 delete mode 100644 test/syscalls/linux/udp_socket_test_cases.cc
 delete mode 100644 test/syscalls/linux/udp_socket_test_cases.h

diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index fad3be7bf2..de753fc4e1 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -22,6 +22,7 @@ exports_files(
         "socket_ipv4_tcp_unbound_external_networking_test.cc",
         "socket_ipv4_udp_unbound_external_networking_test.cc",
         "socket_ipv4_udp_unbound_loopback.cc",
+        "socket_ipv4_udp_unbound_loopback_nogotsan.cc",
         "tcp_socket.cc",
         "udp_bind.cc",
         "udp_socket.cc",
@@ -3666,15 +3667,12 @@ cc_binary(
     ],
 )
 
-cc_library(
-    name = "udp_socket_test_cases",
+cc_binary(
+    name = "udp_socket_test",
     testonly = 1,
-    srcs = [
-        "udp_socket_errqueue_test_case.cc",
-        "udp_socket_test_cases.cc",
-    ],
-    hdrs = ["udp_socket_test_cases.h"],
+    srcs = ["udp_socket.cc"],
     defines = select_system(),
+    linkstatic = 1,
     deps = [
         ":ip_socket_test_util",
         ":socket_test_util",
@@ -3689,17 +3687,6 @@ cc_library(
         "//test/util:test_util",
         "//test/util:thread_util",
     ],
-    alwayslink = 1,
-)
-
-cc_binary(
-    name = "udp_socket_test",
-    testonly = 1,
-    srcs = ["udp_socket.cc"],
-    linkstatic = 1,
-    deps = [
-        ":udp_socket_test_cases",
-    ],
 )
 
 cc_binary(
diff --git a/test/syscalls/linux/packet_socket_raw.cc b/test/syscalls/linux/packet_socket_raw.cc
index a11a03415e..f3c1d6bc9c 100644
--- a/test/syscalls/linux/packet_socket_raw.cc
+++ b/test/syscalls/linux/packet_socket_raw.cc
@@ -14,9 +14,7 @@
 
 #include <arpa/inet.h>
 #include <linux/capability.h>
-#ifndef __fuchsia__
 #include <linux/filter.h>
-#endif  // __fuchsia__
 #include <linux/if_arp.h>
 #include <linux/if_packet.h>
 #include <net/ethernet.h>
@@ -618,8 +616,6 @@ TEST_P(RawPacketTest, GetSocketErrorBind) {
   }
 }
 
-#ifndef __fuchsia__
-
 TEST_P(RawPacketTest, SetSocketDetachFilterNoInstalledFilter) {
   // TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER.
   //
@@ -647,8 +643,6 @@ TEST_P(RawPacketTest, GetSocketDetachFilter) {
               SyscallFailsWithErrno(ENOPROTOOPT));
 }
 
-#endif  // __fuchsia__
-
 INSTANTIATE_TEST_SUITE_P(AllInetTests, RawPacketTest,
                          ::testing::Values(ETH_P_IP, ETH_P_ALL));
 
diff --git a/test/syscalls/linux/raw_socket.cc b/test/syscalls/linux/raw_socket.cc
index 8d6e5c9134..54709371c1 100644
--- a/test/syscalls/linux/raw_socket.cc
+++ b/test/syscalls/linux/raw_socket.cc
@@ -13,9 +13,7 @@
 // limitations under the License.
 
 #include <linux/capability.h>
-#ifndef __fuchsia__
 #include <linux/filter.h>
-#endif  // __fuchsia__
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
@@ -815,8 +813,6 @@ void RawSocketTest::ReceiveBufFrom(int sock, char* recv_buf,
   ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(sock, recv_buf, recv_buf_len));
 }
 
-#ifndef __fuchsia__
-
 TEST_P(RawSocketTest, SetSocketDetachFilterNoInstalledFilter) {
   // TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER.
   if (IsRunningOnGvisor()) {
@@ -838,8 +834,6 @@ TEST_P(RawSocketTest, GetSocketDetachFilter) {
               SyscallFailsWithErrno(ENOPROTOOPT));
 }
 
-#endif  //  __fuchsia__
-
 // AF_INET6+SOCK_RAW+IPPROTO_RAW sockets can be created, but not written to.
 TEST(RawSocketTest, IPv6ProtoRaw) {
   SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc
index a6325a761b..ab731db1dd 100644
--- a/test/syscalls/linux/tcp_socket.cc
+++ b/test/syscalls/linux/tcp_socket.cc
@@ -13,9 +13,9 @@
 // limitations under the License.
 
 #include <fcntl.h>
-#ifndef __fuchsia__
+#ifdef __linux__
 #include <linux/filter.h>
-#endif  // __fuchsia__
+#endif  // __linux__
 #include <netinet/in.h>
 #include <netinet/tcp.h>
 #include <poll.h>
@@ -1586,7 +1586,7 @@ TEST_P(SimpleTcpSocketTest, SetTCPWindowClampAboveHalfMinRcvBuf) {
   }
 }
 
-#ifndef __fuchsia__
+#ifdef __linux__
 
 // TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER.
 // gVisor currently silently ignores attaching a filter.
@@ -1620,6 +1620,8 @@ TEST_P(SimpleTcpSocketTest, SetSocketAttachDetachFilter) {
       SyscallSucceeds());
 }
 
+#endif  // __linux__
+
 TEST_P(SimpleTcpSocketTest, SetSocketDetachFilterNoInstalledFilter) {
   // TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER.
   SKIP_IF(IsRunningOnGvisor());
@@ -1641,8 +1643,6 @@ TEST_P(SimpleTcpSocketTest, GetSocketDetachFilter) {
               SyscallFailsWithErrno(ENOPROTOOPT));
 }
 
-#endif  // __fuchsia__
-
 INSTANTIATE_TEST_SUITE_P(AllInetTests, SimpleTcpSocketTest,
                          ::testing::Values(AF_INET, AF_INET6));
 
diff --git a/test/syscalls/linux/udp_socket.cc b/test/syscalls/linux/udp_socket.cc
index 7a8ac30a44..97db2b321a 100644
--- a/test/syscalls/linux/udp_socket.cc
+++ b/test/syscalls/linux/udp_socket.cc
@@ -12,13 +12,1839 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "test/syscalls/linux/udp_socket_test_cases.h"
+#include <arpa/inet.h>
+#include <fcntl.h>
+#ifdef __linux__
+#include <linux/errqueue.h>
+#include <linux/filter.h>
+#endif  // __linux__
+#include <netinet/in.h>
+#include <poll.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "absl/strings/str_format.h"
+#ifndef SIOCGSTAMP
+#include <linux/sockios.h>
+#endif
+
+#include "gtest/gtest.h"
+#include "absl/base/macros.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
 
 namespace gvisor {
 namespace testing {
 
 namespace {
 
+// Fixture for tests parameterized by the address family to use (AF_INET and
+// AF_INET6) when creating sockets.
+class UdpSocketTest
+    : public ::testing::TestWithParam<gvisor::testing::AddressFamily> {
+ protected:
+  // Creates two sockets that will be used by test cases.
+  void SetUp() override;
+
+  // Binds the socket bind_ to the loopback and updates bind_addr_.
+  PosixError BindLoopback();
+
+  // Binds the socket bind_ to Any and updates bind_addr_.
+  PosixError BindAny();
+
+  // Binds the given socket to addr, then updates addr via getsockname.
+  PosixError BindSocket(int socket, struct sockaddr* addr);
+
+  // Return initialized Any address to port 0.
+  struct sockaddr_storage InetAnyAddr();
+
+  // Return initialized Loopback address to port 0.
+  struct sockaddr_storage InetLoopbackAddr();
+
+  // Disconnects socket sockfd.
+  void Disconnect(int sockfd);
+
+  // Get family for the test.
+  int GetFamily();
+
+  // Socket used by Bind methods
+  FileDescriptor bind_;
+
+  // Second socket used for tests.
+  FileDescriptor sock_;
+
+  // Address for bind_ socket.
+  struct sockaddr* bind_addr_;
+
+  // Initialized to the length based on GetFamily().
+  socklen_t addrlen_;
+
+  // Storage for bind_addr_.
+  struct sockaddr_storage bind_addr_storage_;
+
+ private:
+  // Helper to initialize addrlen_ for the test case.
+  socklen_t GetAddrLength();
+};
+
+// Gets a pointer to the port component of the given address.
+uint16_t* Port(struct sockaddr_storage* addr) {
+  switch (addr->ss_family) {
+    case AF_INET: {
+      auto sin = reinterpret_cast<struct sockaddr_in*>(addr);
+      return &sin->sin_port;
+    }
+    case AF_INET6: {
+      auto sin6 = reinterpret_cast<struct sockaddr_in6*>(addr);
+      return &sin6->sin6_port;
+    }
+  }
+
+  return nullptr;
+}
+
+// Sets addr port to "port".
+void SetPort(struct sockaddr_storage* addr, uint16_t port) {
+  switch (addr->ss_family) {
+    case AF_INET: {
+      auto sin = reinterpret_cast<struct sockaddr_in*>(addr);
+      sin->sin_port = port;
+      break;
+    }
+    case AF_INET6: {
+      auto sin6 = reinterpret_cast<struct sockaddr_in6*>(addr);
+      sin6->sin6_port = port;
+      break;
+    }
+  }
+}
+
+void UdpSocketTest::SetUp() {
+  addrlen_ = GetAddrLength();
+
+  bind_ =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, IPPROTO_UDP));
+  memset(&bind_addr_storage_, 0, sizeof(bind_addr_storage_));
+  bind_addr_ = reinterpret_cast<struct sockaddr*>(&bind_addr_storage_);
+
+  sock_ =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, IPPROTO_UDP));
+}
+
+int UdpSocketTest::GetFamily() {
+  if (GetParam() == AddressFamily::kIpv4) {
+    return AF_INET;
+  }
+  return AF_INET6;
+}
+
+PosixError UdpSocketTest::BindLoopback() {
+  bind_addr_storage_ = InetLoopbackAddr();
+  struct sockaddr* bind_addr_ =
+      reinterpret_cast<struct sockaddr*>(&bind_addr_storage_);
+  return BindSocket(bind_.get(), bind_addr_);
+}
+
+PosixError UdpSocketTest::BindAny() {
+  bind_addr_storage_ = InetAnyAddr();
+  struct sockaddr* bind_addr_ =
+      reinterpret_cast<struct sockaddr*>(&bind_addr_storage_);
+  return BindSocket(bind_.get(), bind_addr_);
+}
+
+PosixError UdpSocketTest::BindSocket(int socket, struct sockaddr* addr) {
+  socklen_t len = sizeof(bind_addr_storage_);
+
+  // Bind, then check that we get the right address.
+  RETURN_ERROR_IF_SYSCALL_FAIL(bind(socket, addr, addrlen_));
+
+  RETURN_ERROR_IF_SYSCALL_FAIL(getsockname(socket, addr, &len));
+
+  if (addrlen_ != len) {
+    return PosixError(
+        EINVAL,
+        absl::StrFormat("getsockname len: %u expected: %u", len, addrlen_));
+  }
+  return PosixError(0);
+}
+
+socklen_t UdpSocketTest::GetAddrLength() {
+  struct sockaddr_storage addr;
+  if (GetFamily() == AF_INET) {
+    auto sin = reinterpret_cast<struct sockaddr_in*>(&addr);
+    return sizeof(*sin);
+  }
+
+  auto sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr);
+  return sizeof(*sin6);
+}
+
+sockaddr_storage UdpSocketTest::InetAnyAddr() {
+  struct sockaddr_storage addr;
+  memset(&addr, 0, sizeof(addr));
+  reinterpret_cast<struct sockaddr*>(&addr)->sa_family = GetFamily();
+
+  if (GetFamily() == AF_INET) {
+    auto sin = reinterpret_cast<struct sockaddr_in*>(&addr);
+    sin->sin_addr.s_addr = htonl(INADDR_ANY);
+    sin->sin_port = htons(0);
+    return addr;
+  }
+
+  auto sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr);
+  sin6->sin6_addr = IN6ADDR_ANY_INIT;
+  sin6->sin6_port = htons(0);
+  return addr;
+}
+
+sockaddr_storage UdpSocketTest::InetLoopbackAddr() {
+  struct sockaddr_storage addr;
+  memset(&addr, 0, sizeof(addr));
+  reinterpret_cast<struct sockaddr*>(&addr)->sa_family = GetFamily();
+
+  if (GetFamily() == AF_INET) {
+    auto sin = reinterpret_cast<struct sockaddr_in*>(&addr);
+    sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+    sin->sin_port = htons(0);
+    return addr;
+  }
+  auto sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr);
+  sin6->sin6_addr = in6addr_loopback;
+  sin6->sin6_port = htons(0);
+  return addr;
+}
+
+void UdpSocketTest::Disconnect(int sockfd) {
+  sockaddr_storage addr_storage = InetAnyAddr();
+  sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  socklen_t addrlen = sizeof(addr_storage);
+
+  addr->sa_family = AF_UNSPEC;
+  ASSERT_THAT(connect(sockfd, addr, addrlen), SyscallSucceeds());
+
+  // Check that after disconnect the socket is bound to the ANY address.
+  EXPECT_THAT(getsockname(sockfd, addr, &addrlen), SyscallSucceeds());
+  if (GetParam() == AddressFamily::kIpv4) {
+    auto addr_out = reinterpret_cast<struct sockaddr_in*>(addr);
+    EXPECT_EQ(addrlen, sizeof(*addr_out));
+    EXPECT_EQ(addr_out->sin_addr.s_addr, htonl(INADDR_ANY));
+  } else {
+    auto addr_out = reinterpret_cast<struct sockaddr_in6*>(addr);
+    EXPECT_EQ(addrlen, sizeof(*addr_out));
+    struct in6_addr loopback = IN6ADDR_ANY_INIT;
+
+    EXPECT_EQ(memcmp(&addr_out->sin6_addr, &loopback, sizeof(in6_addr)), 0);
+  }
+}
+
+TEST_P(UdpSocketTest, Creation) {
+  FileDescriptor sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, IPPROTO_UDP));
+  EXPECT_THAT(close(sock.release()), SyscallSucceeds());
+
+  sock = ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, 0));
+  EXPECT_THAT(close(sock.release()), SyscallSucceeds());
+
+  ASSERT_THAT(socket(GetFamily(), SOCK_STREAM, IPPROTO_UDP), SyscallFails());
+}
+
+TEST_P(UdpSocketTest, Getsockname) {
+  // Check that we're not bound.
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getsockname(bind_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallSucceeds());
+  EXPECT_EQ(addrlen, addrlen_);
+  struct sockaddr_storage any = InetAnyAddr();
+  EXPECT_EQ(memcmp(&addr, reinterpret_cast<struct sockaddr*>(&any), addrlen_),
+            0);
+
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  EXPECT_THAT(
+      getsockname(bind_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallSucceeds());
+
+  EXPECT_EQ(addrlen, addrlen_);
+  EXPECT_EQ(memcmp(&addr, bind_addr_, addrlen_), 0);
+}
+
+TEST_P(UdpSocketTest, Getpeername) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Check that we're not connected.
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallFailsWithErrno(ENOTCONN));
+
+  // Connect, then check that we get the right address.
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallSucceeds());
+  EXPECT_EQ(addrlen, addrlen_);
+  EXPECT_EQ(memcmp(&addr, bind_addr_, addrlen_), 0);
+}
+
+TEST_P(UdpSocketTest, SendNotConnected) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Do send & write, they must fail.
+  char buf[512];
+  EXPECT_THAT(send(sock_.get(), buf, sizeof(buf), 0),
+              SyscallFailsWithErrno(EDESTADDRREQ));
+
+  EXPECT_THAT(write(sock_.get(), buf, sizeof(buf)),
+              SyscallFailsWithErrno(EDESTADDRREQ));
+
+  // Use sendto.
+  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Check that we're bound now.
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallSucceeds());
+  EXPECT_EQ(addrlen, addrlen_);
+  EXPECT_NE(*Port(&addr), 0);
+}
+
+TEST_P(UdpSocketTest, ConnectBinds) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Connect the socket.
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  // Check that we're bound now.
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallSucceeds());
+  EXPECT_EQ(addrlen, addrlen_);
+  EXPECT_NE(*Port(&addr), 0);
+}
+
+TEST_P(UdpSocketTest, ReceiveNotBound) {
+  char buf[512];
+  EXPECT_THAT(recv(sock_.get(), buf, sizeof(buf), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+}
+
+TEST_P(UdpSocketTest, Bind) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Try to bind again.
+  EXPECT_THAT(bind(bind_.get(), bind_addr_, addrlen_),
+              SyscallFailsWithErrno(EINVAL));
+
+  // Check that we're still bound to the original address.
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getsockname(bind_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallSucceeds());
+  EXPECT_EQ(addrlen, addrlen_);
+  EXPECT_EQ(memcmp(&addr, bind_addr_, addrlen_), 0);
+}
+
+TEST_P(UdpSocketTest, BindInUse) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Try to bind again.
+  EXPECT_THAT(bind(sock_.get(), bind_addr_, addrlen_),
+              SyscallFailsWithErrno(EADDRINUSE));
+}
+
+TEST_P(UdpSocketTest, ReceiveAfterConnect) {
+  ASSERT_NO_ERRNO(BindLoopback());
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  // Send from sock_ to bind_
+  char buf[512];
+  RandomizeBuffer(buf, sizeof(buf));
+  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Receive the data.
+  char received[sizeof(buf)];
+  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), 0),
+              SyscallSucceedsWithValue(sizeof(received)));
+  EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
+}
+
+TEST_P(UdpSocketTest, ReceiveAfterDisconnect) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  for (int i = 0; i < 2; i++) {
+    // Connect sock_ to bound address.
+    ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+    struct sockaddr_storage addr;
+    socklen_t addrlen = sizeof(addr);
+    EXPECT_THAT(
+        getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+        SyscallSucceeds());
+    EXPECT_EQ(addrlen, addrlen_);
+
+    // Send from bind_ to sock_.
+    char buf[512];
+    RandomizeBuffer(buf, sizeof(buf));
+
+    ASSERT_THAT(sendto(bind_.get(), buf, sizeof(buf), 0,
+                       reinterpret_cast<sockaddr*>(&addr), addrlen),
+                SyscallSucceedsWithValue(sizeof(buf)));
+
+    // Receive the data.
+    char received[sizeof(buf)];
+    EXPECT_THAT(recv(sock_.get(), received, sizeof(received), 0),
+                SyscallSucceedsWithValue(sizeof(received)));
+    EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
+
+    // Disconnect sock_.
+    struct sockaddr unspec = {};
+    unspec.sa_family = AF_UNSPEC;
+    ASSERT_THAT(connect(sock_.get(), &unspec, sizeof(unspec.sa_family)),
+                SyscallSucceeds());
+  }
+}
+
+TEST_P(UdpSocketTest, Connect) {
+  ASSERT_NO_ERRNO(BindLoopback());
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  // Check that we're connected to the right peer.
+  struct sockaddr_storage peer;
+  socklen_t peerlen = sizeof(peer);
+  EXPECT_THAT(
+      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&peer), &peerlen),
+      SyscallSucceeds());
+  EXPECT_EQ(peerlen, addrlen_);
+  EXPECT_EQ(memcmp(&peer, bind_addr_, addrlen_), 0);
+
+  // Try to bind after connect.
+  struct sockaddr_storage any = InetAnyAddr();
+  EXPECT_THAT(
+      bind(sock_.get(), reinterpret_cast<struct sockaddr*>(&any), addrlen_),
+      SyscallFailsWithErrno(EINVAL));
+
+  struct sockaddr_storage bind2_storage = InetLoopbackAddr();
+  struct sockaddr* bind2_addr =
+      reinterpret_cast<struct sockaddr*>(&bind2_storage);
+  FileDescriptor bind2 =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, IPPROTO_UDP));
+  ASSERT_NO_ERRNO(BindSocket(bind2.get(), bind2_addr));
+
+  // Try to connect again.
+  EXPECT_THAT(connect(sock_.get(), bind2_addr, addrlen_), SyscallSucceeds());
+
+  // Check that peer name changed.
+  peerlen = sizeof(peer);
+  EXPECT_THAT(
+      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&peer), &peerlen),
+      SyscallSucceeds());
+  EXPECT_EQ(peerlen, addrlen_);
+  EXPECT_EQ(memcmp(&peer, bind2_addr, addrlen_), 0);
+}
+
+TEST_P(UdpSocketTest, ConnectAnyZero) {
+  // TODO(138658473): Enable when we can connect to port 0 with gVisor.
+  SKIP_IF(IsRunningOnGvisor());
+
+  struct sockaddr_storage any = InetAnyAddr();
+  EXPECT_THAT(
+      connect(sock_.get(), reinterpret_cast<struct sockaddr*>(&any), addrlen_),
+      SyscallSucceeds());
+
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallFailsWithErrno(ENOTCONN));
+}
+
+TEST_P(UdpSocketTest, ConnectAnyWithPort) {
+  ASSERT_NO_ERRNO(BindAny());
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallSucceeds());
+}
+
+TEST_P(UdpSocketTest, DisconnectAfterConnectAny) {
+  // TODO(138658473): Enable when we can connect to port 0 with gVisor.
+  SKIP_IF(IsRunningOnGvisor());
+  struct sockaddr_storage any = InetAnyAddr();
+  EXPECT_THAT(
+      connect(sock_.get(), reinterpret_cast<struct sockaddr*>(&any), addrlen_),
+      SyscallSucceeds());
+
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallFailsWithErrno(ENOTCONN));
+
+  Disconnect(sock_.get());
+}
+
+TEST_P(UdpSocketTest, DisconnectAfterConnectAnyWithPort) {
+  ASSERT_NO_ERRNO(BindAny());
+  EXPECT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallSucceeds());
+
+  EXPECT_EQ(addrlen, addrlen_);
+  EXPECT_EQ(*Port(&bind_addr_storage_), *Port(&addr));
+
+  Disconnect(sock_.get());
+}
+
+TEST_P(UdpSocketTest, DisconnectAfterBind) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Bind to the next port above bind_.
+  struct sockaddr_storage addr_storage = InetLoopbackAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+  ASSERT_NO_ERRNO(BindSocket(sock_.get(), addr));
+
+  // Connect the socket.
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  struct sockaddr_storage unspec = {};
+  unspec.ss_family = AF_UNSPEC;
+  EXPECT_THAT(connect(sock_.get(), reinterpret_cast<sockaddr*>(&unspec),
+                      sizeof(unspec.ss_family)),
+              SyscallSucceeds());
+
+  // Check that we're still bound.
+  socklen_t addrlen = sizeof(unspec);
+  EXPECT_THAT(
+      getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&unspec), &addrlen),
+      SyscallSucceeds());
+
+  EXPECT_EQ(addrlen, addrlen_);
+  EXPECT_EQ(memcmp(addr, &unspec, addrlen_), 0);
+
+  addrlen = sizeof(addr);
+  EXPECT_THAT(getpeername(sock_.get(), addr, &addrlen),
+              SyscallFailsWithErrno(ENOTCONN));
+}
+
+TEST_P(UdpSocketTest, BindToAnyConnnectToLocalhost) {
+  ASSERT_NO_ERRNO(BindAny());
+
+  struct sockaddr_storage addr_storage = InetLoopbackAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+  socklen_t addrlen = sizeof(addr_storage);  // Whole buffer, not the pointer.
+
+  // Connect bind_ to the loopback address built above.
+  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
+
+  EXPECT_THAT(getsockname(bind_.get(), addr, &addrlen), SyscallSucceeds());
+
+  // If the socket is bound to ANY and connected to a loopback address,
+  // getsockname() has to return the loopback address.
+  if (GetParam() == AddressFamily::kIpv4) {
+    auto addr_out = reinterpret_cast<struct sockaddr_in*>(addr);
+    EXPECT_EQ(addrlen, sizeof(*addr_out));
+    EXPECT_EQ(addr_out->sin_addr.s_addr, htonl(INADDR_LOOPBACK));
+  } else {
+    auto addr_out = reinterpret_cast<struct sockaddr_in6*>(addr);
+    struct in6_addr loopback = IN6ADDR_LOOPBACK_INIT;
+    EXPECT_EQ(addrlen, sizeof(*addr_out));
+    EXPECT_EQ(memcmp(&addr_out->sin6_addr, &loopback, sizeof(in6_addr)), 0);
+  }
+}
+
+TEST_P(UdpSocketTest, DisconnectAfterBindToAny) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  struct sockaddr_storage any_storage = InetAnyAddr();
+  struct sockaddr* any = reinterpret_cast<struct sockaddr*>(&any_storage);
+  SetPort(&any_storage, *Port(&bind_addr_storage_) + 1);
+
+  ASSERT_NO_ERRNO(BindSocket(sock_.get(), any));
+
+  // Connect sock_ (bound to the InetAnyAddr() address) to bind_addr_.
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  Disconnect(sock_.get());
+
+  // getsockname() must still report exactly the address we bound to.
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallSucceeds());
+
+  EXPECT_EQ(addrlen, addrlen_);
+  EXPECT_EQ(memcmp(&addr, any, addrlen), 0);
+
+  addrlen = sizeof(addr);  // Reset: getsockname() overwrote it above.
+  EXPECT_THAT(
+      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallFailsWithErrno(ENOTCONN));
+}
+
+TEST_P(UdpSocketTest, Disconnect) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  struct sockaddr_storage any_storage = InetAnyAddr();
+  struct sockaddr* any = reinterpret_cast<struct sockaddr*>(&any_storage);
+  SetPort(&any_storage, *Port(&bind_addr_storage_) + 1);
+  ASSERT_NO_ERRNO(BindSocket(sock_.get(), any));
+
+  for (int i = 0; i < 2; i++) {  // Second pass reconnects after a disconnect.
+    // Connect sock_ to bind_addr_.
+    EXPECT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+    // getpeername() must report bind_addr_ as the peer.
+    struct sockaddr_storage peer;
+    socklen_t peerlen = sizeof(peer);
+    EXPECT_THAT(
+        getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&peer), &peerlen),
+        SyscallSucceeds());
+    EXPECT_EQ(peerlen, addrlen_);
+    EXPECT_EQ(memcmp(&peer, bind_addr_, addrlen_), 0);
+
+    // Disconnect by connecting to an AF_UNSPEC address.
+    struct sockaddr_storage addr = {};
+    addr.ss_family = AF_UNSPEC;
+    EXPECT_THAT(connect(sock_.get(), reinterpret_cast<sockaddr*>(&addr),
+                        sizeof(addr.ss_family)),
+                SyscallSucceeds());
+
+    peerlen = sizeof(peer);
+    EXPECT_THAT(
+        getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&peer), &peerlen),
+        SyscallFailsWithErrno(ENOTCONN));
+
+    // Still bound: getsockname() must report the port chosen above.
+    socklen_t addrlen = sizeof(addr);
+    EXPECT_THAT(
+        getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+        SyscallSucceeds());
+    EXPECT_EQ(addrlen, addrlen_);
+    EXPECT_EQ(*Port(&addr), *Port(&any_storage));
+  }
+}
+
+TEST_P(UdpSocketTest, ConnectBadAddress) {  // Truncated address => EINVAL.
+  struct sockaddr addr = {};  // Only the family field is passed to connect().
+  addr.sa_family = GetFamily();
+  ASSERT_THAT(connect(sock_.get(), &addr, sizeof(addr.sa_family)),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(UdpSocketTest, SendToAddressOtherThanConnected) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  struct sockaddr_storage addr_storage = InetAnyAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  // sendto() to an address other than the connected peer must succeed.
+  char buf[512] = {};  // Zeroed: never send indeterminate stack bytes.
+  EXPECT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, addr, addrlen_),
+              SyscallSucceedsWithValue(sizeof(buf)));
+}
+
+TEST_P(UdpSocketTest, ZerolengthWriteAllowed) {
+  // TODO(gvisor.dev/issue/1202): Hostinet does not support zero length writes.
+  SKIP_IF(IsRunningWithHostinet());
+
+  ASSERT_NO_ERRNO(BindLoopback());
+  // Connect bind_ to loopback:bind_addr_port+1.
+  struct sockaddr_storage addr_storage = InetLoopbackAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
+
+  // Bind sock_ to loopback:bind_addr_port+1, i.e. the peer of bind_.
+  ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds());
+
+  char buf[3];
+  // Send zero length packet from bind_ to sock_.
+  ASSERT_THAT(write(bind_.get(), buf, 0), SyscallSucceedsWithValue(0));
+
+  struct pollfd pfd = {sock_.get(), POLLIN, 0};  // Wait up to 1s for it.
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
+              SyscallSucceedsWithValue(1));
+
+  // Receive the packet: the empty datagram must read as 0 bytes.
+  char received[3];
+  EXPECT_THAT(read(sock_.get(), received, sizeof(received)),
+              SyscallSucceedsWithValue(0));
+}
+
+TEST_P(UdpSocketTest, ZerolengthWriteAllowedNonBlockRead) {
+  // TODO(gvisor.dev/issue/1202): Hostinet does not support zero length writes.
+  SKIP_IF(IsRunningWithHostinet());
+
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Connect bind_ to loopback:bind_addr_port+1.
+  struct sockaddr_storage addr_storage = InetLoopbackAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
+
+  // Bind sock_ to loopback:bind_addr_port+1, i.e. the peer of bind_.
+  ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds());
+
+  // Set sock_ to non-blocking, keeping its other file status flags.
+  int opts = 0;
+  ASSERT_THAT(opts = fcntl(sock_.get(), F_GETFL), SyscallSucceeds());
+  ASSERT_THAT(fcntl(sock_.get(), F_SETFL, opts | O_NONBLOCK),
+              SyscallSucceeds());
+
+  char buf[3];
+  // Send zero length packet from bind_ to sock_.
+  ASSERT_THAT(write(bind_.get(), buf, 0), SyscallSucceedsWithValue(0));
+
+  struct pollfd pfd = {sock_.get(), POLLIN, 0};  // Wait up to 1s for it.
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
+              SyscallSucceedsWithValue(1));
+
+  // The first read returns the empty packet; the next finds an empty queue.
+  char received[3];
+  EXPECT_THAT(read(sock_.get(), received, sizeof(received)),
+              SyscallSucceedsWithValue(0));
+  EXPECT_THAT(read(sock_.get(), received, sizeof(received)),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(UdpSocketTest, SendAndReceiveNotConnected) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Send a RandomizeBuffer()-filled payload to bind_addr_ via sendto().
+  char buf[512];
+  RandomizeBuffer(buf, sizeof(buf));
+
+  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Receive on bind_ and check that the payload arrived unchanged.
+  char received[sizeof(buf)];
+  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), 0),
+              SyscallSucceedsWithValue(sizeof(received)));
+  EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
+}
+
+TEST_P(UdpSocketTest, SendAndReceiveConnected) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Connect bind_ to loopback:bind_addr_port+1.
+  struct sockaddr_storage addr_storage = InetLoopbackAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
+
+  // Bind sock_ to loopback:bind_addr_port+1, i.e. the peer of bind_.
+  ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds());
+
+  // Send a RandomizeBuffer()-filled payload from sock_ to bind_addr_.
+  char buf[512];
+  RandomizeBuffer(buf, sizeof(buf));
+
+  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Receive on bind_ and check that the payload arrived unchanged.
+  char received[sizeof(buf)];
+  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), 0),
+              SyscallSucceedsWithValue(sizeof(received)));
+  EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
+}
+
+TEST_P(UdpSocketTest, ReceiveFromNotConnected) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Connect bind_ to loopback:bind_addr_port+1.
+  struct sockaddr_storage addr_storage = InetLoopbackAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
+
+  // Bind sock_ to loopback:bind_addr_port+2, which is NOT the peer of bind_.
+  struct sockaddr_storage addr2_storage = InetLoopbackAddr();
+  struct sockaddr* addr2 = reinterpret_cast<struct sockaddr*>(&addr2_storage);
+  SetPort(&addr2_storage, *Port(&bind_addr_storage_) + 2);
+  ASSERT_THAT(bind(sock_.get(), addr2, addrlen_), SyscallSucceeds());
+
+  // Send some data from sock_ to bind_ (zeroed; the content is irrelevant).
+  char buf[512] = {};
+  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Check on the receiver, bind_, that the data isn't delivered: it was sent
+  // from port+2 but bind_ is connected to port+1.
+  EXPECT_THAT(recv(bind_.get(), buf, sizeof(buf), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+}
+
+TEST_P(UdpSocketTest, ReceiveBeforeConnect) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Bind sock_ to loopback:bind_addr_port+2.
+  struct sockaddr_storage addr2_storage = InetLoopbackAddr();
+  struct sockaddr* addr2 = reinterpret_cast<struct sockaddr*>(&addr2_storage);
+  SetPort(&addr2_storage, *Port(&bind_addr_storage_) + 2);
+  ASSERT_THAT(bind(sock_.get(), addr2, addrlen_), SyscallSucceeds());
+
+  // Send from sock_ to bind_ while bind_ is still unconnected.
+  char buf[512];
+  RandomizeBuffer(buf, sizeof(buf));
+
+  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Connect bind_ to loopback:bind_addr_port+1, which is not sock_'s address.
+  struct sockaddr_storage addr_storage = InetLoopbackAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
+
+  // Receive the data. It works because it was sent before the connect.
+  char received[sizeof(buf)];
+  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), 0),
+              SyscallSucceedsWithValue(sizeof(received)));
+  EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
+
+  // Send again. bind_ is now connected to port+1, so this must not arrive.
+  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_THAT(recv(bind_.get(), buf, sizeof(buf), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+}
+
+TEST_P(UdpSocketTest, ReceiveFrom) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Connect bind_ to loopback:bind_addr_port+1.
+  struct sockaddr_storage addr_storage = InetLoopbackAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
+
+  // Bind sock_ to loopback:bind_addr_port+1, i.e. the peer of bind_.
+  ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds());
+
+  // Send a RandomizeBuffer()-filled payload from sock_ to bind_addr_.
+  char buf[512];
+  RandomizeBuffer(buf, sizeof(buf));
+
+  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Receive the data; the reported sender must equal sock_'s bound address.
+  char received[sizeof(buf)];
+  struct sockaddr_storage addr2;
+  socklen_t addr2len = sizeof(addr2);
+  EXPECT_THAT(recvfrom(bind_.get(), received, sizeof(received), 0,
+                       reinterpret_cast<sockaddr*>(&addr2), &addr2len),
+              SyscallSucceedsWithValue(sizeof(received)));
+  EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
+  EXPECT_EQ(addr2len, addrlen_);
+  EXPECT_EQ(memcmp(addr, &addr2, addrlen_), 0);
+}
+
+TEST_P(UdpSocketTest, Listen) {  // listen() must fail on a UDP socket.
+  ASSERT_THAT(listen(sock_.get(), SOMAXCONN),
+              SyscallFailsWithErrno(EOPNOTSUPP));
+}
+
+TEST_P(UdpSocketTest, Accept) {  // accept() must fail on a UDP socket.
+  ASSERT_THAT(accept(sock_.get(), nullptr, nullptr),
+              SyscallFailsWithErrno(EOPNOTSUPP));
+}
+
+// This test validates that a read shutdown with pending data still lets the
+// queued datagram be read before later reads fail with EWOULDBLOCK.
+TEST_P(UdpSocketTest, ReadShutdownNonblockPendingData) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Connect bind_ to loopback:bind_addr_port+1.
+  struct sockaddr_storage addr_storage = InetLoopbackAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
+
+  // Bind sock_ to loopback:bind_addr_port+1 and connect it to bind_addr_.
+  ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds());
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  // Verify that we get EWOULDBLOCK when there is nothing to read.
+  char received[512];
+  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  const char* buf = "abc";  // 3-byte payload queued on bind_ before shutdown.
+  EXPECT_THAT(write(sock_.get(), buf, 3), SyscallSucceedsWithValue(3));
+
+  int opts = 0;  // Make bind_ non-blocking and verify the flag took effect.
+  ASSERT_THAT(opts = fcntl(bind_.get(), F_GETFL), SyscallSucceeds());
+  ASSERT_THAT(fcntl(bind_.get(), F_SETFL, opts | O_NONBLOCK),
+              SyscallSucceeds());
+  ASSERT_THAT(opts = fcntl(bind_.get(), F_GETFL), SyscallSucceeds());
+  ASSERT_NE(opts & O_NONBLOCK, 0);
+
+  EXPECT_THAT(shutdown(bind_.get(), SHUT_RD), SyscallSucceeds());
+
+  struct pollfd pfd = {bind_.get(), POLLIN, 0};
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
+              SyscallSucceedsWithValue(1));
+
+  // We should get the data even though read has been shutdown.
+  EXPECT_THAT(recv(bind_.get(), received, 2, 0), SyscallSucceedsWithValue(2));
+
+  // Only 2 of the 3 bytes were read, but this is a packet based socket, so
+  // the rest is not kept: any subsequent read should return EWOULDBLOCK.
+  EXPECT_THAT(recv(bind_.get(), received, 1, 0),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+}
+
+// Checks that a shutdown(SHUT_RD) that fails with ENOTCONN leaves no read
+// shutdown behind: recv still sees an empty queue, before and after connect.
+TEST_P(UdpSocketTest, ReadShutdownSameSocketResetsShutdownState) {
+  char received[512];
+  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  EXPECT_THAT(shutdown(bind_.get(), SHUT_RD), SyscallFailsWithErrno(ENOTCONN));
+
+  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Bind and connect the socket. NOTE(review): no second shutdown is issued.
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Connect bind_ to loopback:bind_addr_port+1.
+  struct sockaddr_storage addr_storage = InetLoopbackAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
+
+  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+}
+
+TEST_P(UdpSocketTest, ReadShutdown) {
+  // TODO(gvisor.dev/issue/1202): Calling recv() after shutdown without
+  // MSG_DONTWAIT blocks indefinitely.
+  SKIP_IF(IsRunningWithHostinet());
+
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  char received[512];  // Scratch buffer; nothing is ever sent to sock_ here.
+  EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  EXPECT_THAT(shutdown(sock_.get(), SHUT_RD), SyscallFailsWithErrno(ENOTCONN));
+
+  EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Connect the socket; after that shutdown(SHUT_RD) succeeds and recv gets 0.
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  EXPECT_THAT(shutdown(sock_.get(), SHUT_RD), SyscallSucceeds());
+
+  EXPECT_THAT(recv(sock_.get(), received, sizeof(received), 0),
+              SyscallSucceedsWithValue(0));
+}
+
+TEST_P(UdpSocketTest, ReadShutdownDifferentThread) {
+  // TODO(gvisor.dev/issue/1202): Calling recv() after shutdown without
+  // MSG_DONTWAIT blocks indefinitely.
+  SKIP_IF(IsRunningWithHostinet());
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  char received[512];  // Scratch buffer; nothing is ever sent to sock_ here.
+  EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Connect the socket, then shut down its read side from another thread.
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  ScopedThread t([&] {
+    absl::SleepFor(absl::Milliseconds(200));  // Presumably lets recv block.
+    EXPECT_THAT(shutdown(sock_.get(), SHUT_RD), SyscallSucceeds());
+  });
+  EXPECT_THAT(RetryEINTR(recv)(sock_.get(), received, sizeof(received), 0),
+              SyscallSucceedsWithValue(0));
+  t.Join();
+
+  EXPECT_THAT(RetryEINTR(recv)(sock_.get(), received, sizeof(received), 0),
+              SyscallSucceedsWithValue(0));
+}
+
+TEST_P(UdpSocketTest, WriteShutdown) {  // SHUT_WR needs a connected socket.
+  ASSERT_NO_ERRNO(BindLoopback());
+  EXPECT_THAT(shutdown(sock_.get(), SHUT_WR), SyscallFailsWithErrno(ENOTCONN));
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+  EXPECT_THAT(shutdown(sock_.get(), SHUT_WR), SyscallSucceeds());
+}
+
+TEST_P(UdpSocketTest, SynchronousReceive) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Payload that the sender thread below will send to bind_.
+  char buf[512];
+  RandomizeBuffer(buf, sizeof(buf));
+
+  // Nothing has been sent yet, so a non-blocking receive must fail.
+  char received[512];
+  EXPECT_THAT(
+      RetryEINTR(recv)(bind_.get(), received, sizeof(received), MSG_DONTWAIT),
+      SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Start the sender thread: it sleeps 200ms, then sends buf to bind_addr_.
+  ScopedThread t([&] {
+    absl::SleepFor(absl::Milliseconds(200));
+    ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, this->bind_addr_,
+                       this->addrlen_),
+                SyscallSucceedsWithValue(sizeof(buf)));
+  });
+
+  EXPECT_THAT(RetryEINTR(recv)(bind_.get(), received, sizeof(received), 0),
+              SyscallSucceedsWithValue(512));
+  EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
+}
+
+TEST_P(UdpSocketTest, BoundaryPreserved_SendRecv) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Send 3 packets of psize bytes each from sock to bind_.
+  constexpr int psize = 100;
+  char buf[3 * psize];
+  RandomizeBuffer(buf, sizeof(buf));
+
+  for (int i = 0; i < 3; ++i) {
+    ASSERT_THAT(
+        sendto(sock_.get(), buf + i * psize, psize, 0, bind_addr_, addrlen_),
+        SyscallSucceedsWithValue(psize));
+  }
+
+  // Each recv offers room for 3 packets but must return exactly one.
+  char received[6 * psize];
+  for (int i = 0; i < 3; ++i) {
+    EXPECT_THAT(recv(bind_.get(), received + i * psize, 3 * psize, 0),
+                SyscallSucceedsWithValue(psize));
+  }
+  EXPECT_EQ(memcmp(buf, received, 3 * psize), 0);
+}
+
+TEST_P(UdpSocketTest, BoundaryPreserved_WritevReadv) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Connect sock to bind_ so that writev() needs no destination address.
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  // Send 2 packets from sock to bind_; packet i gathers the discontiguous
+  // pieces i and i+2 of buf.
+  constexpr size_t kPieceSize = 100;
+  char buf[4 * kPieceSize];
+  RandomizeBuffer(buf, sizeof(buf));
+
+  for (int i = 0; i < 2; i++) {
+    struct iovec iov[2];
+    for (int j = 0; j < 2; j++) {
+      iov[j].iov_base = reinterpret_cast<void*>(
+          reinterpret_cast<uintptr_t>(buf) + (i + 2 * j) * kPieceSize);
+      iov[j].iov_len = kPieceSize;
+    }
+    ASSERT_THAT(writev(sock_.get(), iov, 2),
+                SyscallSucceedsWithValue(2 * kPieceSize));
+  }
+
+  // Receive 2 packets; each readv offers 3 pieces but must fill only 2.
+  char received[6 * kPieceSize];
+  for (int i = 0; i < 2; i++) {
+    struct iovec iov[3];
+    for (int j = 0; j < 3; j++) {
+      iov[j].iov_base = reinterpret_cast<void*>(
+          reinterpret_cast<uintptr_t>(received) + (i + 2 * j) * kPieceSize);
+      iov[j].iov_len = kPieceSize;
+    }
+    ASSERT_THAT(readv(bind_.get(), iov, 3),
+                SyscallSucceedsWithValue(2 * kPieceSize));
+  }
+  EXPECT_EQ(memcmp(buf, received, 4 * kPieceSize), 0);
+}
+
+TEST_P(UdpSocketTest, BoundaryPreserved_SendMsgRecvMsg) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Send 2 packets from sock to bind_; packet i gathers the discontiguous
+  // pieces i and i+2 of buf.
+  constexpr size_t kPieceSize = 100;
+  char buf[4 * kPieceSize];
+  RandomizeBuffer(buf, sizeof(buf));
+
+  for (int i = 0; i < 2; i++) {
+    struct iovec iov[2];
+    for (int j = 0; j < 2; j++) {
+      iov[j].iov_base = reinterpret_cast<void*>(
+          reinterpret_cast<uintptr_t>(buf) + (i + 2 * j) * kPieceSize);
+      iov[j].iov_len = kPieceSize;
+    }
+    struct msghdr msg = {};  // sock_ is unconnected: name the destination.
+    msg.msg_name = bind_addr_;
+    msg.msg_namelen = addrlen_;
+    msg.msg_iov = iov;
+    msg.msg_iovlen = 2;
+    ASSERT_THAT(sendmsg(sock_.get(), &msg, 0),
+                SyscallSucceedsWithValue(2 * kPieceSize));
+  }
+
+  // Receive 2 packets; each recvmsg offers 3 pieces but must fill only 2.
+  char received[6 * kPieceSize];
+  for (int i = 0; i < 2; i++) {
+    struct iovec iov[3];
+    for (int j = 0; j < 3; j++) {
+      iov[j].iov_base = reinterpret_cast<void*>(
+          reinterpret_cast<uintptr_t>(received) + (i + 2 * j) * kPieceSize);
+      iov[j].iov_len = kPieceSize;
+    }
+    struct msghdr msg = {};
+    msg.msg_iov = iov;
+    msg.msg_iovlen = 3;
+    ASSERT_THAT(recvmsg(bind_.get(), &msg, 0),
+                SyscallSucceedsWithValue(2 * kPieceSize));
+  }
+  EXPECT_EQ(memcmp(buf, received, 4 * kPieceSize), 0);
+}
+
+TEST_P(UdpSocketTest, FIONREADShutdown) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  int n = -1;  // Sentinel; each FIONREAD below must overwrite it with 0.
+  EXPECT_THAT(ioctl(sock_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+
+  // A UDP socket must be connected before it can be shutdown.
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  n = -1;
+  EXPECT_THAT(ioctl(sock_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+
+  EXPECT_THAT(shutdown(sock_.get(), SHUT_RD), SyscallSucceeds());
+
+  n = -1;  // FIONREAD must keep working, and report 0, after SHUT_RD.
+  EXPECT_THAT(ioctl(sock_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+}
+
+TEST_P(UdpSocketTest, FIONREADWriteShutdown) {
+  int n = -1;  // Sentinel; each FIONREAD below must overwrite it.
+  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // A UDP socket must be connected before it can be shutdown.
+  ASSERT_THAT(connect(bind_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  n = -1;
+  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+
+  const char str[] = "abc";  // sizeof(str) is 4: it counts the trailing NUL.
+  ASSERT_THAT(send(bind_.get(), str, sizeof(str), 0),
+              SyscallSucceedsWithValue(sizeof(str)));
+
+  struct pollfd pfd = {bind_.get(), POLLIN, 0};  // bind_ polls its own data.
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
+              SyscallSucceedsWithValue(1));
+
+  n = -1;
+  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, sizeof(str));
+
+  EXPECT_THAT(shutdown(bind_.get(), SHUT_RD), SyscallSucceeds());
+
+  n = -1;  // The queued packet must still be counted after SHUT_RD.
+  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, sizeof(str));
+}
+
+// NOTE: Do not use `FIONREAD` as the test name: the preprocessor would replace
+// it with the corresponding macro value and it would become `0x541B`.
+TEST_P(UdpSocketTest, Fionread) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Check that the bound socket with an empty buffer reports an empty first
+  // packet.
+  int n = -1;
+  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+
+  // Send 3 packets of psize bytes each from sock to bind_.
+  constexpr int psize = 100;
+  char buf[3 * psize];
+  RandomizeBuffer(buf, sizeof(buf));
+
+  struct pollfd pfd = {bind_.get(), POLLIN, 0};
+  for (int i = 0; i < 3; ++i) {
+    ASSERT_THAT(
+        sendto(sock_.get(), buf + i * psize, psize, 0, bind_addr_, addrlen_),
+        SyscallSucceedsWithValue(psize));
+
+    ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
+                SyscallSucceedsWithValue(1));
+
+    // Nothing is read in this loop, so packets pile up; check that the size
+    // reported is still that of a single packet.
+    n = -1;
+    EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+    EXPECT_EQ(n, psize);
+  }
+}
+
+TEST_P(UdpSocketTest, FIONREADZeroLengthPacket) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Check that the bound socket with an empty buffer reports an empty first
+  // packet.
+  int n = -1;
+  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+
+  // Send 3 zero-length packets from sock to bind_ (no bytes of buf are sent).
+  constexpr int psize = 100;
+  char buf[3 * psize];
+  RandomizeBuffer(buf, sizeof(buf));
+
+  struct pollfd pfd = {bind_.get(), POLLIN, 0};
+  for (int i = 0; i < 3; ++i) {
+    ASSERT_THAT(
+        sendto(sock_.get(), buf + i * psize, 0, 0, bind_addr_, addrlen_),
+        SyscallSucceedsWithValue(0));
+
+    // TODO(gvisor.dev/issue/2726): sending a zero-length message to a hostinet
+    // socket does not cause a poll event to be triggered.
+    if (!IsRunningWithHostinet()) {
+      ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
+                  SyscallSucceedsWithValue(1));
+    }
+
+    // Check that regardless of how many empty packets are in the queue, the
+    // size reported is that of a single packet, i.e. 0.
+    n = -1;
+    EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+    EXPECT_EQ(n, 0);
+  }
+}
+
+TEST_P(UdpSocketTest, FIONREADZeroLengthWriteShutdown) {
+  int n = -1;  // Sentinel; each FIONREAD below must overwrite it with 0.
+  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // A UDP socket must be connected before it can be shutdown.
+  ASSERT_THAT(connect(bind_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  n = -1;
+  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+
+  const char str[] = "abc";  // Length 0 is passed below: an empty packet.
+  ASSERT_THAT(send(bind_.get(), str, 0, 0), SyscallSucceedsWithValue(0));
+
+  n = -1;
+  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+
+  EXPECT_THAT(shutdown(bind_.get(), SHUT_RD), SyscallSucceeds());
+
+  n = -1;  // FIONREAD must keep working, and report 0, after SHUT_RD.
+  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+}
+
+TEST_P(UdpSocketTest, SoNoCheckOffByDefault) {
+  // TODO(gvisor.dev/issue/1202): SO_NO_CHECK socket option not supported by
+  // hostinet.
+  SKIP_IF(IsRunningWithHostinet());
+
+  int v = -1;  // Sentinel that getsockopt() is expected to overwrite.
+  socklen_t optlen = sizeof(v);
+  ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, &optlen),
+              SyscallSucceeds());
+  ASSERT_EQ(v, kSockOptOff);
+  ASSERT_EQ(optlen, sizeof(v));
+}
+
+TEST_P(UdpSocketTest, SoNoCheck) {
+  // TODO(gvisor.dev/issue/1202): SO_NO_CHECK socket option not supported by
+  // hostinet.
+  SKIP_IF(IsRunningWithHostinet());
+
+  int v = kSockOptOn;  // Turn SO_NO_CHECK on, then read it back.
+  socklen_t optlen = sizeof(v);
+  ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, optlen),
+              SyscallSucceeds());
+  v = -1;
+  ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, &optlen),
+              SyscallSucceeds());
+  ASSERT_EQ(v, kSockOptOn);
+  ASSERT_EQ(optlen, sizeof(v));
+
+  v = kSockOptOff;  // Turn it off again, then read it back.
+  ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, optlen),
+              SyscallSucceeds());
+  v = -1;
+  ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, &optlen),
+              SyscallSucceeds());
+  ASSERT_EQ(v, kSockOptOff);
+  ASSERT_EQ(optlen, sizeof(v));
+}
+
+#ifdef __linux__
+TEST_P(UdpSocketTest, ErrorQueue) {  // An empty error queue yields EAGAIN.
+  char cmsgbuf[CMSG_SPACE(sizeof(sock_extended_err))];
+  msghdr msg;
+  memset(&msg, 0, sizeof(msg));
+  iovec iov;  // Zeroed below: no data buffer is supplied.
+  memset(&iov, 0, sizeof(iov));
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+  msg.msg_control = cmsgbuf;
+  msg.msg_controllen = sizeof(cmsgbuf);
+
+  // recv*(MSG_ERRQUEUE) never blocks, even without MSG_DONTWAIT.
+  EXPECT_THAT(RetryEINTR(recvmsg)(bind_.get(), &msg, MSG_ERRQUEUE),
+              SyscallFailsWithErrno(EAGAIN));
+}
+#endif  // __linux__
+
+TEST_P(UdpSocketTest, SoTimestampOffByDefault) {
+  // TODO(gvisor.dev/issue/1202): SO_TIMESTAMP socket option not supported by
+  // hostinet.
+  SKIP_IF(IsRunningWithHostinet());
+
+  int v = -1;  // Sentinel that getsockopt() is expected to overwrite.
+  socklen_t optlen = sizeof(v);
+  ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_TIMESTAMP, &v, &optlen),
+              SyscallSucceeds());
+  ASSERT_EQ(v, kSockOptOff);
+  ASSERT_EQ(optlen, sizeof(v));
+}
+
+TEST_P(UdpSocketTest, SoTimestamp) {
+  // TODO(gvisor.dev/issue/1202): ioctl() and SO_TIMESTAMP socket option are not
+  // supported by hostinet.
+  SKIP_IF(IsRunningWithHostinet());
+
+  ASSERT_NO_ERRNO(BindLoopback());
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  int v = 1;  // Enable SO_TIMESTAMP on the receiving socket, bind_.
+  ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_TIMESTAMP, &v, sizeof(v)),
+              SyscallSucceeds());
+
+  char buf[3];
+  // Send zero length packet from sock to bind_.
+  ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, 0),
+              SyscallSucceedsWithValue(0));
+
+  struct pollfd pfd = {bind_.get(), POLLIN, 0};
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
+              SyscallSucceedsWithValue(1));
+
+  char cmsgbuf[CMSG_SPACE(sizeof(struct timeval))];  // One timeval cmsg.
+  msghdr msg;
+  memset(&msg, 0, sizeof(msg));
+  iovec iov;
+  memset(&iov, 0, sizeof(iov));
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+  msg.msg_control = cmsgbuf;
+  msg.msg_controllen = sizeof(cmsgbuf);
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(bind_.get(), &msg, 0),
+              SyscallSucceedsWithValue(0));
+
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  ASSERT_NE(cmsg, nullptr);
+  ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+  ASSERT_EQ(cmsg->cmsg_type, SO_TIMESTAMP);
+  ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(struct timeval)));
+
+  struct timeval tv = {};  // Copied out of the control message payload.
+  memcpy(&tv, CMSG_DATA(cmsg), sizeof(struct timeval));
+
+  ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0);
+
+  // With SO_TIMESTAMP on, SIOCGSTAMP should have nothing to report (ENOENT).
+  ASSERT_THAT(ioctl(bind_.get(), SIOCGSTAMP, &tv),
+              SyscallFailsWithErrno(ENOENT));
+}
+
+TEST_P(UdpSocketTest, WriteShutdownNotConnected) {  // bind_ never connects.
+  EXPECT_THAT(shutdown(bind_.get(), SHUT_WR), SyscallFailsWithErrno(ENOTCONN));
+}
+
+TEST_P(UdpSocketTest, TimestampIoctl) {
+  // TODO(gvisor.dev/issue/1202): ioctl() is not supported by hostinet.
+  SKIP_IF(IsRunningWithHostinet());
+
+  ASSERT_NO_ERRNO(BindLoopback());
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  char buf[3] = {};  // Zeroed: never send indeterminate stack bytes.
+  // Send packet from sock to bind_.
+  ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  struct pollfd pfd = {bind_.get(), POLLIN, 0};
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
+              SyscallSucceedsWithValue(1));
+
+  // There should be no control messages (SO_TIMESTAMP was never enabled).
+  char recv_buf[sizeof(buf)];
+  ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(bind_.get(), recv_buf, sizeof(recv_buf)));
+
+  // A nonzero timeval should be available via ioctl after the receive.
+  struct timeval tv = {};
+  ASSERT_THAT(ioctl(bind_.get(), SIOCGSTAMP, &tv), SyscallSucceeds());
+  ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0);
+}
+
+TEST_P(UdpSocketTest, TimestampIoctlNothingRead) {
+  // TODO(gvisor.dev/issue/1202): ioctl() is not supported by hostinet.
+  SKIP_IF(IsRunningWithHostinet());
+
+  ASSERT_NO_ERRNO(BindLoopback());
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  struct timeval tv = {};  // sock_ has read no packet: expect ENOENT.
+  ASSERT_THAT(ioctl(sock_.get(), SIOCGSTAMP, &tv),
+              SyscallFailsWithErrno(ENOENT));
+}
+
+// Test that the timestamp accessed via SIOCGSTAMP is still accessible after
+// SO_TIMESTAMP is enabled and used to retrieve a timestamp.
+TEST_P(UdpSocketTest, TimestampIoctlPersistence) {
+  // TODO(gvisor.dev/issue/1202): ioctl() and SO_TIMESTAMP socket option are not
+  // supported by hostinet.
+  SKIP_IF(IsRunningWithHostinet());
+
+  ASSERT_NO_ERRNO(BindLoopback());
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  char buf[3];
+  // Send packet from sock to bind_.
+  ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+  ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, 0),
+              SyscallSucceedsWithValue(0));
+
+  struct pollfd pfd = {bind_.get(), POLLIN, 0};
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
+              SyscallSucceedsWithValue(1));
+
+  // There should be no control messages.
+  char recv_buf[sizeof(buf)];
+  ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(bind_.get(), recv_buf, sizeof(recv_buf)));
+
+  // A nonzero timeval should be available via ioctl.
+  struct timeval tv = {};
+  ASSERT_THAT(ioctl(bind_.get(), SIOCGSTAMP, &tv), SyscallSucceeds());
+  ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0);
+
+  // Enable SO_TIMESTAMP and send a message.
+  int v = 1;
+  EXPECT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_TIMESTAMP, &v, sizeof(v)),
+              SyscallSucceeds());
+  ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, 0),
+              SyscallSucceedsWithValue(0));
+
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
+              SyscallSucceedsWithValue(1));
+
+  // There should be a message for SO_TIMESTAMP.
+  char cmsgbuf[CMSG_SPACE(sizeof(struct timeval))];
+  msghdr msg = {};
+  iovec iov = {};
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+  msg.msg_control = cmsgbuf;
+  msg.msg_controllen = sizeof(cmsgbuf);
+  ASSERT_THAT(RetryEINTR(recvmsg)(bind_.get(), &msg, 0),
+              SyscallSucceedsWithValue(0));
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  ASSERT_NE(cmsg, nullptr);
+
+  // The ioctl should return the exact same values as before.
+  struct timeval tv2 = {};
+  ASSERT_THAT(ioctl(bind_.get(), SIOCGSTAMP, &tv2), SyscallSucceeds());
+  ASSERT_EQ(tv.tv_sec, tv2.tv_sec);
+  ASSERT_EQ(tv.tv_usec, tv2.tv_usec);
+}
+
+// Test that a socket with IP_TOS or IPV6_TCLASS set will set the TOS byte on
+// outgoing packets, and that a receiving socket with IP_RECVTOS or
+// IPV6_RECVTCLASS will create the corresponding control message.
+TEST_P(UdpSocketTest, SetAndReceiveTOS) {
+  ASSERT_NO_ERRNO(BindLoopback());
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  // Allow socket to receive control message.
+  int recv_level = SOL_IP;
+  int recv_type = IP_RECVTOS;
+  if (GetParam() != AddressFamily::kIpv4) {
+    recv_level = SOL_IPV6;
+    recv_type = IPV6_RECVTCLASS;
+  }
+  ASSERT_THAT(setsockopt(bind_.get(), recv_level, recv_type, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  // Set socket TOS.
+  int sent_level = recv_level;
+  int sent_type = IP_TOS;
+  if (sent_level == SOL_IPV6) {
+    sent_type = IPV6_TCLASS;
+  }
+  int sent_tos = IPTOS_LOWDELAY;  // Choose some TOS value.
+  ASSERT_THAT(setsockopt(sock_.get(), sent_level, sent_type, &sent_tos,
+                         sizeof(sent_tos)),
+              SyscallSucceeds());
+
+  // Prepare message to send.
+  constexpr size_t kDataLength = 1024;
+  struct msghdr sent_msg = {};
+  struct iovec sent_iov = {};
+  char sent_data[kDataLength];
+  sent_iov.iov_base = &sent_data[0];
+  sent_iov.iov_len = kDataLength;
+  sent_msg.msg_iov = &sent_iov;
+  sent_msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(sendmsg)(sock_.get(), &sent_msg, 0),
+              SyscallSucceedsWithValue(kDataLength));
+
+  // Receive message.
+  struct msghdr received_msg = {};
+  struct iovec received_iov = {};
+  char received_data[kDataLength];
+  received_iov.iov_base = &received_data[0];
+  received_iov.iov_len = kDataLength;
+  received_msg.msg_iov = &received_iov;
+  received_msg.msg_iovlen = 1;
+  size_t cmsg_data_len = sizeof(int8_t);
+  if (sent_type == IPV6_TCLASS) {
+    cmsg_data_len = sizeof(int);
+  }
+  std::vector<char> received_cmsgbuf(CMSG_SPACE(cmsg_data_len));
+  received_msg.msg_control = &received_cmsgbuf[0];
+  received_msg.msg_controllen = received_cmsgbuf.size();
+  ASSERT_THAT(RetryEINTR(recvmsg)(bind_.get(), &received_msg, 0),
+              SyscallSucceedsWithValue(kDataLength));
+
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&received_msg);
+  ASSERT_NE(cmsg, nullptr);
+  EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(cmsg_data_len));
+  EXPECT_EQ(cmsg->cmsg_level, sent_level);
+  EXPECT_EQ(cmsg->cmsg_type, sent_type);
+  int8_t received_tos = 0;
+  memcpy(&received_tos, CMSG_DATA(cmsg), sizeof(received_tos));
+  EXPECT_EQ(received_tos, sent_tos);
+}
+
+// Test that sendmsg with IP_TOS and IPV6_TCLASS control messages will set the
+// TOS byte on outgoing packets, and that a receiving socket with IP_RECVTOS or
+// IPV6_RECVTCLASS will create the corresponding control message.
+TEST_P(UdpSocketTest, SendAndReceiveTOS) {
+  // TODO(b/146661005): Setting TOS via cmsg not supported for netstack.
+  SKIP_IF(IsRunningOnGvisor() && !IsRunningWithHostinet());
+
+  ASSERT_NO_ERRNO(BindLoopback());
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  // Allow socket to receive control message.
+  int recv_level = SOL_IP;
+  int recv_type = IP_RECVTOS;
+  if (GetParam() != AddressFamily::kIpv4) {
+    recv_level = SOL_IPV6;
+    recv_type = IPV6_RECVTCLASS;
+  }
+  int recv_opt = kSockOptOn;
+  ASSERT_THAT(setsockopt(bind_.get(), recv_level, recv_type, &recv_opt,
+                         sizeof(recv_opt)),
+              SyscallSucceeds());
+
+  // Prepare message to send.
+  constexpr size_t kDataLength = 1024;
+  int sent_level = recv_level;
+  int sent_type = IP_TOS;
+  int sent_tos = IPTOS_LOWDELAY;  // Choose some TOS value.
+
+  struct msghdr sent_msg = {};
+  struct iovec sent_iov = {};
+  char sent_data[kDataLength];
+  sent_iov.iov_base = &sent_data[0];
+  sent_iov.iov_len = kDataLength;
+  sent_msg.msg_iov = &sent_iov;
+  sent_msg.msg_iovlen = 1;
+  size_t cmsg_data_len = sizeof(int8_t);
+  if (sent_level == SOL_IPV6) {
+    sent_type = IPV6_TCLASS;
+    cmsg_data_len = sizeof(int);
+  }
+  std::vector<char> sent_cmsgbuf(CMSG_SPACE(cmsg_data_len));
+  sent_msg.msg_control = &sent_cmsgbuf[0];
+  sent_msg.msg_controllen = CMSG_LEN(cmsg_data_len);
+
+  // Manually add control message.
+  struct cmsghdr* sent_cmsg = CMSG_FIRSTHDR(&sent_msg);
+  sent_cmsg->cmsg_len = CMSG_LEN(cmsg_data_len);
+  sent_cmsg->cmsg_level = sent_level;
+  sent_cmsg->cmsg_type = sent_type;
+  *(int8_t*)CMSG_DATA(sent_cmsg) = sent_tos;
+
+  ASSERT_THAT(RetryEINTR(sendmsg)(sock_.get(), &sent_msg, 0),
+              SyscallSucceedsWithValue(kDataLength));
+
+  // Receive message.
+  struct msghdr received_msg = {};
+  struct iovec received_iov = {};
+  char received_data[kDataLength];
+  received_iov.iov_base = &received_data[0];
+  received_iov.iov_len = kDataLength;
+  received_msg.msg_iov = &received_iov;
+  received_msg.msg_iovlen = 1;
+  std::vector<char> received_cmsgbuf(CMSG_SPACE(cmsg_data_len));
+  received_msg.msg_control = &received_cmsgbuf[0];
+  received_msg.msg_controllen = CMSG_LEN(cmsg_data_len);
+  ASSERT_THAT(RetryEINTR(recvmsg)(bind_.get(), &received_msg, 0),
+              SyscallSucceedsWithValue(kDataLength));
+
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&received_msg);
+  ASSERT_NE(cmsg, nullptr);
+  EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(cmsg_data_len));
+  EXPECT_EQ(cmsg->cmsg_level, sent_level);
+  EXPECT_EQ(cmsg->cmsg_type, sent_type);
+  int8_t received_tos = 0;
+  memcpy(&received_tos, CMSG_DATA(cmsg), sizeof(received_tos));
+  EXPECT_EQ(received_tos, sent_tos);
+}
+
+TEST_P(UdpSocketTest, RecvBufLimitsEmptyRcvBuf) {
+  // Discover minimum buffer size by setting it to zero.
+  constexpr int kRcvBufSz = 0;
+  ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz,
+                         sizeof(kRcvBufSz)),
+              SyscallSucceeds());
+
+  int min = 0;
+  socklen_t min_len = sizeof(min);
+  ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+              SyscallSucceeds());
+
+  // Bind bind_ to loopback.
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  {
+    // Send data of size min and verify that it's received.
+    std::vector<char> buf(min);
+    RandomizeBuffer(buf.data(), buf.size());
+    ASSERT_THAT(
+        sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_),
+        SyscallSucceedsWithValue(buf.size()));
+    std::vector<char> received(buf.size());
+    EXPECT_THAT(
+        recv(bind_.get(), received.data(), received.size(), MSG_DONTWAIT),
+        SyscallSucceedsWithValue(received.size()));
+  }
+
+  {
+    // Send data of size min + 1 and verify that its received. Both linux and
+    // Netstack accept a dgram that exceeds rcvBuf limits if the receive buffer
+    // is currently empty.
+    std::vector<char> buf(min + 1);
+    RandomizeBuffer(buf.data(), buf.size());
+    ASSERT_THAT(
+        sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_),
+        SyscallSucceedsWithValue(buf.size()));
+
+    std::vector<char> received(buf.size());
+    EXPECT_THAT(
+        recv(bind_.get(), received.data(), received.size(), MSG_DONTWAIT),
+        SyscallSucceedsWithValue(received.size()));
+  }
+}
+
+// Test that receive buffer limits are enforced.
+TEST_P(UdpSocketTest, RecvBufLimits) {
+  // Bind s_ to loopback.
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  int min = 0;
+  {
+    // Discover minimum buffer size by trying to set it to zero.
+    constexpr int kRcvBufSz = 0;
+    ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz,
+                           sizeof(kRcvBufSz)),
+                SyscallSucceeds());
+
+    socklen_t min_len = sizeof(min);
+    ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+                SyscallSucceeds());
+  }
+
+  // Now set the limit to min * 4.
+  int new_rcv_buf_sz = min * 4;
+  if (!IsRunningOnGvisor() || IsRunningWithHostinet()) {
+    // Linux doubles the value specified so just set to min * 2.
+    new_rcv_buf_sz = min * 2;
+  }
+
+  ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &new_rcv_buf_sz,
+                         sizeof(new_rcv_buf_sz)),
+              SyscallSucceeds());
+  int rcv_buf_sz = 0;
+  {
+    socklen_t rcv_buf_len = sizeof(rcv_buf_sz);
+    ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz,
+                           &rcv_buf_len),
+                SyscallSucceeds());
+  }
+
+  {
+    std::vector<char> buf(min);
+    RandomizeBuffer(buf.data(), buf.size());
+
+    ASSERT_THAT(
+        sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_),
+        SyscallSucceedsWithValue(buf.size()));
+    ASSERT_THAT(
+        sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_),
+        SyscallSucceedsWithValue(buf.size()));
+    ASSERT_THAT(
+        sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_),
+        SyscallSucceedsWithValue(buf.size()));
+    ASSERT_THAT(
+        sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_),
+        SyscallSucceedsWithValue(buf.size()));
+    int sent = 4;
+    if (IsRunningOnGvisor() && !IsRunningWithHostinet()) {
+      // Linux seems to drop the 4th packet even though technically it should
+      // fit in the receive buffer.
+      ASSERT_THAT(
+          sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_),
+          SyscallSucceedsWithValue(buf.size()));
+      sent++;
+    }
+
+    for (int i = 0; i < sent - 1; i++) {
+      // Receive the data.
+      std::vector<char> received(buf.size());
+      EXPECT_THAT(
+          recv(bind_.get(), received.data(), received.size(), MSG_DONTWAIT),
+          SyscallSucceedsWithValue(received.size()));
+      EXPECT_EQ(memcmp(buf.data(), received.data(), buf.size()), 0);
+    }
+
+    // The last receive should fail with EAGAIN as the last packet should have
+    // been dropped due to lack of space in the receive buffer.
+    std::vector<char> received(buf.size());
+    EXPECT_THAT(
+        recv(bind_.get(), received.data(), received.size(), MSG_DONTWAIT),
+        SyscallFailsWithErrno(EAGAIN));
+  }
+}
+
+#ifdef __linux__
+
+// TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER.
+// gVisor currently silently ignores attaching a filter.
+TEST_P(UdpSocketTest, SetSocketDetachFilter) {
+  // Program generated using sudo tcpdump -i lo udp and port 1234 -dd
+  struct sock_filter code[] = {
+      {0x28, 0, 0, 0x0000000c},  {0x15, 0, 6, 0x000086dd},
+      {0x30, 0, 0, 0x00000014},  {0x15, 0, 15, 0x00000011},
+      {0x28, 0, 0, 0x00000036},  {0x15, 12, 0, 0x000004d2},
+      {0x28, 0, 0, 0x00000038},  {0x15, 10, 11, 0x000004d2},
+      {0x15, 0, 10, 0x00000800}, {0x30, 0, 0, 0x00000017},
+      {0x15, 0, 8, 0x00000011},  {0x28, 0, 0, 0x00000014},
+      {0x45, 6, 0, 0x00001fff},  {0xb1, 0, 0, 0x0000000e},
+      {0x48, 0, 0, 0x0000000e},  {0x15, 2, 0, 0x000004d2},
+      {0x48, 0, 0, 0x00000010},  {0x15, 0, 1, 0x000004d2},
+      {0x6, 0, 0, 0x00040000},   {0x6, 0, 0, 0x00000000},
+  };
+  struct sock_fprog bpf = {
+      .len = ABSL_ARRAYSIZE(code),
+      .filter = code,
+  };
+  ASSERT_THAT(
+      setsockopt(sock_.get(), SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)),
+      SyscallSucceeds());
+
+  constexpr int val = 0;
+  ASSERT_THAT(
+      setsockopt(sock_.get(), SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)),
+      SyscallSucceeds());
+}
+
+#endif  // __linux__
+
+TEST_P(UdpSocketTest, SetSocketDetachFilterNoInstalledFilter) {
+  // TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER.
+  SKIP_IF(IsRunningOnGvisor());
+  constexpr int val = 0;
+  ASSERT_THAT(
+      setsockopt(sock_.get(), SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)),
+      SyscallFailsWithErrno(ENOENT));
+}
+
+TEST_P(UdpSocketTest, GetSocketDetachFilter) {
+  int val = 0;
+  socklen_t val_len = sizeof(val);
+  ASSERT_THAT(
+      getsockopt(sock_.get(), SOL_SOCKET, SO_DETACH_FILTER, &val, &val_len),
+      SyscallFailsWithErrno(ENOPROTOOPT));
+}
+
 INSTANTIATE_TEST_SUITE_P(AllInetTests, UdpSocketTest,
                          ::testing::Values(AddressFamily::kIpv4,
                                            AddressFamily::kIpv6,
diff --git a/test/syscalls/linux/udp_socket_errqueue_test_case.cc b/test/syscalls/linux/udp_socket_errqueue_test_case.cc
deleted file mode 100644
index 54a0594f74..0000000000
--- a/test/syscalls/linux/udp_socket_errqueue_test_case.cc
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef __fuchsia__
-
-#include <arpa/inet.h>
-#include <fcntl.h>
-#include <linux/errqueue.h>
-#include <netinet/in.h>
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-
-#include "gtest/gtest.h"
-#include "absl/base/macros.h"
-#include "absl/time/clock.h"
-#include "absl/time/time.h"
-#include "test/syscalls/linux/socket_test_util.h"
-#include "test/syscalls/linux/udp_socket_test_cases.h"
-#include "test/syscalls/linux/unix_domain_socket_test_util.h"
-#include "test/util/test_util.h"
-#include "test/util/thread_util.h"
-
-namespace gvisor {
-namespace testing {
-
-TEST_P(UdpSocketTest, ErrorQueue) {
-  char cmsgbuf[CMSG_SPACE(sizeof(sock_extended_err))];
-  msghdr msg;
-  memset(&msg, 0, sizeof(msg));
-  iovec iov;
-  memset(&iov, 0, sizeof(iov));
-  msg.msg_iov = &iov;
-  msg.msg_iovlen = 1;
-  msg.msg_control = cmsgbuf;
-  msg.msg_controllen = sizeof(cmsgbuf);
-
-  // recv*(MSG_ERRQUEUE) never blocks, even without MSG_DONTWAIT.
-  EXPECT_THAT(RetryEINTR(recvmsg)(bind_.get(), &msg, MSG_ERRQUEUE),
-              SyscallFailsWithErrno(EAGAIN));
-}
-
-}  // namespace testing
-}  // namespace gvisor
-
-#endif  // __fuchsia__
diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc
deleted file mode 100644
index 60c48ed6e8..0000000000
--- a/test/syscalls/linux/udp_socket_test_cases.cc
+++ /dev/null
@@ -1,1781 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "test/syscalls/linux/udp_socket_test_cases.h"
-
-#include <arpa/inet.h>
-#include <fcntl.h>
-#ifndef __fuchsia__
-#include <linux/filter.h>
-#endif  // __fuchsia__
-#include <netinet/in.h>
-#include <poll.h>
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-
-#include "absl/strings/str_format.h"
-#ifndef SIOCGSTAMP
-#include <linux/sockios.h>
-#endif
-
-#include "gtest/gtest.h"
-#include "absl/base/macros.h"
-#include "absl/time/clock.h"
-#include "absl/time/time.h"
-#include "test/syscalls/linux/ip_socket_test_util.h"
-#include "test/syscalls/linux/socket_test_util.h"
-#include "test/syscalls/linux/unix_domain_socket_test_util.h"
-#include "test/util/file_descriptor.h"
-#include "test/util/posix_error.h"
-#include "test/util/test_util.h"
-#include "test/util/thread_util.h"
-
-namespace gvisor {
-namespace testing {
-
-// Gets a pointer to the port component of the given address.
-uint16_t* Port(struct sockaddr_storage* addr) {
-  switch (addr->ss_family) {
-    case AF_INET: {
-      auto sin = reinterpret_cast<struct sockaddr_in*>(addr);
-      return &sin->sin_port;
-    }
-    case AF_INET6: {
-      auto sin6 = reinterpret_cast<struct sockaddr_in6*>(addr);
-      return &sin6->sin6_port;
-    }
-  }
-
-  return nullptr;
-}
-
-// Sets addr port to "port".
-void SetPort(struct sockaddr_storage* addr, uint16_t port) {
-  switch (addr->ss_family) {
-    case AF_INET: {
-      auto sin = reinterpret_cast<struct sockaddr_in*>(addr);
-      sin->sin_port = port;
-      break;
-    }
-    case AF_INET6: {
-      auto sin6 = reinterpret_cast<struct sockaddr_in6*>(addr);
-      sin6->sin6_port = port;
-      break;
-    }
-  }
-}
-
-void UdpSocketTest::SetUp() {
-  addrlen_ = GetAddrLength();
-
-  bind_ =
-      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, IPPROTO_UDP));
-  memset(&bind_addr_storage_, 0, sizeof(bind_addr_storage_));
-  bind_addr_ = reinterpret_cast<struct sockaddr*>(&bind_addr_storage_);
-
-  sock_ =
-      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, IPPROTO_UDP));
-}
-
-int UdpSocketTest::GetFamily() {
-  if (GetParam() == AddressFamily::kIpv4) {
-    return AF_INET;
-  }
-  return AF_INET6;
-}
-
-PosixError UdpSocketTest::BindLoopback() {
-  bind_addr_storage_ = InetLoopbackAddr();
-  struct sockaddr* bind_addr_ =
-      reinterpret_cast<struct sockaddr*>(&bind_addr_storage_);
-  return BindSocket(bind_.get(), bind_addr_);
-}
-
-PosixError UdpSocketTest::BindAny() {
-  bind_addr_storage_ = InetAnyAddr();
-  struct sockaddr* bind_addr_ =
-      reinterpret_cast<struct sockaddr*>(&bind_addr_storage_);
-  return BindSocket(bind_.get(), bind_addr_);
-}
-
-PosixError UdpSocketTest::BindSocket(int socket, struct sockaddr* addr) {
-  socklen_t len = sizeof(bind_addr_storage_);
-
-  // Bind, then check that we get the right address.
-  RETURN_ERROR_IF_SYSCALL_FAIL(bind(socket, addr, addrlen_));
-
-  RETURN_ERROR_IF_SYSCALL_FAIL(getsockname(socket, addr, &len));
-
-  if (addrlen_ != len) {
-    return PosixError(
-        EINVAL,
-        absl::StrFormat("getsockname len: %u expected: %u", len, addrlen_));
-  }
-  return PosixError(0);
-}
-
-socklen_t UdpSocketTest::GetAddrLength() {
-  struct sockaddr_storage addr;
-  if (GetFamily() == AF_INET) {
-    auto sin = reinterpret_cast<struct sockaddr_in*>(&addr);
-    return sizeof(*sin);
-  }
-
-  auto sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr);
-  return sizeof(*sin6);
-}
-
-sockaddr_storage UdpSocketTest::InetAnyAddr() {
-  struct sockaddr_storage addr;
-  memset(&addr, 0, sizeof(addr));
-  reinterpret_cast<struct sockaddr*>(&addr)->sa_family = GetFamily();
-
-  if (GetFamily() == AF_INET) {
-    auto sin = reinterpret_cast<struct sockaddr_in*>(&addr);
-    sin->sin_addr.s_addr = htonl(INADDR_ANY);
-    sin->sin_port = htons(0);
-    return addr;
-  }
-
-  auto sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr);
-  sin6->sin6_addr = IN6ADDR_ANY_INIT;
-  sin6->sin6_port = htons(0);
-  return addr;
-}
-
-sockaddr_storage UdpSocketTest::InetLoopbackAddr() {
-  struct sockaddr_storage addr;
-  memset(&addr, 0, sizeof(addr));
-  reinterpret_cast<struct sockaddr*>(&addr)->sa_family = GetFamily();
-
-  if (GetFamily() == AF_INET) {
-    auto sin = reinterpret_cast<struct sockaddr_in*>(&addr);
-    sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-    sin->sin_port = htons(0);
-    return addr;
-  }
-  auto sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr);
-  sin6->sin6_addr = in6addr_loopback;
-  sin6->sin6_port = htons(0);
-  return addr;
-}
-
-void UdpSocketTest::Disconnect(int sockfd) {
-  sockaddr_storage addr_storage = InetAnyAddr();
-  sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
-  socklen_t addrlen = sizeof(addr_storage);
-
-  addr->sa_family = AF_UNSPEC;
-  ASSERT_THAT(connect(sockfd, addr, addrlen), SyscallSucceeds());
-
-  // Check that after disconnect the socket is bound to the ANY address.
-  EXPECT_THAT(getsockname(sockfd, addr, &addrlen), SyscallSucceeds());
-  if (GetParam() == AddressFamily::kIpv4) {
-    auto addr_out = reinterpret_cast<struct sockaddr_in*>(addr);
-    EXPECT_EQ(addrlen, sizeof(*addr_out));
-    EXPECT_EQ(addr_out->sin_addr.s_addr, htonl(INADDR_ANY));
-  } else {
-    auto addr_out = reinterpret_cast<struct sockaddr_in6*>(addr);
-    EXPECT_EQ(addrlen, sizeof(*addr_out));
-    struct in6_addr loopback = IN6ADDR_ANY_INIT;
-
-    EXPECT_EQ(memcmp(&addr_out->sin6_addr, &loopback, sizeof(in6_addr)), 0);
-  }
-}
-
-TEST_P(UdpSocketTest, Creation) {
-  FileDescriptor sock =
-      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, IPPROTO_UDP));
-  EXPECT_THAT(close(sock.release()), SyscallSucceeds());
-
-  sock = ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, 0));
-  EXPECT_THAT(close(sock.release()), SyscallSucceeds());
-
-  ASSERT_THAT(socket(GetFamily(), SOCK_STREAM, IPPROTO_UDP), SyscallFails());
-}
-
-TEST_P(UdpSocketTest, Getsockname) {
-  // Check that we're not bound.
-  struct sockaddr_storage addr;
-  socklen_t addrlen = sizeof(addr);
-  EXPECT_THAT(
-      getsockname(bind_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
-      SyscallSucceeds());
-  EXPECT_EQ(addrlen, addrlen_);
-  struct sockaddr_storage any = InetAnyAddr();
-  EXPECT_EQ(memcmp(&addr, reinterpret_cast<struct sockaddr*>(&any), addrlen_),
-            0);
-
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  EXPECT_THAT(
-      getsockname(bind_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
-      SyscallSucceeds());
-
-  EXPECT_EQ(addrlen, addrlen_);
-  EXPECT_EQ(memcmp(&addr, bind_addr_, addrlen_), 0);
-}
-
-TEST_P(UdpSocketTest, Getpeername) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // Check that we're not connected.
-  struct sockaddr_storage addr;
-  socklen_t addrlen = sizeof(addr);
-  EXPECT_THAT(
-      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
-      SyscallFailsWithErrno(ENOTCONN));
-
-  // Connect, then check that we get the right address.
-  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  addrlen = sizeof(addr);
-  EXPECT_THAT(
-      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
-      SyscallSucceeds());
-  EXPECT_EQ(addrlen, addrlen_);
-  EXPECT_EQ(memcmp(&addr, bind_addr_, addrlen_), 0);
-}
-
-TEST_P(UdpSocketTest, SendNotConnected) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // Do send & write, they must fail.
-  char buf[512];
-  EXPECT_THAT(send(sock_.get(), buf, sizeof(buf), 0),
-              SyscallFailsWithErrno(EDESTADDRREQ));
-
-  EXPECT_THAT(write(sock_.get(), buf, sizeof(buf)),
-              SyscallFailsWithErrno(EDESTADDRREQ));
-
-  // Use sendto.
-  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
-              SyscallSucceedsWithValue(sizeof(buf)));
-
-  // Check that we're bound now.
-  struct sockaddr_storage addr;
-  socklen_t addrlen = sizeof(addr);
-  EXPECT_THAT(
-      getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
-      SyscallSucceeds());
-  EXPECT_EQ(addrlen, addrlen_);
-  EXPECT_NE(*Port(&addr), 0);
-}
-
-TEST_P(UdpSocketTest, ConnectBinds) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // Connect the socket.
-  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  // Check that we're bound now.
-  struct sockaddr_storage addr;
-  socklen_t addrlen = sizeof(addr);
-  EXPECT_THAT(
-      getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
-      SyscallSucceeds());
-  EXPECT_EQ(addrlen, addrlen_);
-  EXPECT_NE(*Port(&addr), 0);
-}
-
-TEST_P(UdpSocketTest, ReceiveNotBound) {
-  char buf[512];
-  EXPECT_THAT(recv(sock_.get(), buf, sizeof(buf), MSG_DONTWAIT),
-              SyscallFailsWithErrno(EWOULDBLOCK));
-}
-
-TEST_P(UdpSocketTest, Bind) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // Try to bind again.
-  EXPECT_THAT(bind(bind_.get(), bind_addr_, addrlen_),
-              SyscallFailsWithErrno(EINVAL));
-
-  // Check that we're still bound to the original address.
-  struct sockaddr_storage addr;
-  socklen_t addrlen = sizeof(addr);
-  EXPECT_THAT(
-      getsockname(bind_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
-      SyscallSucceeds());
-  EXPECT_EQ(addrlen, addrlen_);
-  EXPECT_EQ(memcmp(&addr, bind_addr_, addrlen_), 0);
-}
-
-TEST_P(UdpSocketTest, BindInUse) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // Try to bind again.
-  EXPECT_THAT(bind(sock_.get(), bind_addr_, addrlen_),
-              SyscallFailsWithErrno(EADDRINUSE));
-}
-
-TEST_P(UdpSocketTest, ReceiveAfterConnect) {
-  ASSERT_NO_ERRNO(BindLoopback());
-  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  // Send from sock_ to bind_
-  char buf[512];
-  RandomizeBuffer(buf, sizeof(buf));
-  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
-              SyscallSucceedsWithValue(sizeof(buf)));
-
-  // Receive the data.
-  char received[sizeof(buf)];
-  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), 0),
-              SyscallSucceedsWithValue(sizeof(received)));
-  EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
-}
-
-TEST_P(UdpSocketTest, ReceiveAfterDisconnect) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  for (int i = 0; i < 2; i++) {
-    // Connet sock_ to bound address.
-    ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-    struct sockaddr_storage addr;
-    socklen_t addrlen = sizeof(addr);
-    EXPECT_THAT(
-        getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
-        SyscallSucceeds());
-    EXPECT_EQ(addrlen, addrlen_);
-
-    // Send from sock to bind_.
-    char buf[512];
-    RandomizeBuffer(buf, sizeof(buf));
-
-    ASSERT_THAT(sendto(bind_.get(), buf, sizeof(buf), 0,
-                       reinterpret_cast<sockaddr*>(&addr), addrlen),
-                SyscallSucceedsWithValue(sizeof(buf)));
-
-    // Receive the data.
-    char received[sizeof(buf)];
-    EXPECT_THAT(recv(sock_.get(), received, sizeof(received), 0),
-                SyscallSucceedsWithValue(sizeof(received)));
-    EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
-
-    // Disconnect sock_.
-    struct sockaddr unspec = {};
-    unspec.sa_family = AF_UNSPEC;
-    ASSERT_THAT(connect(sock_.get(), &unspec, sizeof(unspec.sa_family)),
-                SyscallSucceeds());
-  }
-}
-
-TEST_P(UdpSocketTest, Connect) {
-  ASSERT_NO_ERRNO(BindLoopback());
-  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  // Check that we're connected to the right peer.
-  struct sockaddr_storage peer;
-  socklen_t peerlen = sizeof(peer);
-  EXPECT_THAT(
-      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&peer), &peerlen),
-      SyscallSucceeds());
-  EXPECT_EQ(peerlen, addrlen_);
-  EXPECT_EQ(memcmp(&peer, bind_addr_, addrlen_), 0);
-
-  // Try to bind after connect.
-  struct sockaddr_storage any = InetAnyAddr();
-  EXPECT_THAT(
-      bind(sock_.get(), reinterpret_cast<struct sockaddr*>(&any), addrlen_),
-      SyscallFailsWithErrno(EINVAL));
-
-  struct sockaddr_storage bind2_storage = InetLoopbackAddr();
-  struct sockaddr* bind2_addr =
-      reinterpret_cast<struct sockaddr*>(&bind2_storage);
-  FileDescriptor bind2 =
-      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, IPPROTO_UDP));
-  ASSERT_NO_ERRNO(BindSocket(bind2.get(), bind2_addr));
-
-  // Try to connect again.
-  EXPECT_THAT(connect(sock_.get(), bind2_addr, addrlen_), SyscallSucceeds());
-
-  // Check that peer name changed.
-  peerlen = sizeof(peer);
-  EXPECT_THAT(
-      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&peer), &peerlen),
-      SyscallSucceeds());
-  EXPECT_EQ(peerlen, addrlen_);
-  EXPECT_EQ(memcmp(&peer, bind2_addr, addrlen_), 0);
-}
-
-TEST_P(UdpSocketTest, ConnectAnyZero) {
-  // TODO(138658473): Enable when we can connect to port 0 with gVisor.
-  SKIP_IF(IsRunningOnGvisor());
-
-  struct sockaddr_storage any = InetAnyAddr();
-  EXPECT_THAT(
-      connect(sock_.get(), reinterpret_cast<struct sockaddr*>(&any), addrlen_),
-      SyscallSucceeds());
-
-  struct sockaddr_storage addr;
-  socklen_t addrlen = sizeof(addr);
-  EXPECT_THAT(
-      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
-      SyscallFailsWithErrno(ENOTCONN));
-}
-
-TEST_P(UdpSocketTest, ConnectAnyWithPort) {
-  ASSERT_NO_ERRNO(BindAny());
-  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  struct sockaddr_storage addr;
-  socklen_t addrlen = sizeof(addr);
-  EXPECT_THAT(
-      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
-      SyscallSucceeds());
-}
-
-TEST_P(UdpSocketTest, DisconnectAfterConnectAny) {
-  // TODO(138658473): Enable when we can connect to port 0 with gVisor.
-  SKIP_IF(IsRunningOnGvisor());
-  struct sockaddr_storage any = InetAnyAddr();
-  EXPECT_THAT(
-      connect(sock_.get(), reinterpret_cast<struct sockaddr*>(&any), addrlen_),
-      SyscallSucceeds());
-
-  struct sockaddr_storage addr;
-  socklen_t addrlen = sizeof(addr);
-  EXPECT_THAT(
-      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
-      SyscallFailsWithErrno(ENOTCONN));
-
-  Disconnect(sock_.get());
-}
-
-TEST_P(UdpSocketTest, DisconnectAfterConnectAnyWithPort) {
-  ASSERT_NO_ERRNO(BindAny());
-  EXPECT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  struct sockaddr_storage addr;
-  socklen_t addrlen = sizeof(addr);
-  EXPECT_THAT(
-      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
-      SyscallSucceeds());
-
-  EXPECT_EQ(addrlen, addrlen_);
-  EXPECT_EQ(*Port(&bind_addr_storage_), *Port(&addr));
-
-  Disconnect(sock_.get());
-}
-
-TEST_P(UdpSocketTest, DisconnectAfterBind) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // Bind to the next port above bind_.
-  struct sockaddr_storage addr_storage = InetLoopbackAddr();
-  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
-  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
-  ASSERT_NO_ERRNO(BindSocket(sock_.get(), addr));
-
-  // Connect the socket.
-  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  struct sockaddr_storage unspec = {};
-  unspec.ss_family = AF_UNSPEC;
-  EXPECT_THAT(connect(sock_.get(), reinterpret_cast<sockaddr*>(&unspec),
-                      sizeof(unspec.ss_family)),
-              SyscallSucceeds());
-
-  // Check that we're still bound.
-  socklen_t addrlen = sizeof(unspec);
-  EXPECT_THAT(
-      getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&unspec), &addrlen),
-      SyscallSucceeds());
-
-  EXPECT_EQ(addrlen, addrlen_);
-  EXPECT_EQ(memcmp(addr, &unspec, addrlen_), 0);
-
-  addrlen = sizeof(addr);
-  EXPECT_THAT(getpeername(sock_.get(), addr, &addrlen),
-              SyscallFailsWithErrno(ENOTCONN));
-}
-
-TEST_P(UdpSocketTest, BindToAnyConnnectToLocalhost) {
-  ASSERT_NO_ERRNO(BindAny());
-
-  struct sockaddr_storage addr_storage = InetLoopbackAddr();
-  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
-  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
-  socklen_t addrlen = sizeof(addr);
-
-  // Connect the socket.
-  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
-
-  EXPECT_THAT(getsockname(bind_.get(), addr, &addrlen), SyscallSucceeds());
-
-  // If the socket is bound to ANY and connected to a loopback address,
-  // getsockname() has to return the loopback address.
-  if (GetParam() == AddressFamily::kIpv4) {
-    auto addr_out = reinterpret_cast<struct sockaddr_in*>(addr);
-    EXPECT_EQ(addrlen, sizeof(*addr_out));
-    EXPECT_EQ(addr_out->sin_addr.s_addr, htonl(INADDR_LOOPBACK));
-  } else {
-    auto addr_out = reinterpret_cast<struct sockaddr_in6*>(addr);
-    struct in6_addr loopback = IN6ADDR_LOOPBACK_INIT;
-    EXPECT_EQ(addrlen, sizeof(*addr_out));
-    EXPECT_EQ(memcmp(&addr_out->sin6_addr, &loopback, sizeof(in6_addr)), 0);
-  }
-}
-
-TEST_P(UdpSocketTest, DisconnectAfterBindToAny) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  struct sockaddr_storage any_storage = InetAnyAddr();
-  struct sockaddr* any = reinterpret_cast<struct sockaddr*>(&any_storage);
-  SetPort(&any_storage, *Port(&bind_addr_storage_) + 1);
-
-  ASSERT_NO_ERRNO(BindSocket(sock_.get(), any));
-
-  // Connect the socket.
-  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  Disconnect(sock_.get());
-
-  // Check that we're still bound.
-  struct sockaddr_storage addr;
-  socklen_t addrlen = sizeof(addr);
-  EXPECT_THAT(
-      getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
-      SyscallSucceeds());
-
-  EXPECT_EQ(addrlen, addrlen_);
-  EXPECT_EQ(memcmp(&addr, any, addrlen), 0);
-
-  addrlen = sizeof(addr);
-  EXPECT_THAT(
-      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
-      SyscallFailsWithErrno(ENOTCONN));
-}
-
-TEST_P(UdpSocketTest, Disconnect) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  struct sockaddr_storage any_storage = InetAnyAddr();
-  struct sockaddr* any = reinterpret_cast<struct sockaddr*>(&any_storage);
-  SetPort(&any_storage, *Port(&bind_addr_storage_) + 1);
-  ASSERT_NO_ERRNO(BindSocket(sock_.get(), any));
-
-  for (int i = 0; i < 2; i++) {
-    // Try to connect again.
-    EXPECT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-    // Check that we're connected to the right peer.
-    struct sockaddr_storage peer;
-    socklen_t peerlen = sizeof(peer);
-    EXPECT_THAT(
-        getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&peer), &peerlen),
-        SyscallSucceeds());
-    EXPECT_EQ(peerlen, addrlen_);
-    EXPECT_EQ(memcmp(&peer, bind_addr_, addrlen_), 0);
-
-    // Try to disconnect.
-    struct sockaddr_storage addr = {};
-    addr.ss_family = AF_UNSPEC;
-    EXPECT_THAT(connect(sock_.get(), reinterpret_cast<sockaddr*>(&addr),
-                        sizeof(addr.ss_family)),
-                SyscallSucceeds());
-
-    peerlen = sizeof(peer);
-    EXPECT_THAT(
-        getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&peer), &peerlen),
-        SyscallFailsWithErrno(ENOTCONN));
-
-    // Check that we're still bound.
-    socklen_t addrlen = sizeof(addr);
-    EXPECT_THAT(
-        getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
-        SyscallSucceeds());
-    EXPECT_EQ(addrlen, addrlen_);
-    EXPECT_EQ(*Port(&addr), *Port(&any_storage));
-  }
-}
-
-TEST_P(UdpSocketTest, ConnectBadAddress) {
-  struct sockaddr addr = {};
-  addr.sa_family = GetFamily();
-  ASSERT_THAT(connect(sock_.get(), &addr, sizeof(addr.sa_family)),
-              SyscallFailsWithErrno(EINVAL));
-}
-
-TEST_P(UdpSocketTest, SendToAddressOtherThanConnected) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  struct sockaddr_storage addr_storage = InetAnyAddr();
-  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
-  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
-
-  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  // Send to a different destination than we're connected to.
-  char buf[512];
-  EXPECT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, addr, addrlen_),
-              SyscallSucceedsWithValue(sizeof(buf)));
-}
-
-TEST_P(UdpSocketTest, ZerolengthWriteAllowed) {
-  // TODO(gvisor.dev/issue/1202): Hostinet does not support zero length writes.
-  SKIP_IF(IsRunningWithHostinet());
-
-  ASSERT_NO_ERRNO(BindLoopback());
-  // Connect to loopback:bind_addr_+1.
-  struct sockaddr_storage addr_storage = InetLoopbackAddr();
-  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
-  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
-  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
-
-  // Bind sock to loopback:bind_addr_+1.
-  ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds());
-
-  char buf[3];
-  // Send zero length packet from bind_ to sock_.
-  ASSERT_THAT(write(bind_.get(), buf, 0), SyscallSucceedsWithValue(0));
-
-  struct pollfd pfd = {sock_.get(), POLLIN, 0};
-  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout*/ 1000),
-              SyscallSucceedsWithValue(1));
-
-  // Receive the packet.
-  char received[3];
-  EXPECT_THAT(read(sock_.get(), received, sizeof(received)),
-              SyscallSucceedsWithValue(0));
-}
-
-TEST_P(UdpSocketTest, ZerolengthWriteAllowedNonBlockRead) {
-  // TODO(gvisor.dev/issue/1202): Hostinet does not support zero length writes.
-  SKIP_IF(IsRunningWithHostinet());
-
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // Connect to loopback:bind_addr_port+1.
-  struct sockaddr_storage addr_storage = InetLoopbackAddr();
-  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
-  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
-  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
-
-  // Bind sock to loopback:bind_addr_port+1.
-  ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds());
-
-  // Set sock to non-blocking.
-  int opts = 0;
-  ASSERT_THAT(opts = fcntl(sock_.get(), F_GETFL), SyscallSucceeds());
-  ASSERT_THAT(fcntl(sock_.get(), F_SETFL, opts | O_NONBLOCK),
-              SyscallSucceeds());
-
-  char buf[3];
-  // Send zero length packet from bind_ to sock_.
-  ASSERT_THAT(write(bind_.get(), buf, 0), SyscallSucceedsWithValue(0));
-
-  struct pollfd pfd = {sock_.get(), POLLIN, 0};
-  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
-              SyscallSucceedsWithValue(1));
-
-  // Receive the packet.
-  char received[3];
-  EXPECT_THAT(read(sock_.get(), received, sizeof(received)),
-              SyscallSucceedsWithValue(0));
-  EXPECT_THAT(read(sock_.get(), received, sizeof(received)),
-              SyscallFailsWithErrno(EAGAIN));
-}
-
-TEST_P(UdpSocketTest, SendAndReceiveNotConnected) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // Send some data to bind_.
-  char buf[512];
-  RandomizeBuffer(buf, sizeof(buf));
-
-  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
-              SyscallSucceedsWithValue(sizeof(buf)));
-
-  // Receive the data.
-  char received[sizeof(buf)];
-  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), 0),
-              SyscallSucceedsWithValue(sizeof(received)));
-  EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
-}
-
-TEST_P(UdpSocketTest, SendAndReceiveConnected) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // Connect to loopback:bind_addr_port+1.
-  struct sockaddr_storage addr_storage = InetLoopbackAddr();
-  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
-  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
-  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
-
-  // Bind sock to loopback:TestPort+1.
-  ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds());
-
-  // Send some data from sock to bind_.
-  char buf[512];
-  RandomizeBuffer(buf, sizeof(buf));
-
-  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
-              SyscallSucceedsWithValue(sizeof(buf)));
-
-  // Receive the data.
-  char received[sizeof(buf)];
-  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), 0),
-              SyscallSucceedsWithValue(sizeof(received)));
-  EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
-}
-
-TEST_P(UdpSocketTest, ReceiveFromNotConnected) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // Connect to loopback:bind_addr_port+1.
-  struct sockaddr_storage addr_storage = InetLoopbackAddr();
-  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
-  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
-  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
-
-  // Bind sock to loopback:bind_addr_port+2.
-  struct sockaddr_storage addr2_storage = InetLoopbackAddr();
-  struct sockaddr* addr2 = reinterpret_cast<struct sockaddr*>(&addr2_storage);
-  SetPort(&addr2_storage, *Port(&bind_addr_storage_) + 2);
-  ASSERT_THAT(bind(sock_.get(), addr2, addrlen_), SyscallSucceeds());
-
-  // Send some data from sock to bind_.
-  char buf[512];
-  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
-              SyscallSucceedsWithValue(sizeof(buf)));
-
-  // Check that the data isn't received because it was sent from a different
-  // address than we're connected.
-  EXPECT_THAT(recv(sock_.get(), buf, sizeof(buf), MSG_DONTWAIT),
-              SyscallFailsWithErrno(EWOULDBLOCK));
-}
-
-TEST_P(UdpSocketTest, ReceiveBeforeConnect) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // Bind sock to loopback:bind_addr_port+2.
-  struct sockaddr_storage addr2_storage = InetLoopbackAddr();
-  struct sockaddr* addr2 = reinterpret_cast<struct sockaddr*>(&addr2_storage);
-  SetPort(&addr2_storage, *Port(&bind_addr_storage_) + 2);
-  ASSERT_THAT(bind(sock_.get(), addr2, addrlen_), SyscallSucceeds());
-
-  // Send some data from sock to bind_.
-  char buf[512];
-  RandomizeBuffer(buf, sizeof(buf));
-
-  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
-              SyscallSucceedsWithValue(sizeof(buf)));
-
-  // Connect to loopback:TestPort+1.
-  struct sockaddr_storage addr_storage = InetLoopbackAddr();
-  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
-  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
-  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
-
-  // Receive the data. It works because it was sent before the connect.
-  char received[sizeof(buf)];
-  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), 0),
-              SyscallSucceedsWithValue(sizeof(received)));
-  EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
-
-  // Send again. This time it should not be received.
-  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
-              SyscallSucceedsWithValue(sizeof(buf)));
-
-  EXPECT_THAT(recv(bind_.get(), buf, sizeof(buf), MSG_DONTWAIT),
-              SyscallFailsWithErrno(EWOULDBLOCK));
-}
-
-TEST_P(UdpSocketTest, ReceiveFrom) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // Connect to loopback:bind_addr_port+1.
-  struct sockaddr_storage addr_storage = InetLoopbackAddr();
-  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
-  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
-  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
-
-  // Bind sock to loopback:TestPort+1.
-  ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds());
-
-  // Send some data from sock to bind_.
-  char buf[512];
-  RandomizeBuffer(buf, sizeof(buf));
-
-  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
-              SyscallSucceedsWithValue(sizeof(buf)));
-
-  // Receive the data and sender address.
-  char received[sizeof(buf)];
-  struct sockaddr_storage addr2;
-  socklen_t addr2len = sizeof(addr2);
-  EXPECT_THAT(recvfrom(bind_.get(), received, sizeof(received), 0,
-                       reinterpret_cast<sockaddr*>(&addr2), &addr2len),
-              SyscallSucceedsWithValue(sizeof(received)));
-  EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
-  EXPECT_EQ(addr2len, addrlen_);
-  EXPECT_EQ(memcmp(addr, &addr2, addrlen_), 0);
-}
-
-TEST_P(UdpSocketTest, Listen) {
-  ASSERT_THAT(listen(sock_.get(), SOMAXCONN),
-              SyscallFailsWithErrno(EOPNOTSUPP));
-}
-
-TEST_P(UdpSocketTest, Accept) {
-  ASSERT_THAT(accept(sock_.get(), nullptr, nullptr),
-              SyscallFailsWithErrno(EOPNOTSUPP));
-}
-
-// This test validates that a read shutdown with pending data allows the read
-// to proceed with the data before returning EAGAIN.
-TEST_P(UdpSocketTest, ReadShutdownNonblockPendingData) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // Connect to loopback:bind_addr_port+1.
-  struct sockaddr_storage addr_storage = InetLoopbackAddr();
-  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
-  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
-  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
-
-  // Bind to loopback:bind_addr_port+1 and connect to bind_addr_.
-  ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds());
-  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  // Verify that we get EWOULDBLOCK when there is nothing to read.
-  char received[512];
-  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), MSG_DONTWAIT),
-              SyscallFailsWithErrno(EWOULDBLOCK));
-
-  const char* buf = "abc";
-  EXPECT_THAT(write(sock_.get(), buf, 3), SyscallSucceedsWithValue(3));
-
-  int opts = 0;
-  ASSERT_THAT(opts = fcntl(bind_.get(), F_GETFL), SyscallSucceeds());
-  ASSERT_THAT(fcntl(bind_.get(), F_SETFL, opts | O_NONBLOCK),
-              SyscallSucceeds());
-  ASSERT_THAT(opts = fcntl(bind_.get(), F_GETFL), SyscallSucceeds());
-  ASSERT_NE(opts & O_NONBLOCK, 0);
-
-  EXPECT_THAT(shutdown(bind_.get(), SHUT_RD), SyscallSucceeds());
-
-  struct pollfd pfd = {bind_.get(), POLLIN, 0};
-  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
-              SyscallSucceedsWithValue(1));
-
-  // We should get the data even though read has been shutdown.
-  EXPECT_THAT(recv(bind_.get(), received, 2, 0), SyscallSucceedsWithValue(2));
-
-  // Because we read less than the entire packet length, since it's a packet
-  // based socket any subsequent reads should return EWOULDBLOCK.
-  EXPECT_THAT(recv(bind_.get(), received, 1, 0),
-              SyscallFailsWithErrno(EWOULDBLOCK));
-}
-
-// This test is validating that even after a socket is shutdown if it's
-// reconnected it will reset the shutdown state.
-TEST_P(UdpSocketTest, ReadShutdownSameSocketResetsShutdownState) {
-  char received[512];
-  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), MSG_DONTWAIT),
-              SyscallFailsWithErrno(EWOULDBLOCK));
-
-  EXPECT_THAT(shutdown(bind_.get(), SHUT_RD), SyscallFailsWithErrno(ENOTCONN));
-
-  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), MSG_DONTWAIT),
-              SyscallFailsWithErrno(EWOULDBLOCK));
-
-  // Connect the socket, then try to shutdown again.
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // Connect to loopback:bind_addr_port+1.
-  struct sockaddr_storage addr_storage = InetLoopbackAddr();
-  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
-  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
-  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
-
-  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), MSG_DONTWAIT),
-              SyscallFailsWithErrno(EWOULDBLOCK));
-}
-
-TEST_P(UdpSocketTest, ReadShutdown) {
-  // TODO(gvisor.dev/issue/1202): Calling recv() after shutdown without
-  // MSG_DONTWAIT blocks indefinitely.
-  SKIP_IF(IsRunningWithHostinet());
-
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  char received[512];
-  EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT),
-              SyscallFailsWithErrno(EWOULDBLOCK));
-
-  EXPECT_THAT(shutdown(sock_.get(), SHUT_RD), SyscallFailsWithErrno(ENOTCONN));
-
-  EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT),
-              SyscallFailsWithErrno(EWOULDBLOCK));
-
-  // Connect the socket, then try to shutdown again.
-  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT),
-              SyscallFailsWithErrno(EWOULDBLOCK));
-
-  EXPECT_THAT(shutdown(sock_.get(), SHUT_RD), SyscallSucceeds());
-
-  EXPECT_THAT(recv(sock_.get(), received, sizeof(received), 0),
-              SyscallSucceedsWithValue(0));
-}
-
-TEST_P(UdpSocketTest, ReadShutdownDifferentThread) {
-  // TODO(gvisor.dev/issue/1202): Calling recv() after shutdown without
-  // MSG_DONTWAIT blocks indefinitely.
-  SKIP_IF(IsRunningWithHostinet());
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  char received[512];
-  EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT),
-              SyscallFailsWithErrno(EWOULDBLOCK));
-
-  // Connect the socket, then shutdown from another thread.
-  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT),
-              SyscallFailsWithErrno(EWOULDBLOCK));
-
-  ScopedThread t([&] {
-    absl::SleepFor(absl::Milliseconds(200));
-    EXPECT_THAT(shutdown(sock_.get(), SHUT_RD), SyscallSucceeds());
-  });
-  EXPECT_THAT(RetryEINTR(recv)(sock_.get(), received, sizeof(received), 0),
-              SyscallSucceedsWithValue(0));
-  t.Join();
-
-  EXPECT_THAT(RetryEINTR(recv)(sock_.get(), received, sizeof(received), 0),
-              SyscallSucceedsWithValue(0));
-}
-
-TEST_P(UdpSocketTest, WriteShutdown) {
-  ASSERT_NO_ERRNO(BindLoopback());
-  EXPECT_THAT(shutdown(sock_.get(), SHUT_WR), SyscallFailsWithErrno(ENOTCONN));
-  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-  EXPECT_THAT(shutdown(sock_.get(), SHUT_WR), SyscallSucceeds());
-}
-
-TEST_P(UdpSocketTest, SynchronousReceive) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // Send some data to bind_ from another thread.
-  char buf[512];
-  RandomizeBuffer(buf, sizeof(buf));
-
-  // Receive the data prior to actually starting the other thread.
-  char received[512];
-  EXPECT_THAT(
-      RetryEINTR(recv)(bind_.get(), received, sizeof(received), MSG_DONTWAIT),
-      SyscallFailsWithErrno(EWOULDBLOCK));
-
-  // Start the thread.
-  ScopedThread t([&] {
-    absl::SleepFor(absl::Milliseconds(200));
-    ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, this->bind_addr_,
-                       this->addrlen_),
-                SyscallSucceedsWithValue(sizeof(buf)));
-  });
-
-  EXPECT_THAT(RetryEINTR(recv)(bind_.get(), received, sizeof(received), 0),
-              SyscallSucceedsWithValue(512));
-  EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
-}
-
-TEST_P(UdpSocketTest, BoundaryPreserved_SendRecv) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // Send 3 packets from sock to bind_.
-  constexpr int psize = 100;
-  char buf[3 * psize];
-  RandomizeBuffer(buf, sizeof(buf));
-
-  for (int i = 0; i < 3; ++i) {
-    ASSERT_THAT(
-        sendto(sock_.get(), buf + i * psize, psize, 0, bind_addr_, addrlen_),
-        SyscallSucceedsWithValue(psize));
-  }
-
-  // Receive the data as 3 separate packets.
-  char received[6 * psize];
-  for (int i = 0; i < 3; ++i) {
-    EXPECT_THAT(recv(bind_.get(), received + i * psize, 3 * psize, 0),
-                SyscallSucceedsWithValue(psize));
-  }
-  EXPECT_EQ(memcmp(buf, received, 3 * psize), 0);
-}
-
-TEST_P(UdpSocketTest, BoundaryPreserved_WritevReadv) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // Direct writes from sock to bind_.
-  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  // Send 2 packets from sock to bind_, where each packet's data consists of
-  // 2 discontiguous iovecs.
-  constexpr size_t kPieceSize = 100;
-  char buf[4 * kPieceSize];
-  RandomizeBuffer(buf, sizeof(buf));
-
-  for (int i = 0; i < 2; i++) {
-    struct iovec iov[2];
-    for (int j = 0; j < 2; j++) {
-      iov[j].iov_base = reinterpret_cast<void*>(
-          reinterpret_cast<uintptr_t>(buf) + (i + 2 * j) * kPieceSize);
-      iov[j].iov_len = kPieceSize;
-    }
-    ASSERT_THAT(writev(sock_.get(), iov, 2),
-                SyscallSucceedsWithValue(2 * kPieceSize));
-  }
-
-  // Receive the data as 2 separate packets.
-  char received[6 * kPieceSize];
-  for (int i = 0; i < 2; i++) {
-    struct iovec iov[3];
-    for (int j = 0; j < 3; j++) {
-      iov[j].iov_base = reinterpret_cast<void*>(
-          reinterpret_cast<uintptr_t>(received) + (i + 2 * j) * kPieceSize);
-      iov[j].iov_len = kPieceSize;
-    }
-    ASSERT_THAT(readv(bind_.get(), iov, 3),
-                SyscallSucceedsWithValue(2 * kPieceSize));
-  }
-  EXPECT_EQ(memcmp(buf, received, 4 * kPieceSize), 0);
-}
-
-TEST_P(UdpSocketTest, BoundaryPreserved_SendMsgRecvMsg) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // Send 2 packets from sock to bind_, where each packet's data consists of
-  // 2 discontiguous iovecs.
-  constexpr size_t kPieceSize = 100;
-  char buf[4 * kPieceSize];
-  RandomizeBuffer(buf, sizeof(buf));
-
-  for (int i = 0; i < 2; i++) {
-    struct iovec iov[2];
-    for (int j = 0; j < 2; j++) {
-      iov[j].iov_base = reinterpret_cast<void*>(
-          reinterpret_cast<uintptr_t>(buf) + (i + 2 * j) * kPieceSize);
-      iov[j].iov_len = kPieceSize;
-    }
-    struct msghdr msg = {};
-    msg.msg_name = bind_addr_;
-    msg.msg_namelen = addrlen_;
-    msg.msg_iov = iov;
-    msg.msg_iovlen = 2;
-    ASSERT_THAT(sendmsg(sock_.get(), &msg, 0),
-                SyscallSucceedsWithValue(2 * kPieceSize));
-  }
-
-  // Receive the data as 2 separate packets.
-  char received[6 * kPieceSize];
-  for (int i = 0; i < 2; i++) {
-    struct iovec iov[3];
-    for (int j = 0; j < 3; j++) {
-      iov[j].iov_base = reinterpret_cast<void*>(
-          reinterpret_cast<uintptr_t>(received) + (i + 2 * j) * kPieceSize);
-      iov[j].iov_len = kPieceSize;
-    }
-    struct msghdr msg = {};
-    msg.msg_iov = iov;
-    msg.msg_iovlen = 3;
-    ASSERT_THAT(recvmsg(bind_.get(), &msg, 0),
-                SyscallSucceedsWithValue(2 * kPieceSize));
-  }
-  EXPECT_EQ(memcmp(buf, received, 4 * kPieceSize), 0);
-}
-
-TEST_P(UdpSocketTest, FIONREADShutdown) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  int n = -1;
-  EXPECT_THAT(ioctl(sock_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
-  EXPECT_EQ(n, 0);
-
-  // A UDP socket must be connected before it can be shutdown.
-  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  n = -1;
-  EXPECT_THAT(ioctl(sock_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
-  EXPECT_EQ(n, 0);
-
-  EXPECT_THAT(shutdown(sock_.get(), SHUT_RD), SyscallSucceeds());
-
-  n = -1;
-  EXPECT_THAT(ioctl(sock_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
-  EXPECT_EQ(n, 0);
-}
-
-TEST_P(UdpSocketTest, FIONREADWriteShutdown) {
-  int n = -1;
-  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
-  EXPECT_EQ(n, 0);
-
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // A UDP socket must be connected before it can be shutdown.
-  ASSERT_THAT(connect(bind_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  n = -1;
-  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
-  EXPECT_EQ(n, 0);
-
-  const char str[] = "abc";
-  ASSERT_THAT(send(bind_.get(), str, sizeof(str), 0),
-              SyscallSucceedsWithValue(sizeof(str)));
-
-  struct pollfd pfd = {bind_.get(), POLLIN, 0};
-  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
-              SyscallSucceedsWithValue(1));
-
-  n = -1;
-  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
-  EXPECT_EQ(n, sizeof(str));
-
-  EXPECT_THAT(shutdown(bind_.get(), SHUT_RD), SyscallSucceeds());
-
-  n = -1;
-  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
-  EXPECT_EQ(n, sizeof(str));
-}
-
-// NOTE: Do not use `FIONREAD` as test name because it will be replaced by the
-// corresponding macro and become `0x541B`.
-TEST_P(UdpSocketTest, Fionread) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // Check that the bound socket with an empty buffer reports an empty first
-  // packet.
-  int n = -1;
-  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
-  EXPECT_EQ(n, 0);
-
-  // Send 3 packets from sock to bind_.
-  constexpr int psize = 100;
-  char buf[3 * psize];
-  RandomizeBuffer(buf, sizeof(buf));
-
-  struct pollfd pfd = {bind_.get(), POLLIN, 0};
-  for (int i = 0; i < 3; ++i) {
-    ASSERT_THAT(
-        sendto(sock_.get(), buf + i * psize, psize, 0, bind_addr_, addrlen_),
-        SyscallSucceedsWithValue(psize));
-
-    ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
-                SyscallSucceedsWithValue(1));
-
-    // Check that regardless of how many packets are in the queue, the size
-    // reported is that of a single packet.
-    n = -1;
-    EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
-    EXPECT_EQ(n, psize);
-  }
-}
-
-TEST_P(UdpSocketTest, FIONREADZeroLengthPacket) {
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // Check that the bound socket with an empty buffer reports an empty first
-  // packet.
-  int n = -1;
-  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
-  EXPECT_EQ(n, 0);
-
-  // Send 3 packets from sock to bind_.
-  constexpr int psize = 100;
-  char buf[3 * psize];
-  RandomizeBuffer(buf, sizeof(buf));
-
-  struct pollfd pfd = {bind_.get(), POLLIN, 0};
-  for (int i = 0; i < 3; ++i) {
-    ASSERT_THAT(
-        sendto(sock_.get(), buf + i * psize, 0, 0, bind_addr_, addrlen_),
-        SyscallSucceedsWithValue(0));
-
-    // TODO(gvisor.dev/issue/2726): sending a zero-length message to a hostinet
-    // socket does not cause a poll event to be triggered.
-    if (!IsRunningWithHostinet()) {
-      ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
-                  SyscallSucceedsWithValue(1));
-    }
-
-    // Check that regardless of how many packets are in the queue, the size
-    // reported is that of a single packet.
-    n = -1;
-    EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
-    EXPECT_EQ(n, 0);
-  }
-}
-
-TEST_P(UdpSocketTest, FIONREADZeroLengthWriteShutdown) {
-  int n = -1;
-  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
-  EXPECT_EQ(n, 0);
-
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  // A UDP socket must be connected before it can be shutdown.
-  ASSERT_THAT(connect(bind_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  n = -1;
-  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
-  EXPECT_EQ(n, 0);
-
-  const char str[] = "abc";
-  ASSERT_THAT(send(bind_.get(), str, 0, 0), SyscallSucceedsWithValue(0));
-
-  n = -1;
-  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
-  EXPECT_EQ(n, 0);
-
-  EXPECT_THAT(shutdown(bind_.get(), SHUT_RD), SyscallSucceeds());
-
-  n = -1;
-  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
-  EXPECT_EQ(n, 0);
-}
-
-TEST_P(UdpSocketTest, SoNoCheckOffByDefault) {
-  // TODO(gvisor.dev/issue/1202): SO_NO_CHECK socket option not supported by
-  // hostinet.
-  SKIP_IF(IsRunningWithHostinet());
-
-  int v = -1;
-  socklen_t optlen = sizeof(v);
-  ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, &optlen),
-              SyscallSucceeds());
-  ASSERT_EQ(v, kSockOptOff);
-  ASSERT_EQ(optlen, sizeof(v));
-}
-
-TEST_P(UdpSocketTest, SoNoCheck) {
-  // TODO(gvisor.dev/issue/1202): SO_NO_CHECK socket option not supported by
-  // hostinet.
-  SKIP_IF(IsRunningWithHostinet());
-
-  int v = kSockOptOn;
-  socklen_t optlen = sizeof(v);
-  ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, optlen),
-              SyscallSucceeds());
-  v = -1;
-  ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, &optlen),
-              SyscallSucceeds());
-  ASSERT_EQ(v, kSockOptOn);
-  ASSERT_EQ(optlen, sizeof(v));
-
-  v = kSockOptOff;
-  ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, optlen),
-              SyscallSucceeds());
-  v = -1;
-  ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, &optlen),
-              SyscallSucceeds());
-  ASSERT_EQ(v, kSockOptOff);
-  ASSERT_EQ(optlen, sizeof(v));
-}
-
-TEST_P(UdpSocketTest, SoTimestampOffByDefault) {
-  // TODO(gvisor.dev/issue/1202): SO_TIMESTAMP socket option not supported by
-  // hostinet.
-  SKIP_IF(IsRunningWithHostinet());
-
-  int v = -1;
-  socklen_t optlen = sizeof(v);
-  ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_TIMESTAMP, &v, &optlen),
-              SyscallSucceeds());
-  ASSERT_EQ(v, kSockOptOff);
-  ASSERT_EQ(optlen, sizeof(v));
-}
-
-TEST_P(UdpSocketTest, SoTimestamp) {
-  // TODO(gvisor.dev/issue/1202): ioctl() and SO_TIMESTAMP socket option are not
-  // supported by hostinet.
-  SKIP_IF(IsRunningWithHostinet());
-
-  ASSERT_NO_ERRNO(BindLoopback());
-  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  int v = 1;
-  ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_TIMESTAMP, &v, sizeof(v)),
-              SyscallSucceeds());
-
-  char buf[3];
-  // Send zero length packet from sock to bind_.
-  ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, 0),
-              SyscallSucceedsWithValue(0));
-
-  struct pollfd pfd = {bind_.get(), POLLIN, 0};
-  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
-              SyscallSucceedsWithValue(1));
-
-  char cmsgbuf[CMSG_SPACE(sizeof(struct timeval))];
-  msghdr msg;
-  memset(&msg, 0, sizeof(msg));
-  iovec iov;
-  memset(&iov, 0, sizeof(iov));
-  msg.msg_iov = &iov;
-  msg.msg_iovlen = 1;
-  msg.msg_control = cmsgbuf;
-  msg.msg_controllen = sizeof(cmsgbuf);
-
-  ASSERT_THAT(RetryEINTR(recvmsg)(bind_.get(), &msg, 0),
-              SyscallSucceedsWithValue(0));
-
-  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
-  ASSERT_NE(cmsg, nullptr);
-  ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET);
-  ASSERT_EQ(cmsg->cmsg_type, SO_TIMESTAMP);
-  ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(struct timeval)));
-
-  struct timeval tv = {};
-  memcpy(&tv, CMSG_DATA(cmsg), sizeof(struct timeval));
-
-  ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0);
-
-  // There should be nothing to get via ioctl.
-  ASSERT_THAT(ioctl(bind_.get(), SIOCGSTAMP, &tv),
-              SyscallFailsWithErrno(ENOENT));
-}
-
-TEST_P(UdpSocketTest, WriteShutdownNotConnected) {
-  EXPECT_THAT(shutdown(bind_.get(), SHUT_WR), SyscallFailsWithErrno(ENOTCONN));
-}
-
-TEST_P(UdpSocketTest, TimestampIoctl) {
-  // TODO(gvisor.dev/issue/1202): ioctl() is not supported by hostinet.
-  SKIP_IF(IsRunningWithHostinet());
-
-  ASSERT_NO_ERRNO(BindLoopback());
-  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  char buf[3];
-  // Send packet from sock to bind_.
-  ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, sizeof(buf)),
-              SyscallSucceedsWithValue(sizeof(buf)));
-
-  struct pollfd pfd = {bind_.get(), POLLIN, 0};
-  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
-              SyscallSucceedsWithValue(1));
-
-  // There should be no control messages.
-  char recv_buf[sizeof(buf)];
-  ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(bind_.get(), recv_buf, sizeof(recv_buf)));
-
-  // A nonzero timeval should be available via ioctl.
-  struct timeval tv = {};
-  ASSERT_THAT(ioctl(bind_.get(), SIOCGSTAMP, &tv), SyscallSucceeds());
-  ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0);
-}
-
-TEST_P(UdpSocketTest, TimestampIoctlNothingRead) {
-  // TODO(gvisor.dev/issue/1202): ioctl() is not supported by hostinet.
-  SKIP_IF(IsRunningWithHostinet());
-
-  ASSERT_NO_ERRNO(BindLoopback());
-  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  struct timeval tv = {};
-  ASSERT_THAT(ioctl(sock_.get(), SIOCGSTAMP, &tv),
-              SyscallFailsWithErrno(ENOENT));
-}
-
-// Test that the timestamp accessed via SIOCGSTAMP is still accessible after
-// SO_TIMESTAMP is enabled and used to retrieve a timestamp.
-TEST_P(UdpSocketTest, TimestampIoctlPersistence) {
-  // TODO(gvisor.dev/issue/1202): ioctl() and SO_TIMESTAMP socket option are not
-  // supported by hostinet.
-  SKIP_IF(IsRunningWithHostinet());
-
-  ASSERT_NO_ERRNO(BindLoopback());
-  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  char buf[3];
-  // Send packet from sock to bind_.
-  ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, sizeof(buf)),
-              SyscallSucceedsWithValue(sizeof(buf)));
-  ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, 0),
-              SyscallSucceedsWithValue(0));
-
-  struct pollfd pfd = {bind_.get(), POLLIN, 0};
-  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
-              SyscallSucceedsWithValue(1));
-
-  // There should be no control messages.
-  char recv_buf[sizeof(buf)];
-  ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(bind_.get(), recv_buf, sizeof(recv_buf)));
-
-  // A nonzero timeval should be available via ioctl.
-  struct timeval tv = {};
-  ASSERT_THAT(ioctl(bind_.get(), SIOCGSTAMP, &tv), SyscallSucceeds());
-  ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0);
-
-  // Enable SO_TIMESTAMP and send a message.
-  int v = 1;
-  EXPECT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_TIMESTAMP, &v, sizeof(v)),
-              SyscallSucceeds());
-  ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, 0),
-              SyscallSucceedsWithValue(0));
-
-  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
-              SyscallSucceedsWithValue(1));
-
-  // There should be a message for SO_TIMESTAMP.
-  char cmsgbuf[CMSG_SPACE(sizeof(struct timeval))];
-  msghdr msg = {};
-  iovec iov = {};
-  msg.msg_iov = &iov;
-  msg.msg_iovlen = 1;
-  msg.msg_control = cmsgbuf;
-  msg.msg_controllen = sizeof(cmsgbuf);
-  ASSERT_THAT(RetryEINTR(recvmsg)(bind_.get(), &msg, 0),
-              SyscallSucceedsWithValue(0));
-  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
-  ASSERT_NE(cmsg, nullptr);
-
-  // The ioctl should return the exact same values as before.
-  struct timeval tv2 = {};
-  ASSERT_THAT(ioctl(bind_.get(), SIOCGSTAMP, &tv2), SyscallSucceeds());
-  ASSERT_EQ(tv.tv_sec, tv2.tv_sec);
-  ASSERT_EQ(tv.tv_usec, tv2.tv_usec);
-}
-
-// Test that a socket with IP_TOS or IPV6_TCLASS set will set the TOS byte on
-// outgoing packets, and that a receiving socket with IP_RECVTOS or
-// IPV6_RECVTCLASS will create the corresponding control message.
-TEST_P(UdpSocketTest, SetAndReceiveTOS) {
-  ASSERT_NO_ERRNO(BindLoopback());
-  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  // Allow socket to receive control message.
-  int recv_level = SOL_IP;
-  int recv_type = IP_RECVTOS;
-  if (GetParam() != AddressFamily::kIpv4) {
-    recv_level = SOL_IPV6;
-    recv_type = IPV6_RECVTCLASS;
-  }
-  ASSERT_THAT(setsockopt(bind_.get(), recv_level, recv_type, &kSockOptOn,
-                         sizeof(kSockOptOn)),
-              SyscallSucceeds());
-
-  // Set socket TOS.
-  int sent_level = recv_level;
-  int sent_type = IP_TOS;
-  if (sent_level == SOL_IPV6) {
-    sent_type = IPV6_TCLASS;
-  }
-  int sent_tos = IPTOS_LOWDELAY;  // Choose some TOS value.
-  ASSERT_THAT(setsockopt(sock_.get(), sent_level, sent_type, &sent_tos,
-                         sizeof(sent_tos)),
-              SyscallSucceeds());
-
-  // Prepare message to send.
-  constexpr size_t kDataLength = 1024;
-  struct msghdr sent_msg = {};
-  struct iovec sent_iov = {};
-  char sent_data[kDataLength];
-  sent_iov.iov_base = &sent_data[0];
-  sent_iov.iov_len = kDataLength;
-  sent_msg.msg_iov = &sent_iov;
-  sent_msg.msg_iovlen = 1;
-
-  ASSERT_THAT(RetryEINTR(sendmsg)(sock_.get(), &sent_msg, 0),
-              SyscallSucceedsWithValue(kDataLength));
-
-  // Receive message.
-  struct msghdr received_msg = {};
-  struct iovec received_iov = {};
-  char received_data[kDataLength];
-  received_iov.iov_base = &received_data[0];
-  received_iov.iov_len = kDataLength;
-  received_msg.msg_iov = &received_iov;
-  received_msg.msg_iovlen = 1;
-  size_t cmsg_data_len = sizeof(int8_t);
-  if (sent_type == IPV6_TCLASS) {
-    cmsg_data_len = sizeof(int);
-  }
-  std::vector<char> received_cmsgbuf(CMSG_SPACE(cmsg_data_len));
-  received_msg.msg_control = &received_cmsgbuf[0];
-  received_msg.msg_controllen = received_cmsgbuf.size();
-  ASSERT_THAT(RetryEINTR(recvmsg)(bind_.get(), &received_msg, 0),
-              SyscallSucceedsWithValue(kDataLength));
-
-  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&received_msg);
-  ASSERT_NE(cmsg, nullptr);
-  EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(cmsg_data_len));
-  EXPECT_EQ(cmsg->cmsg_level, sent_level);
-  EXPECT_EQ(cmsg->cmsg_type, sent_type);
-  int8_t received_tos = 0;
-  memcpy(&received_tos, CMSG_DATA(cmsg), sizeof(received_tos));
-  EXPECT_EQ(received_tos, sent_tos);
-}
-
-// Test that sendmsg with IP_TOS and IPV6_TCLASS control messages will set the
-// TOS byte on outgoing packets, and that a receiving socket with IP_RECVTOS or
-// IPV6_RECVTCLASS will create the corresponding control message.
-TEST_P(UdpSocketTest, SendAndReceiveTOS) {
-  // TODO(b/146661005): Setting TOS via cmsg not supported for netstack.
-  SKIP_IF(IsRunningOnGvisor() && !IsRunningWithHostinet());
-
-  ASSERT_NO_ERRNO(BindLoopback());
-  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
-
-  // Allow socket to receive control message.
-  int recv_level = SOL_IP;
-  int recv_type = IP_RECVTOS;
-  if (GetParam() != AddressFamily::kIpv4) {
-    recv_level = SOL_IPV6;
-    recv_type = IPV6_RECVTCLASS;
-  }
-  int recv_opt = kSockOptOn;
-  ASSERT_THAT(setsockopt(bind_.get(), recv_level, recv_type, &recv_opt,
-                         sizeof(recv_opt)),
-              SyscallSucceeds());
-
-  // Prepare message to send.
-  constexpr size_t kDataLength = 1024;
-  int sent_level = recv_level;
-  int sent_type = IP_TOS;
-  int sent_tos = IPTOS_LOWDELAY;  // Choose some TOS value.
-
-  struct msghdr sent_msg = {};
-  struct iovec sent_iov = {};
-  char sent_data[kDataLength];
-  sent_iov.iov_base = &sent_data[0];
-  sent_iov.iov_len = kDataLength;
-  sent_msg.msg_iov = &sent_iov;
-  sent_msg.msg_iovlen = 1;
-  size_t cmsg_data_len = sizeof(int8_t);
-  if (sent_level == SOL_IPV6) {
-    sent_type = IPV6_TCLASS;
-    cmsg_data_len = sizeof(int);
-  }
-  std::vector<char> sent_cmsgbuf(CMSG_SPACE(cmsg_data_len));
-  sent_msg.msg_control = &sent_cmsgbuf[0];
-  sent_msg.msg_controllen = CMSG_LEN(cmsg_data_len);
-
-  // Manually add control message.
-  struct cmsghdr* sent_cmsg = CMSG_FIRSTHDR(&sent_msg);
-  sent_cmsg->cmsg_len = CMSG_LEN(cmsg_data_len);
-  sent_cmsg->cmsg_level = sent_level;
-  sent_cmsg->cmsg_type = sent_type;
-  *(int8_t*)CMSG_DATA(sent_cmsg) = sent_tos;
-
-  ASSERT_THAT(RetryEINTR(sendmsg)(sock_.get(), &sent_msg, 0),
-              SyscallSucceedsWithValue(kDataLength));
-
-  // Receive message.
-  struct msghdr received_msg = {};
-  struct iovec received_iov = {};
-  char received_data[kDataLength];
-  received_iov.iov_base = &received_data[0];
-  received_iov.iov_len = kDataLength;
-  received_msg.msg_iov = &received_iov;
-  received_msg.msg_iovlen = 1;
-  std::vector<char> received_cmsgbuf(CMSG_SPACE(cmsg_data_len));
-  received_msg.msg_control = &received_cmsgbuf[0];
-  received_msg.msg_controllen = CMSG_LEN(cmsg_data_len);
-  ASSERT_THAT(RetryEINTR(recvmsg)(bind_.get(), &received_msg, 0),
-              SyscallSucceedsWithValue(kDataLength));
-
-  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&received_msg);
-  ASSERT_NE(cmsg, nullptr);
-  EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(cmsg_data_len));
-  EXPECT_EQ(cmsg->cmsg_level, sent_level);
-  EXPECT_EQ(cmsg->cmsg_type, sent_type);
-  int8_t received_tos = 0;
-  memcpy(&received_tos, CMSG_DATA(cmsg), sizeof(received_tos));
-  EXPECT_EQ(received_tos, sent_tos);
-}
-
-TEST_P(UdpSocketTest, RecvBufLimitsEmptyRcvBuf) {
-  // Discover minimum buffer size by setting it to zero.
-  constexpr int kRcvBufSz = 0;
-  ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz,
-                         sizeof(kRcvBufSz)),
-              SyscallSucceeds());
-
-  int min = 0;
-  socklen_t min_len = sizeof(min);
-  ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &min, &min_len),
-              SyscallSucceeds());
-
-  // Bind bind_ to loopback.
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  {
-    // Send data of size min and verify that it's received.
-    std::vector<char> buf(min);
-    RandomizeBuffer(buf.data(), buf.size());
-    ASSERT_THAT(
-        sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_),
-        SyscallSucceedsWithValue(buf.size()));
-    std::vector<char> received(buf.size());
-    EXPECT_THAT(
-        recv(bind_.get(), received.data(), received.size(), MSG_DONTWAIT),
-        SyscallSucceedsWithValue(received.size()));
-  }
-
-  {
-    // Send data of size min + 1 and verify that its received. Both linux and
-    // Netstack accept a dgram that exceeds rcvBuf limits if the receive buffer
-    // is currently empty.
-    std::vector<char> buf(min + 1);
-    RandomizeBuffer(buf.data(), buf.size());
-    ASSERT_THAT(
-        sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_),
-        SyscallSucceedsWithValue(buf.size()));
-
-    std::vector<char> received(buf.size());
-    EXPECT_THAT(
-        recv(bind_.get(), received.data(), received.size(), MSG_DONTWAIT),
-        SyscallSucceedsWithValue(received.size()));
-  }
-}
-
-// Test that receive buffer limits are enforced.
-TEST_P(UdpSocketTest, RecvBufLimits) {
-  // Bind s_ to loopback.
-  ASSERT_NO_ERRNO(BindLoopback());
-
-  int min = 0;
-  {
-    // Discover minimum buffer size by trying to set it to zero.
-    constexpr int kRcvBufSz = 0;
-    ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz,
-                           sizeof(kRcvBufSz)),
-                SyscallSucceeds());
-
-    socklen_t min_len = sizeof(min);
-    ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &min, &min_len),
-                SyscallSucceeds());
-  }
-
-  // Now set the limit to min * 4.
-  int new_rcv_buf_sz = min * 4;
-  if (!IsRunningOnGvisor() || IsRunningWithHostinet()) {
-    // Linux doubles the value specified so just set to min * 2.
-    new_rcv_buf_sz = min * 2;
-  }
-
-  ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &new_rcv_buf_sz,
-                         sizeof(new_rcv_buf_sz)),
-              SyscallSucceeds());
-  int rcv_buf_sz = 0;
-  {
-    socklen_t rcv_buf_len = sizeof(rcv_buf_sz);
-    ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz,
-                           &rcv_buf_len),
-                SyscallSucceeds());
-  }
-
-  {
-    std::vector<char> buf(min);
-    RandomizeBuffer(buf.data(), buf.size());
-
-    ASSERT_THAT(
-        sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_),
-        SyscallSucceedsWithValue(buf.size()));
-    ASSERT_THAT(
-        sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_),
-        SyscallSucceedsWithValue(buf.size()));
-    ASSERT_THAT(
-        sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_),
-        SyscallSucceedsWithValue(buf.size()));
-    ASSERT_THAT(
-        sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_),
-        SyscallSucceedsWithValue(buf.size()));
-    int sent = 4;
-    if (IsRunningOnGvisor() && !IsRunningWithHostinet()) {
-      // Linux seems to drop the 4th packet even though technically it should
-      // fit in the receive buffer.
-      ASSERT_THAT(
-          sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_),
-          SyscallSucceedsWithValue(buf.size()));
-      sent++;
-    }
-
-    for (int i = 0; i < sent - 1; i++) {
-      // Receive the data.
-      std::vector<char> received(buf.size());
-      EXPECT_THAT(
-          recv(bind_.get(), received.data(), received.size(), MSG_DONTWAIT),
-          SyscallSucceedsWithValue(received.size()));
-      EXPECT_EQ(memcmp(buf.data(), received.data(), buf.size()), 0);
-    }
-
-    // The last receive should fail with EAGAIN as the last packet should have
-    // been dropped due to lack of space in the receive buffer.
-    std::vector<char> received(buf.size());
-    EXPECT_THAT(
-        recv(bind_.get(), received.data(), received.size(), MSG_DONTWAIT),
-        SyscallFailsWithErrno(EAGAIN));
-  }
-}
-
-#ifndef __fuchsia__
-
-// TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER.
-// gVisor currently silently ignores attaching a filter.
-TEST_P(UdpSocketTest, SetSocketDetachFilter) {
-  // Program generated using sudo tcpdump -i lo udp and port 1234 -dd
-  struct sock_filter code[] = {
-      {0x28, 0, 0, 0x0000000c},  {0x15, 0, 6, 0x000086dd},
-      {0x30, 0, 0, 0x00000014},  {0x15, 0, 15, 0x00000011},
-      {0x28, 0, 0, 0x00000036},  {0x15, 12, 0, 0x000004d2},
-      {0x28, 0, 0, 0x00000038},  {0x15, 10, 11, 0x000004d2},
-      {0x15, 0, 10, 0x00000800}, {0x30, 0, 0, 0x00000017},
-      {0x15, 0, 8, 0x00000011},  {0x28, 0, 0, 0x00000014},
-      {0x45, 6, 0, 0x00001fff},  {0xb1, 0, 0, 0x0000000e},
-      {0x48, 0, 0, 0x0000000e},  {0x15, 2, 0, 0x000004d2},
-      {0x48, 0, 0, 0x00000010},  {0x15, 0, 1, 0x000004d2},
-      {0x6, 0, 0, 0x00040000},   {0x6, 0, 0, 0x00000000},
-  };
-  struct sock_fprog bpf = {
-      .len = ABSL_ARRAYSIZE(code),
-      .filter = code,
-  };
-  ASSERT_THAT(
-      setsockopt(sock_.get(), SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)),
-      SyscallSucceeds());
-
-  constexpr int val = 0;
-  ASSERT_THAT(
-      setsockopt(sock_.get(), SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)),
-      SyscallSucceeds());
-}
-
-TEST_P(UdpSocketTest, SetSocketDetachFilterNoInstalledFilter) {
-  // TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER.
-  SKIP_IF(IsRunningOnGvisor());
-  constexpr int val = 0;
-  ASSERT_THAT(
-      setsockopt(sock_.get(), SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)),
-      SyscallFailsWithErrno(ENOENT));
-}
-
-TEST_P(UdpSocketTest, GetSocketDetachFilter) {
-  int val = 0;
-  socklen_t val_len = sizeof(val);
-  ASSERT_THAT(
-      getsockopt(sock_.get(), SOL_SOCKET, SO_DETACH_FILTER, &val, &val_len),
-      SyscallFailsWithErrno(ENOPROTOOPT));
-}
-
-#endif  // __fuchsia__
-
-}  // namespace testing
-}  // namespace gvisor
diff --git a/test/syscalls/linux/udp_socket_test_cases.h b/test/syscalls/linux/udp_socket_test_cases.h
deleted file mode 100644
index f7e25c8058..0000000000
--- a/test/syscalls/linux/udp_socket_test_cases.h
+++ /dev/null
@@ -1,82 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef THIRD_PARTY_GOLANG_GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_
-#define THIRD_PARTY_GOLANG_GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_
-
-#include <sys/socket.h>
-
-#include "gtest/gtest.h"
-#include "test/syscalls/linux/socket_test_util.h"
-#include "test/util/file_descriptor.h"
-#include "test/util/posix_error.h"
-
-namespace gvisor {
-namespace testing {
-
-// The initial port to be be used on gvisor.
-constexpr int TestPort = 40000;
-
-// Fixture for tests parameterized by the address family to use (AF_INET and
-// AF_INET6) when creating sockets.
-class UdpSocketTest
-    : public ::testing::TestWithParam<gvisor::testing::AddressFamily> {
- protected:
-  // Creates two sockets that will be used by test cases.
-  void SetUp() override;
-
-  // Binds the socket bind_ to the loopback and updates bind_addr_.
-  PosixError BindLoopback();
-
-  // Binds the socket bind_ to Any and updates bind_addr_.
-  PosixError BindAny();
-
-  // Binds given socket to address addr and updates.
-  PosixError BindSocket(int socket, struct sockaddr* addr);
-
-  // Return initialized Any address to port 0.
-  struct sockaddr_storage InetAnyAddr();
-
-  // Return initialized Loopback address to port 0.
-  struct sockaddr_storage InetLoopbackAddr();
-
-  // Disconnects socket sockfd.
-  void Disconnect(int sockfd);
-
-  // Get family for the test.
-  int GetFamily();
-
-  // Socket used by Bind methods
-  FileDescriptor bind_;
-
-  // Second socket used for tests.
-  FileDescriptor sock_;
-
-  // Address for bind_ socket.
-  struct sockaddr* bind_addr_;
-
-  // Initialized to the length based on GetFamily().
-  socklen_t addrlen_;
-
-  // Storage for bind_addr_.
-  struct sockaddr_storage bind_addr_storage_;
-
- private:
-  // Helper to initialize addrlen_ for the test case.
-  socklen_t GetAddrLength();
-};
-}  // namespace testing
-}  // namespace gvisor
-
-#endif  // THIRD_PARTY_GOLANG_GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_
diff --git a/test/util/fs_util.cc b/test/util/fs_util.cc
index dffa16183d..5726756226 100644
--- a/test/util/fs_util.cc
+++ b/test/util/fs_util.cc
@@ -15,9 +15,9 @@
 #include "test/util/fs_util.h"
 
 #include <dirent.h>
-#ifndef __fuchsia__
+#ifdef __linux__
 #include <linux/magic.h>
-#endif  // __fuchsia__
+#endif  // __linux__
 #include <sys/stat.h>
 #include <sys/statfs.h>
 #include <sys/types.h>
@@ -633,7 +633,7 @@ PosixErrorOr<std::string> ProcessExePath(int pid) {
   return ReadLink(absl::StrCat("/proc/", pid, "/exe"));
 }
 
-#ifndef __fuchsia__
+#ifdef __linux__
 PosixErrorOr<bool> IsTmpfs(const std::string& path) {
   struct statfs stat;
   if (statfs(path.c_str(), &stat)) {
@@ -647,7 +647,7 @@ PosixErrorOr<bool> IsTmpfs(const std::string& path) {
   }
   return stat.f_type == TMPFS_MAGIC;
 }
-#endif  // __fuchsia__
+#endif  // __linux__
 
 }  // namespace testing
 }  // namespace gvisor
diff --git a/test/util/fs_util.h b/test/util/fs_util.h
index 0441906579..314637de04 100644
--- a/test/util/fs_util.h
+++ b/test/util/fs_util.h
@@ -179,10 +179,10 @@ std::string CleanPath(absl::string_view path);
 // Returns the full path to the executable of the given pid or a PosixError.
 PosixErrorOr<std::string> ProcessExePath(int pid);
 
-#ifndef __fuchsia__
+#ifdef __linux__
 // IsTmpfs returns true if the file at path is backed by tmpfs.
 PosixErrorOr<bool> IsTmpfs(const std::string& path);
-#endif  // __fucshia__
+#endif  // __linux__
 
 namespace internal {
 // Not part of the public API.
diff --git a/test/util/test_util_runfiles.cc b/test/util/test_util_runfiles.cc
index 694d21692a..7210094ebc 100644
--- a/test/util/test_util_runfiles.cc
+++ b/test/util/test_util_runfiles.cc
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#ifndef __fuchsia__
-
 #include <iostream>
 #include <string>
 
@@ -46,5 +44,3 @@ std::string RunfilePath(std::string path) {
 
 }  // namespace testing
 }  // namespace gvisor
-
-#endif  // __fuchsia__

From 47b496054e05c2dd33c0ecf1386a36b3edf7c6ef Mon Sep 17 00:00:00 2001
From: Jamie Liu <jamieliu@google.com>
Date: Mon, 31 Aug 2020 13:55:18 -0700
Subject: [PATCH 120/211] Don't use read-only host FD for writable gofer
 dentries in VFS2.

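As documented for gofer.dentry.hostFD: a newly opened readable FD is only
adopted as d.hostFD if d.writeFile is nil or the FD is also writable.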

PiperOrigin-RevId: 329372319
---
 pkg/sentry/fsimpl/gofer/gofer.go | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index 81d34cfe36..57bff17894 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -1472,8 +1472,9 @@ func (d *dentry) ensureSharedHandle(ctx context.Context, read, write, trunc bool
 			return err
 		}
 
-		if d.hostFD < 0 && openReadable && h.fd >= 0 {
-			// We have no existing FD; use the new FD for at least reading.
+		if d.hostFD < 0 && h.fd >= 0 && openReadable && (d.writeFile.isNil() || openWritable) {
+			// We have no existing FD, and the new FD meets the requirements
+			// for d.hostFD, so start using it.
 			d.hostFD = h.fd
 		} else if d.hostFD >= 0 && d.writeFile.isNil() && openWritable {
 			// We have an existing read-only FD, but the file has just been

From 219fa4845f62baa670f047be198179846a4cb199 Mon Sep 17 00:00:00 2001
From: Jay Zhuang <jayzhuang@google.com>
Date: Mon, 31 Aug 2020 15:29:50 -0700
Subject: [PATCH 121/211] Set errno on response when syscall actually fails

This prevents setting stale errno on responses.

Also fixes TestDiscardsUDPPacketsWithMcastSourceAddressV6 to use correct
multicast addresses in test.

Fixes #3793

PiperOrigin-RevId: 329391155
---
 test/packetimpact/dut/posix_server.cc         | 56 ++++++++++++++-----
 .../udp_discard_mcast_source_addr_test.go     |  6 +-
 2 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/test/packetimpact/dut/posix_server.cc b/test/packetimpact/dut/posix_server.cc
index 0f8e279f8a..de5b4be93b 100644
--- a/test/packetimpact/dut/posix_server.cc
+++ b/test/packetimpact/dut/posix_server.cc
@@ -116,7 +116,9 @@ class PosixImpl final : public posix_server::Posix::Service {
     socklen_t addrlen = sizeof(addr);
     response->set_fd(accept(request->sockfd(),
                             reinterpret_cast<sockaddr *>(&addr), &addrlen));
-    response->set_errno_(errno);
+    if (response->fd() < 0) {
+      response->set_errno_(errno);
+    }
     return sockaddr_to_proto(addr, addrlen, response->mutable_addr());
   }
 
@@ -137,7 +139,9 @@ class PosixImpl final : public posix_server::Posix::Service {
 
     response->set_ret(
         bind(request->sockfd(), reinterpret_cast<sockaddr *>(&addr), addr_len));
-    response->set_errno_(errno);
+    if (response->ret() < 0) {
+      response->set_errno_(errno);
+    }
     return ::grpc::Status::OK;
   }
 
@@ -145,7 +149,9 @@ class PosixImpl final : public posix_server::Posix::Service {
                        const ::posix_server::CloseRequest *request,
                        ::posix_server::CloseResponse *response) override {
     response->set_ret(close(request->fd()));
-    response->set_errno_(errno);
+    if (response->ret() < 0) {
+      response->set_errno_(errno);
+    }
     return ::grpc::Status::OK;
   }
 
@@ -165,7 +171,9 @@ class PosixImpl final : public posix_server::Posix::Service {
 
     response->set_ret(connect(request->sockfd(),
                               reinterpret_cast<sockaddr *>(&addr), addr_len));
-    response->set_errno_(errno);
+    if (response->ret() < 0) {
+      response->set_errno_(errno);
+    }
     return ::grpc::Status::OK;
   }
 
@@ -173,7 +181,9 @@ class PosixImpl final : public posix_server::Posix::Service {
                        const ::posix_server::FcntlRequest *request,
                        ::posix_server::FcntlResponse *response) override {
     response->set_ret(::fcntl(request->fd(), request->cmd(), request->arg()));
-    response->set_errno_(errno);
+    if (response->ret() < 0) {
+      response->set_errno_(errno);
+    }
     return ::grpc::Status::OK;
   }
 
@@ -185,7 +195,9 @@ class PosixImpl final : public posix_server::Posix::Service {
     socklen_t addrlen = sizeof(addr);
     response->set_ret(getsockname(
         request->sockfd(), reinterpret_cast<sockaddr *>(&addr), &addrlen));
-    response->set_errno_(errno);
+    if (response->ret() < 0) {
+      response->set_errno_(errno);
+    }
     return sockaddr_to_proto(addr, addrlen, response->mutable_addr());
   }
 
@@ -227,7 +239,9 @@ class PosixImpl final : public posix_server::Posix::Service {
         return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT,
                               "Unknown SockOpt Type");
     }
-    response->set_errno_(errno);
+    if (response->ret() < 0) {
+      response->set_errno_(errno);
+    }
     return ::grpc::Status::OK;
   }
 
@@ -235,7 +249,9 @@ class PosixImpl final : public posix_server::Posix::Service {
                         const ::posix_server::ListenRequest *request,
                         ::posix_server::ListenResponse *response) override {
     response->set_ret(listen(request->sockfd(), request->backlog()));
-    response->set_errno_(errno);
+    if (response->ret() < 0) {
+      response->set_errno_(errno);
+    }
     return ::grpc::Status::OK;
   }
 
@@ -244,7 +260,9 @@ class PosixImpl final : public posix_server::Posix::Service {
                       ::posix_server::SendResponse *response) override {
     response->set_ret(::send(request->sockfd(), request->buf().data(),
                              request->buf().size(), request->flags()));
-    response->set_errno_(errno);
+    if (response->ret() < 0) {
+      response->set_errno_(errno);
+    }
     return ::grpc::Status::OK;
   }
 
@@ -265,7 +283,9 @@ class PosixImpl final : public posix_server::Posix::Service {
     response->set_ret(::sendto(request->sockfd(), request->buf().data(),
                                request->buf().size(), request->flags(),
                                reinterpret_cast<sockaddr *>(&addr), addr_len));
-    response->set_errno_(errno);
+    if (response->ret() < 0) {
+      response->set_errno_(errno);
+    }
     return ::grpc::Status::OK;
   }
 
@@ -299,7 +319,9 @@ class PosixImpl final : public posix_server::Posix::Service {
         return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT,
                               "Unknown SockOpt Type");
     }
-    response->set_errno_(errno);
+    if (response->ret() < 0) {
+      response->set_errno_(errno);
+    }
     return ::grpc::Status::OK;
   }
 
@@ -308,14 +330,18 @@ class PosixImpl final : public posix_server::Posix::Service {
                         ::posix_server::SocketResponse *response) override {
     response->set_fd(
         socket(request->domain(), request->type(), request->protocol()));
-    response->set_errno_(errno);
+    if (response->fd() < 0) {
+      response->set_errno_(errno);
+    }
     return ::grpc::Status::OK;
   }
 
   ::grpc::Status Shutdown(grpc_impl::ServerContext *context,
                           const ::posix_server::ShutdownRequest *request,
                           ::posix_server::ShutdownResponse *response) override {
-    response->set_errno_(shutdown(request->fd(), request->how()));
+    if (shutdown(request->fd(), request->how()) < 0) {
+      response->set_errno_(errno);
+    }
     return ::grpc::Status::OK;
   }
 
@@ -328,7 +354,9 @@ class PosixImpl final : public posix_server::Posix::Service {
     if (response->ret() >= 0) {
       response->set_buf(buf.data(), response->ret());
     }
-    response->set_errno_(errno);
+    if (response->ret() < 0) {
+      response->set_errno_(errno);
+    }
     return ::grpc::Status::OK;
   }
 };
diff --git a/test/packetimpact/tests/udp_discard_mcast_source_addr_test.go b/test/packetimpact/tests/udp_discard_mcast_source_addr_test.go
index d30177e647..3d2791a6e0 100644
--- a/test/packetimpact/tests/udp_discard_mcast_source_addr_test.go
+++ b/test/packetimpact/tests/udp_discard_mcast_source_addr_test.go
@@ -53,6 +53,7 @@ func TestDiscardsUDPPacketsWithMcastSourceAddressV4(t *testing.T) {
 				t,
 				testbench.IPv4{SrcAddr: testbench.Address(tcpip.Address(mcastAddr.To4()))},
 				testbench.UDP{},
+				&testbench.Payload{Bytes: []byte("test payload")},
 			)
 
 			ret, payload, errno := dut.RecvWithErrno(context.Background(), t, remoteFD, 100, 0)
@@ -76,14 +77,15 @@ func TestDiscardsUDPPacketsWithMcastSourceAddressV6(t *testing.T) {
 		net.IPv6interfacelocalallnodes,
 		net.IPv6linklocalallnodes,
 		net.IPv6linklocalallrouters,
-		net.ParseIP("fe01::42"),
-		net.ParseIP("fe02::4242"),
+		net.ParseIP("ff01::42"),
+		net.ParseIP("ff02::4242"),
 	} {
 		t.Run(fmt.Sprintf("srcaddr=%s", mcastAddr), func(t *testing.T) {
 			conn.SendIPv6(
 				t,
 				testbench.IPv6{SrcAddr: testbench.Address(tcpip.Address(mcastAddr.To16()))},
 				testbench.UDP{},
+				&testbench.Payload{Bytes: []byte("test payload")},
 			)
 			ret, payload, errno := dut.RecvWithErrno(context.Background(), t, remoteFD, 100, 0)
 			if errno != syscall.EAGAIN || errno != syscall.EWOULDBLOCK {

From ae2e5a38a5e7fb9035d4b1c959a98ef306788d89 Mon Sep 17 00:00:00 2001
From: Adin Scannell <ascannell@google.com>
Date: Mon, 31 Aug 2020 17:07:35 -0700
Subject: [PATCH 122/211] Change nogo failures to test failures, instead of
 build failures.

PiperOrigin-RevId: 329408633
---
 tools/checkescape/BUILD                |   1 -
 tools/checkescape/checkescape.go       | 832 +++++++++++++++----------
 tools/checkescape/test1/test1.go       |   1 +
 tools/checkescape/test2/test2.go       |   1 +
 tools/go_marshal/test/escape/escape.go |   4 +
 tools/nogo/BUILD                       |   1 -
 tools/nogo/config.go                   |   5 +
 tools/nogo/defs.bzl                    |  70 ++-
 tools/nogo/dump/BUILD                  |  10 -
 tools/nogo/dump/dump.go                |  78 ---
 tools/nogo/nogo.go                     | 126 ++--
 tools/nogo/register.go                 |   3 +
 12 files changed, 627 insertions(+), 505 deletions(-)
 delete mode 100644 tools/nogo/dump/BUILD
 delete mode 100644 tools/nogo/dump/dump.go

diff --git a/tools/checkescape/BUILD b/tools/checkescape/BUILD
index 6273aa779c..8956be6217 100644
--- a/tools/checkescape/BUILD
+++ b/tools/checkescape/BUILD
@@ -8,7 +8,6 @@ go_library(
     nogo = False,
     visibility = ["//tools/nogo:__subpackages__"],
     deps = [
-        "//tools/nogo/dump",
         "@org_golang_x_tools//go/analysis:go_tool_library",
         "@org_golang_x_tools//go/analysis/passes/buildssa:go_tool_library",
         "@org_golang_x_tools//go/ssa:go_tool_library",
diff --git a/tools/checkescape/checkescape.go b/tools/checkescape/checkescape.go
index aab3c36a11..d98f5c3a1f 100644
--- a/tools/checkescape/checkescape.go
+++ b/tools/checkescape/checkescape.go
@@ -61,19 +61,20 @@ package checkescape
 import (
 	"bufio"
 	"bytes"
+	"flag"
 	"fmt"
 	"go/ast"
 	"go/token"
 	"go/types"
 	"io"
+	"os"
+	"os/exec"
 	"path/filepath"
-	"strconv"
 	"strings"
 
 	"golang.org/x/tools/go/analysis"
 	"golang.org/x/tools/go/analysis/passes/buildssa"
 	"golang.org/x/tools/go/ssa"
-	"gvisor.dev/gvisor/tools/nogo/dump"
 )
 
 const (
@@ -90,81 +91,20 @@ const (
 	exempt = "// escapes"
 )
 
-// escapingBuiltins are builtins known to escape.
-//
-// These are lowered at an earlier stage of compilation to explicit function
-// calls, but are not available for recursive analysis.
-var escapingBuiltins = []string{
-	"append",
-	"makemap",
-	"newobject",
-	"mallocgc",
-}
-
-// Analyzer defines the entrypoint.
-var Analyzer = &analysis.Analyzer{
-	Name:      "checkescape",
-	Doc:       "surfaces recursive escape analysis results",
-	Run:       run,
-	Requires:  []*analysis.Analyzer{buildssa.Analyzer},
-	FactTypes: []analysis.Fact{(*packageEscapeFacts)(nil)},
-}
-
-// packageEscapeFacts is the set of all functions in a package, and whether or
-// not they recursively pass escape analysis.
-//
-// All the type names for receivers are encoded in the full key. The key
-// represents the fully qualified package and type name used at link time.
-type packageEscapeFacts struct {
-	Funcs map[string][]Escape
-}
-
-// AFact implements analysis.Fact.AFact.
-func (*packageEscapeFacts) AFact() {}
-
-// CallSite is a single call site.
-//
-// These can be chained.
-type CallSite struct {
-	LocalPos token.Pos
-	Resolved LinePosition
-}
-
-// Escape is a single escape instance.
-type Escape struct {
-	Reason EscapeReason
-	Detail string
-	Chain  []CallSite
-}
-
-// LinePosition is a low-resolution token.Position.
-//
-// This is used to match against possible exemptions placed in the source.
-type LinePosition struct {
-	Filename string
-	Line     int
-}
+var (
+	// Binary is the binary under analysis.
+	//
+	// See Reader, below.
+	binary = flag.String("binary", "", "binary under analysis")
 
-// String implements fmt.Stringer.String.
-func (e *LinePosition) String() string {
-	return fmt.Sprintf("%s:%d", e.Filename, e.Line)
-}
+	// Reader is the input stream.
+	//
+	// This may be set instead of Binary.
+	Reader io.Reader
 
-// String implements fmt.Stringer.String.
-//
-// Note that this string will contain new lines.
-func (e *Escape) String() string {
-	var b bytes.Buffer
-	fmt.Fprintf(&b, "%s", e.Reason.String())
-	for i, cs := range e.Chain {
-		if i == len(e.Chain)-1 {
-			fmt.Fprintf(&b, "\n @ %s → %s", cs.Resolved.String(), e.Detail)
-		} else {
-			fmt.Fprintf(&b, "\n + %s", cs.Resolved.String())
-		}
-	}
-	return b.String()
-}
+	// Tool is the tool used to dump a binary.
+	tool = flag.String("dump_tool", "", "tool used to dump a binary")
+)
 
 // EscapeReason is an escape reason.
 //
@@ -172,12 +112,12 @@ func (e *Escape) String() string {
 type EscapeReason int
 
 const (
-	interfaceInvoke EscapeReason = iota
-	unknownPackage
-	allocation
+	allocation EscapeReason = iota
 	builtin
+	interfaceInvoke
 	dynamicCall
 	stackSplit
+	unknownPackage
 	reasonCount // Count for below.
 )
 
@@ -188,17 +128,17 @@ const (
 func (e EscapeReason) String() string {
 	switch e {
 	case interfaceInvoke:
-		return "interface: function invocation via interface"
+		return "interface: call to potentially allocating function"
 	case unknownPackage:
 		return "unknown: no package information available"
 	case allocation:
-		return "heap: call to runtime heap allocation"
+		return "heap: explicit allocation"
 	case builtin:
-		return "builtin: call to runtime builtin"
+		return "builtin: call to potentially allocating builtin"
 	case dynamicCall:
-		return "dynamic: call via dynamic function"
+		return "dynamic: call to potentially allocating function"
 	case stackSplit:
-		return "stack: stack split on function entry"
+		return "stack: possible split on function entry"
 	default:
 		panic(fmt.Sprintf("unknown reason: %d", e))
 	}
@@ -227,46 +167,241 @@ var escapeTypes = func() map[string]EscapeReason {
 	return result
 }()
 
-// EscapeCount counts escapes.
+// escapingBuiltins are builtins known to escape.
 //
-// It is used to avoid accumulating too many escapes for the same reason, for
-// the same function. We limit each class to 3 instances (arbitrarily).
-type EscapeCount struct {
-	byReason [reasonCount]uint32
+// These are lowered at an earlier stage of compilation to explicit function
+// calls, but are not available for recursive analysis.
+var escapingBuiltins = []string{
+	"append",
+	"makemap",
+	"newobject",
+	"mallocgc",
 }
 
-// maxRecordsPerReason is the number of explicit records.
+// packageEscapeFacts is the set of all functions in a package, and whether or
+// not they recursively pass escape analysis.
 //
-// See EscapeCount (and usage), and Record implementation.
-const maxRecordsPerReason = 5
-
-// Record records the reason or returns false if it should not be added.
-func (ec *EscapeCount) Record(reason EscapeReason) bool {
-	ec.byReason[reason]++
-	if ec.byReason[reason] > maxRecordsPerReason {
-		return false
+// All the type names for receivers are encoded in the full key. The key
+// represents the fully qualified package and type name used at link time.
+//
+// Note that each Escapes object is a summary. Local findings may be reported
+// using more detailed information.
+type packageEscapeFacts struct {
+	Funcs map[string]Escapes
+}
+
+// AFact implements analysis.Fact.AFact.
+func (*packageEscapeFacts) AFact() {}
+
+// Analyzer includes specific results.
+var Analyzer = &analysis.Analyzer{
+	Name:      "checkescape",
+	Doc:       "escape analysis checks based on +checkescape annotations",
+	Run:       runSelectEscapes,
+	Requires:  []*analysis.Analyzer{buildssa.Analyzer},
+	FactTypes: []analysis.Fact{(*packageEscapeFacts)(nil)},
+}
+
+// EscapeAnalyzer includes all local escape results.
+var EscapeAnalyzer = &analysis.Analyzer{
+	Name:     "checkescape",
+	Doc:      "complete local escape analysis results (requires Analyzer facts)",
+	Run:      runAllEscapes,
+	Requires: []*analysis.Analyzer{buildssa.Analyzer},
+}
+
+// LinePosition is a low-resolution token.Position.
+//
+// This is used to match against possible exemptions placed in the source.
+type LinePosition struct {
+	Filename string
+	Line     int
+}
+
+// String implements fmt.Stringer.String.
+func (e LinePosition) String() string {
+	return fmt.Sprintf("%s:%d", e.Filename, e.Line)
+}
+
+// Simplified returns the simplified name.
+func (e LinePosition) Simplified() string {
+	return fmt.Sprintf("%s:%d", filepath.Base(e.Filename), e.Line)
+}
+
+// CallSite is a single call site.
+//
+// These can be chained.
+type CallSite struct {
+	LocalPos token.Pos
+	Resolved LinePosition
+}
+
+// IsValid indicates whether the CallSite is valid or not.
+func (cs *CallSite) IsValid() bool {
+	return cs.LocalPos.IsValid()
+}
+
+// Escapes is a collection of escapes.
+//
+// We record at most one escape for each reason, but record the number of
+// escapes that were omitted.
+//
+// This object should be used to summarize all escapes for a single line (local
+// analysis) or a single function (package facts).
+//
+// All fields are exported for gob.
+type Escapes struct {
+	CallSites [reasonCount][]CallSite
+	Details   [reasonCount]string
+	Omitted   [reasonCount]int
+}
+
+// add records one escape for reason r; it is called by Add and MergeWithCall.
+func (es *Escapes) add(r EscapeReason, detail string, omitted int, callSites ...CallSite) {
+	if es.CallSites[r] != nil {
+		// We will either be replacing the current escape or dropping
+		// the added one. Either way, one more escape goes unreported,
+		// so we increment omitted.
+		es.Omitted[r]++
+		// A single-element chain is a local escape, and we universally
+		// favor those: keep the recorded escape if it is already local,
+		// and drop the added one unless it is local itself.
+		if len(es.CallSites[r]) == 1 || len(callSites) != 1 {
+			return
+		}
+	}
+	es.Details[r] = detail
+	es.CallSites[r] = callSites
+	es.Omitted[r] += omitted
+}
+
+// Add adds a single escape.
+func (es *Escapes) Add(r EscapeReason, detail string, callSites ...CallSite) {
+	es.add(r, detail, 0, callSites...)
+}
+
+// IsEmpty returns true iff this Escapes is empty.
+func (es *Escapes) IsEmpty() bool {
+	for _, cs := range es.CallSites {
+		if cs != nil {
+			return false
+		}
 	}
 	return true
 }
 
+// Filter filters out all escapes except those matching the given reasons.
+//
+// If local is set, then non-local escapes will also be filtered.
+func (es *Escapes) Filter(reasons []EscapeReason, local bool) {
+FilterReasons:
+	for r := EscapeReason(0); r < reasonCount; r++ {
+		for i := 0; i < len(reasons); i++ {
+			if r == reasons[i] {
+				continue FilterReasons
+			}
+		}
+		// Zap this reason.
+		es.CallSites[r] = nil
+		es.Details[r] = ""
+		es.Omitted[r] = 0
+	}
+	if !local {
+		return
+	}
+	for r := EscapeReason(0); r < reasonCount; r++ {
+		// Does this meet our local requirement?
+		if len(es.CallSites[r]) > 1 {
+			es.CallSites[r] = nil
+			es.Details[r] = ""
+			es.Omitted[r] = 0
+		}
+	}
+}
+
+// MergeWithCall merges these escapes with another.
+//
+// If callSite is not valid (see CallSite.IsValid), no call is added.
+func (es *Escapes) MergeWithCall(other Escapes, callSite CallSite) {
+	for r := EscapeReason(0); r < reasonCount; r++ {
+		if other.CallSites[r] != nil {
+			// Construct our new call chain.
+			newCallSites := other.CallSites[r]
+			if callSite.IsValid() {
+				newCallSites = append([]CallSite{callSite}, newCallSites...)
+			}
+			// Add (potentially replacing) the underlying escape.
+			es.add(r, other.Details[r], other.Omitted[r], newCallSites...)
+		}
+	}
+}
+
+// Reportf emits one diagnostic per recorded escape reason, positioned at the
+// first call site and listing the remaining chain followed by the detail.
+func (es *Escapes) Reportf(pass *analysis.Pass) {
+	for r := EscapeReason(0); r < reasonCount; r++ {
+		if len(es.CallSites[r]) == 0 {
+			continue // Nothing recorded (also guards the [0] and [1:] below).
+		}
+		var b bytes.Buffer
+		fmt.Fprintf(&b, "%s ", r.String())
+		if es.Omitted[r] > 0 {
+			fmt.Fprintf(&b, "(%d omitted) ", es.Omitted[r])
+		}
+		for _, cs := range es.CallSites[r][1:] {
+			fmt.Fprintf(&b, "→ %s ", cs.Resolved.String())
+		}
+		fmt.Fprintf(&b, "→ %s", es.Details[r])
+		pass.Reportf(es.CallSites[r][0].LocalPos, "%s", b.String()) // Never use the text as a format.
+	}
+}
+
+// MergeAll merges a sequence of escapes.
+func MergeAll(others []Escapes) (es Escapes) {
+	for _, other := range others {
+		es.MergeWithCall(other, CallSite{})
+	}
+	return
+}
+
 // loadObjdump reads the objdump output.
 //
 // This records if there is a call any function for every source line. It is
 // used only to remove false positives for escape analysis. The call will be
 // elided if escape analysis is able to put the object on the heap exclusively.
-func loadObjdump() (map[LinePosition]string, error) {
-	cmd, out, err := dump.Command()
+//
+// Note that the map uses <basename.go>:<line> because that is all that is
+// provided in the objdump format. Since this is all local, it is sufficient.
+func loadObjdump() (map[string][]string, error) {
+	var (
+		args  []string
+		stdin io.Reader
+	)
+	if *binary != "" {
+		args = append(args, *binary)
+	} else if Reader != nil {
+		stdin = Reader
+	} else {
+		// We have no input stream or binary.
+		return nil, fmt.Errorf("no binary or reader provided")
+	}
+
+	// Construct our command.
+	cmd := exec.Command(*tool, args...)
+	cmd.Stdin = stdin
+	cmd.Stderr = os.Stderr
+	out, err := cmd.StdoutPipe()
 	if err != nil {
 		return nil, err
 	}
+	if err := cmd.Start(); err != nil {
+		return nil, err
+	}
 
 	// Build the map.
-	m := make(map[LinePosition]string)
+	m := make(map[string][]string)
 	r := bufio.NewReader(out)
-	var (
-		lastField string
-		lastPos   LinePosition
-	)
+NextLine:
 	for {
 		line, err := r.ReadString('\n')
 		if err != nil && err != io.EOF {
@@ -286,41 +421,62 @@ func loadObjdump() (map[LinePosition]string, error) {
 			if !strings.Contains(fields[3], "CALL") {
 				continue
 			}
+			site := strings.TrimSpace(fields[0])
+			var callStr string // Friendly string.
+			if len(fields) > 5 {
+				callStr = strings.Join(fields[5:], " ")
+			}
+			if len(callStr) == 0 {
+				// Just a raw call? is this asm?
+				callStr = strings.Join(fields[3:], " ")
+			}
 
 			// Ignore strings containing duffzero, which is just
 			// used by stack allocations for types that are large
 			// enough to warrant Duff's device.
-			if strings.Contains(line, "runtime.duffzero") {
+			if strings.Contains(callStr, "runtime.duffzero") ||
+				strings.Contains(callStr, "runtime.duffcopy") {
 				continue
 			}
 
 			// Ignore the racefuncenter call, which is used for
 			// race builds. This does not escape.
-			if strings.Contains(line, "runtime.racefuncenter") {
+			if strings.Contains(callStr, "runtime.racefuncenter") {
 				continue
 			}
 
-			// Calculate the filename and line. Note that per the
-			// example above, the filename is not a fully qualified
-			// base, just the basename (what we require).
-			if fields[0] != lastField {
-				parts := strings.SplitN(fields[0], ":", 2)
-				lineNum, err := strconv.ParseInt(parts[1], 10, 64)
-				if err != nil {
-					return nil, err
-				}
-				lastPos = LinePosition{
-					Filename: parts[0],
-					Line:     int(lineNum),
-				}
-				lastField = fields[0]
+			// Ignore the write barriers.
+			if strings.Contains(callStr, "runtime.gcWriteBarrier") {
+				continue
 			}
-			if _, ok := m[lastPos]; ok {
-				continue // Already marked.
+
+			// Ignore retpolines.
+			if strings.Contains(callStr, "runtime.retpoline") {
+				continue
 			}
 
-			// Save the actual call for the detail.
-			m[lastPos] = strings.Join(fields[3:], " ")
+			// Ignore stack sanity check (does not split).
+			if strings.Contains(callStr, "runtime.stackcheck") {
+				continue
+			}
+
+			// Ignore tls functions.
+			if strings.Contains(callStr, "runtime.settls") {
+				continue
+			}
+
+			// Does this exist already?
+			existing, ok := m[site]
+			if !ok {
+				existing = make([]string, 0, 1)
+			}
+			for _, other := range existing {
+				if callStr == other {
+					continue NextLine
+				}
+			}
+			existing = append(existing, callStr)
+			m[site] = existing // Update.
 		}
 		if err == io.EOF {
 			break
@@ -340,65 +496,148 @@ type poser interface {
 	Pos() token.Pos
 }
 
+// runSelectEscapes runs with only select escapes.
+func runSelectEscapes(pass *analysis.Pass) (interface{}, error) {
+	return run(pass, false)
+}
+
+// runAllEscapes runs with all escapes included.
+func runAllEscapes(pass *analysis.Pass) (interface{}, error) {
+	return run(pass, true)
+}
+
+// findReasons extracts reasons from the function.
+func findReasons(pass *analysis.Pass, fdecl *ast.FuncDecl) ([]EscapeReason, bool, map[EscapeReason]bool) {
+	// Is there a comment?
+	if fdecl.Doc == nil {
+		return nil, false, nil
+	}
+	var (
+		reasons     []EscapeReason
+		local       bool
+		testReasons = make(map[EscapeReason]bool) // reason -> local?
+	)
+	// Scan all lines.
+	found := false
+	for _, c := range fdecl.Doc.List {
+		// Does the comment contain a +checkescape line?
+		if !strings.HasPrefix(c.Text, magic) && !strings.HasPrefix(c.Text, testMagic) {
+			continue
+		}
+		if c.Text == magic {
+			// Default: hard reasons, local only.
+			reasons = hardReasons
+			local = true
+		} else if strings.HasPrefix(c.Text, magicParams) {
+			// Extract specific reasons.
+			types := strings.Split(c.Text[len(magicParams):], ",")
+			found = true // For below.
+			for i := 0; i < len(types); i++ {
+				if types[i] == "local" {
+					// Limit search to local escapes.
+					local = true
+				} else if types[i] == "all" {
+					// Append all reasons.
+					reasons = append(reasons, allReasons...)
+				} else if types[i] == "hard" {
+					// Append all hard reasons.
+					reasons = append(reasons, hardReasons...)
+				} else {
+					r, ok := escapeTypes[types[i]]
+					if !ok {
+						// This is not a valid escape reason.
+						pass.Reportf(fdecl.Pos(), "unknown reason: %v", types[i])
+						continue
+					}
+					reasons = append(reasons, r)
+				}
+			}
+		} else if strings.HasPrefix(c.Text, testMagic) {
+			types := strings.Split(c.Text[len(testMagic):], ",")
+			local := false
+			for i := 0; i < len(types); i++ {
+				if types[i] == "local" {
+					local = true
+				} else {
+					r, ok := escapeTypes[types[i]]
+					if !ok {
+						// This is not a valid escape reason.
+						pass.Reportf(fdecl.Pos(), "unknown reason: %v", types[i])
+						continue
+					}
+					if v, ok := testReasons[r]; ok && v {
+						// Already registered as local.
+						continue
+					}
+					testReasons[r] = local
+				}
+			}
+		}
+	}
+	if len(reasons) == 0 && found {
+		// A magic annotation was provided, but no reasons.
+		pass.Reportf(fdecl.Pos(), "no reasons provided")
+	}
+	return reasons, local, testReasons
+}
+
 // run performs the analysis.
-func run(pass *analysis.Pass) (interface{}, error) {
+func run(pass *analysis.Pass, localEscapes bool) (interface{}, error) {
 	calls, err := loadObjdump()
 	if err != nil {
 		return nil, err
 	}
-	pef := packageEscapeFacts{
-		Funcs: make(map[string][]Escape),
-	}
+	allEscapes := make(map[string][]Escapes)
+	mergedEscapes := make(map[string]Escapes)
 	linePosition := func(inst, parent poser) LinePosition {
 		p := pass.Fset.Position(inst.Pos())
 		if (p.Filename == "" || p.Line == 0) && parent != nil {
 			p = pass.Fset.Position(parent.Pos())
 		}
 		return LinePosition{
-			Filename: filepath.Base(p.Filename),
+			Filename: p.Filename,
 			Line:     p.Line,
 		}
 	}
-	hasCall := func(inst poser) (string, bool) {
-		p := linePosition(inst, nil)
-		s, ok := calls[p]
-		return s, ok
-	}
 	callSite := func(inst ssa.Instruction) CallSite {
 		return CallSite{
 			LocalPos: inst.Pos(),
 			Resolved: linePosition(inst, inst.Parent()),
 		}
 	}
-	escapes := func(reason EscapeReason, detail string, inst ssa.Instruction, ec *EscapeCount) []Escape {
-		if !ec.Record(reason) {
-			return nil // Skip.
-		}
-		es := Escape{
-			Reason: reason,
-			Detail: detail,
-			Chain:  []CallSite{callSite(inst)},
+	hasCall := func(inst poser) (string, bool) {
+		p := linePosition(inst, nil)
+		s, ok := calls[p.Simplified()]
+		if !ok {
+			return "", false
 		}
-		return []Escape{es}
+		// Join all calls together.
+		return strings.Join(s, " or "), true
 	}
-	resolve := func(sub []Escape, inst ssa.Instruction, ec *EscapeCount) (es []Escape) {
-		for _, e := range sub {
-			if !ec.Record(e.Reason) {
-				continue // Skip.
+	state := pass.ResultOf[buildssa.Analyzer].(*buildssa.SSA)
+
+	// Build the exception list.
+	exemptions := make(map[LinePosition]string)
+	for _, f := range pass.Files {
+		for _, cg := range f.Comments {
+			for _, c := range cg.List {
+				p := pass.Fset.Position(c.Slash)
+				if strings.HasPrefix(strings.ToLower(c.Text), exempt) {
+					exemptions[LinePosition{
+						Filename: p.Filename,
+						Line:     p.Line,
+					}] = c.Text[len(exempt):]
+				}
 			}
-			es = append(es, Escape{
-				Reason: e.Reason,
-				Detail: e.Detail,
-				Chain:  append([]CallSite{callSite(inst)}, e.Chain...),
-			})
 		}
-		return es
 	}
-	state := pass.ResultOf[buildssa.Analyzer].(*buildssa.SSA)
-
-	var loadFunc func(*ssa.Function) []Escape // Used below.
 
-	analyzeInstruction := func(inst ssa.Instruction, ec *EscapeCount) []Escape {
+	var loadFunc func(*ssa.Function) Escapes // Used below.
+	analyzeInstruction := func(inst ssa.Instruction) (es Escapes) {
+		cs := callSite(inst)
+		if _, ok := exemptions[cs.Resolved]; ok {
+			return // No escape.
+		}
 		switch x := inst.(type) {
 		case *ssa.Call:
 			if x.Call.IsInvoke() {
@@ -407,13 +646,15 @@ func run(pass *analysis.Pass) (interface{}, error) {
 				// not, since we don't know the underlying
 				// type.
 				call, _ := hasCall(inst)
-				return escapes(interfaceInvoke, call, inst, ec)
+				es.Add(interfaceInvoke, call, cs)
+				return
 			}
 			switch x := x.Call.Value.(type) {
 			case *ssa.Function:
 				if x.Pkg == nil {
 					// Can't resolve the package.
-					return escapes(unknownPackage, "no package", inst, ec)
+					es.Add(unknownPackage, "no package", cs)
+					return
 				}
 
 				// Is this a local function? If yes, call the
@@ -421,7 +662,8 @@ func run(pass *analysis.Pass) (interface{}, error) {
 				// local escapes are the escapes found in the
 				// local function.
 				if x.Pkg.Pkg == pass.Pkg {
-					return resolve(loadFunc(x), inst, ec)
+					es.MergeWithCall(loadFunc(x), cs)
+					return
 				}
 
 				// Recursively collect information from
@@ -430,22 +672,26 @@ func run(pass *analysis.Pass) (interface{}, error) {
 				if !pass.ImportPackageFact(x.Pkg.Pkg, &imp) {
 					// Unable to import the dependency; we must
 					// declare these as escaping.
-					return escapes(unknownPackage, "no analysis", inst, ec)
+					es.Add(unknownPackage, "no analysis", cs)
+					return
 				}
 
 				// The escapes of this instruction are the
 				// escapes of the called function directly.
-				return resolve(imp.Funcs[x.RelString(x.Pkg.Pkg)], inst, ec)
+				// Note that this may record many escapes.
+				es.MergeWithCall(imp.Funcs[x.RelString(x.Pkg.Pkg)], cs)
+				return
 			case *ssa.Builtin:
 				// Ignore elided escapes.
 				if _, has := hasCall(inst); !has {
-					return nil
+					return
 				}
 
 				// Check if the builtin is escaping.
 				for _, name := range escapingBuiltins {
 					if x.Name() == name {
-						return escapes(builtin, name, inst, ec)
+						es.Add(builtin, name, cs)
+						return
 					}
 				}
 			default:
@@ -454,82 +700,87 @@ func run(pass *analysis.Pass) (interface{}, error) {
 				// dispatches. We cannot actually look up what
 				// this refers to using static analysis alone.
 				call, _ := hasCall(inst)
-				return escapes(dynamicCall, call, inst, ec)
+				es.Add(dynamicCall, call, cs)
 			}
 		case *ssa.Alloc:
 			// Ignore non-heap allocations.
 			if !x.Heap {
-				return nil
+				return
 			}
 
 			// Ignore elided escapes.
 			call, has := hasCall(inst)
 			if !has {
-				return nil
+				return
 			}
 
 			// This is a real heap allocation.
-			return escapes(allocation, call, inst, ec)
+			es.Add(allocation, call, cs)
 		case *ssa.MakeMap:
-			return escapes(builtin, "makemap", inst, ec)
+			es.Add(builtin, "makemap", cs)
 		case *ssa.MakeSlice:
-			return escapes(builtin, "makeslice", inst, ec)
+			es.Add(builtin, "makeslice", cs)
 		case *ssa.MakeClosure:
-			return escapes(builtin, "makeclosure", inst, ec)
+			es.Add(builtin, "makeclosure", cs)
 		case *ssa.MakeChan:
-			return escapes(builtin, "makechan", inst, ec)
+			es.Add(builtin, "makechan", cs)
 		}
-		return nil // No escapes.
+		return
 	}
 
-	var analyzeBasicBlock func(*ssa.BasicBlock, *EscapeCount) []Escape // Recursive.
-	analyzeBasicBlock = func(block *ssa.BasicBlock, ec *EscapeCount) (rval []Escape) {
+	var analyzeBasicBlock func(*ssa.BasicBlock) []Escapes // Recursive.
+	analyzeBasicBlock = func(block *ssa.BasicBlock) (rval []Escapes) {
 		for _, inst := range block.Instrs {
-			rval = append(rval, analyzeInstruction(inst, ec)...)
+			if es := analyzeInstruction(inst); !es.IsEmpty() {
+				rval = append(rval, es)
+			}
 		}
-		return rval // N.B. may be empty.
+		return
 	}
 
-	loadFunc = func(fn *ssa.Function) []Escape {
+	loadFunc = func(fn *ssa.Function) Escapes {
 		// Is this already available?
 		name := fn.RelString(pass.Pkg)
-		if es, ok := pef.Funcs[name]; ok {
+		if es, ok := mergedEscapes[name]; ok {
 			return es
 		}
 
 		// In the case of a true cycle, we assume that the current
-		// function itself has no escapes until the rest of the
-		// analysis is complete. This will trip the above in the case
-		// of a cycle of any kind.
-		pef.Funcs[name] = nil
+		// function itself has no escapes.
+		//
+		// When evaluating the function again, the proper escapes will
+		// be filled in here.
+		allEscapes[name] = nil
+		mergedEscapes[name] = Escapes{}
 
 		// Perform the basic analysis.
-		var (
-			es []Escape
-			ec EscapeCount
-		)
+		var es []Escapes
 		if fn.Recover != nil {
-			es = append(es, analyzeBasicBlock(fn.Recover, &ec)...)
+			es = append(es, analyzeBasicBlock(fn.Recover)...)
 		}
 		for _, block := range fn.Blocks {
-			es = append(es, analyzeBasicBlock(block, &ec)...)
+			es = append(es, analyzeBasicBlock(block)...)
 		}
 
 		// Check for a stack split.
 		if call, has := hasCall(fn); has {
-			es = append(es, Escape{
-				Reason: stackSplit,
-				Detail: call,
-				Chain: []CallSite{CallSite{
-					LocalPos: fn.Pos(),
-					Resolved: linePosition(fn, fn.Parent()),
-				}},
+			var ss Escapes
+			ss.Add(stackSplit, call, CallSite{
+				LocalPos: fn.Pos(),
+				Resolved: linePosition(fn, fn.Parent()),
 			})
+			es = append(es, ss)
 		}
 
 		// Save the result and return.
-		pef.Funcs[name] = es
-		return es
+		//
+		// Note that we merge the result when saving to the facts. It
+		// doesn't really matter the specific escapes, as long as we
+		// have recorded all the appropriate classes of escapes.
+		summary := MergeAll(es)
+		allEscapes[name] = es
+		mergedEscapes[name] = summary
+		return summary
 	}
 
 	// Complete all local functions.
@@ -537,173 +788,76 @@ func run(pass *analysis.Pass) (interface{}, error) {
 		loadFunc(fn)
 	}
 
-	// Build the exception list.
-	exemptions := make(map[LinePosition]string)
-	for _, f := range pass.Files {
-		for _, cg := range f.Comments {
-			for _, c := range cg.List {
-				p := pass.Fset.Position(c.Slash)
-				if strings.HasPrefix(strings.ToLower(c.Text), exempt) {
-					exemptions[LinePosition{
-						Filename: filepath.Base(p.Filename),
-						Line:     p.Line,
-					}] = c.Text[len(exempt):]
-				}
-			}
-		}
+	if !localEscapes {
+		// Export all findings for future packages. We only do this in
+		// non-local escapes mode, and expect to run this analysis
+		// after the SelectAnalysis.
+		pass.ExportPackageFact(&packageEscapeFacts{
+			Funcs: mergedEscapes,
+		})
 	}
 
-	// Delete everything matching the excemtions.
-	//
-	// This has the implication that exceptions are applied recursively,
-	// since this now modified set is what will be saved.
-	for name, escapes := range pef.Funcs {
-		var newEscapes []Escape
-		for _, escape := range escapes {
-			isExempt := false
-			for line, _ := range exemptions {
-				// Note that an exemption applies if it is
-				// marked as an exemption anywhere in the call
-				// chain. It need not be marked as escapes in
-				// the function itself, nor in the top-level
-				// caller.
-				for _, callSite := range escape.Chain {
-					if callSite.Resolved == line {
-						isExempt = true
-						break
-					}
-				}
-				if isExempt {
-					break
-				}
-			}
-			if !isExempt {
-				// Record this escape; not an exception.
-				newEscapes = append(newEscapes, escape)
-			}
-		}
-		pef.Funcs[name] = newEscapes // Update.
-	}
-
-	// Export all findings for future packages.
-	pass.ExportPackageFact(&pef)
-
 	// Scan all functions for violations.
 	for _, f := range pass.Files {
 		// Scan all declarations.
 		for _, decl := range f.Decls {
-			fdecl, ok := decl.(*ast.FuncDecl)
 			// Function declaration?
+			fdecl, ok := decl.(*ast.FuncDecl)
 			if !ok {
 				continue
 			}
-			// Is there a comment?
-			if fdecl.Doc == nil {
-				continue
-			}
 			var (
 				reasons     []EscapeReason
-				found       bool
 				local       bool
-				testReasons = make(map[EscapeReason]bool) // reason -> local?
+				testReasons map[EscapeReason]bool
 			)
-			// Does the comment contain a +checkescape line?
-			for _, c := range fdecl.Doc.List {
-				if !strings.HasPrefix(c.Text, magic) && !strings.HasPrefix(c.Text, testMagic) {
-					continue
-				}
-				if c.Text == magic {
-					// Default: hard reasons, local only.
-					reasons = hardReasons
-					local = true
-				} else if strings.HasPrefix(c.Text, magicParams) {
-					// Extract specific reasons.
-					types := strings.Split(c.Text[len(magicParams):], ",")
-					found = true // For below.
-					for i := 0; i < len(types); i++ {
-						if types[i] == "local" {
-							// Limit search to local escapes.
-							local = true
-						} else if types[i] == "all" {
-							// Append all reasons.
-							reasons = append(reasons, allReasons...)
-						} else if types[i] == "hard" {
-							// Append all hard reasons.
-							reasons = append(reasons, hardReasons...)
-						} else {
-							r, ok := escapeTypes[types[i]]
-							if !ok {
-								// This is not a valid escape reason.
-								pass.Reportf(fdecl.Pos(), "unknown reason: %v", types[i])
-								continue
-							}
-							reasons = append(reasons, r)
-						}
-					}
-				} else if strings.HasPrefix(c.Text, testMagic) {
-					types := strings.Split(c.Text[len(testMagic):], ",")
-					local := false
-					for i := 0; i < len(types); i++ {
-						if types[i] == "local" {
-							local = true
-						} else {
-							r, ok := escapeTypes[types[i]]
-							if !ok {
-								// This is not a valid escape reason.
-								pass.Reportf(fdecl.Pos(), "unknown reason: %v", types[i])
-								continue
-							}
-							if v, ok := testReasons[r]; ok && v {
-								// Already registered as local.
-								continue
-							}
-							testReasons[r] = local
-						}
-					}
-				}
-			}
-			if len(reasons) == 0 && found {
-				// A magic annotation was provided, but no reasons.
-				pass.Reportf(fdecl.Pos(), "no reasons provided")
-				continue
+			if localEscapes {
+				// Find all hard escapes.
+				reasons = hardReasons
+			} else {
+				// Find all declared reasons.
+				reasons, local, testReasons = findReasons(pass, fdecl)
 			}
 
 			// Scan for matches.
 			fn := pass.TypesInfo.Defs[fdecl.Name].(*types.Func)
-			name := state.Pkg.Prog.FuncValue(fn).RelString(pass.Pkg)
-			es, ok := pef.Funcs[name]
-			if !ok {
+			fv := state.Pkg.Prog.FuncValue(fn)
+			if fv == nil {
+				continue
+			}
+			name := fv.RelString(pass.Pkg)
+			all, allOk := allEscapes[name]
+			merged, mergedOk := mergedEscapes[name]
+			if !allOk || !mergedOk {
 				pass.Reportf(fdecl.Pos(), "internal error: function %s not found.", name)
 				continue
 			}
-			for _, e := range es {
-				for _, r := range reasons {
-					// Is does meet our local requirement?
-					if local && len(e.Chain) > 1 {
-						continue
-					}
-					// Does this match the reason? Emit
-					// with a full stack trace that
-					// explains why this violates our
-					// constraints.
-					if e.Reason == r {
-						pass.Reportf(e.Chain[0].LocalPos, "%s", e.String())
-					}
-				}
+
+			// Filter reasons and report.
+			//
+			// For the findings, we use all escapes.
+			for _, es := range all {
+				es.Filter(reasons, local)
+				es.Reportf(pass)
 			}
 
 			// Scan for test (required) matches.
+			//
+			// For tests we need only the merged escapes.
 			testReasonsFound := make(map[EscapeReason]bool)
-			for _, e := range es {
+			for r := EscapeReason(0); r < reasonCount; r++ {
+				if merged.CallSites[r] == nil {
+					continue
+				}
 				// Is this local?
-				local, ok := testReasons[e.Reason]
-				wantLocal := len(e.Chain) == 1
-				testReasonsFound[e.Reason] = wantLocal
+				wantLocal, ok := testReasons[r]
+				isLocal := len(merged.CallSites[r]) == 1
+				testReasonsFound[r] = isLocal
 				if !ok {
 					continue
 				}
-				if local == wantLocal {
-					delete(testReasons, e.Reason)
+				if isLocal == wantLocal {
+					delete(testReasons, r)
 				}
 			}
 			for reason, local := range testReasons {
@@ -711,10 +865,8 @@ func run(pass *analysis.Pass) (interface{}, error) {
 				pass.Reportf(fdecl.Pos(), fmt.Sprintf("testescapes not found: reason=%s, local=%t", reason, local))
 			}
 			if len(testReasons) > 0 {
-				// Dump all reasons found to help in debugging.
-				for _, e := range es {
-					pass.Reportf(e.Chain[0].LocalPos, "escape found: %s", e.String())
-				}
+				// Report for debugging.
+				merged.Reportf(pass)
 			}
 		}
 	}
diff --git a/tools/checkescape/test1/test1.go b/tools/checkescape/test1/test1.go
index a1d36459fc..27991649f8 100644
--- a/tools/checkescape/test1/test1.go
+++ b/tools/checkescape/test1/test1.go
@@ -175,6 +175,7 @@ func Split() {
 
 // +mustescape:stack
 //go:noinline
+//go:nosplit
 func splitRec() {
 	Split()
 }
diff --git a/tools/checkescape/test2/test2.go b/tools/checkescape/test2/test2.go
index 2d5865f474..067d5a1f4c 100644
--- a/tools/checkescape/test2/test2.go
+++ b/tools/checkescape/test2/test2.go
@@ -83,6 +83,7 @@ func dynamicCrossPkg(f func()) {
 
 // +mustescape:stack
 //go:noinline
+//go:nosplit
 func splitCrosssPkt() {
 	test1.Split()
 }
diff --git a/tools/go_marshal/test/escape/escape.go b/tools/go_marshal/test/escape/escape.go
index 6a46ddbf80..3a1a64e9cd 100644
--- a/tools/go_marshal/test/escape/escape.go
+++ b/tools/go_marshal/test/escape/escape.go
@@ -64,6 +64,7 @@ func doCopyOut(t *dummyTask) {
 
 // +mustescape:builtin
 // +mustescape:stack
+//go:nosplit
 func doMarshalBytesDirect(t *dummyTask) {
 	var stat test.Stat
 	buf := t.CopyScratchBuffer(stat.SizeBytes())
@@ -73,6 +74,7 @@ func doMarshalBytesDirect(t *dummyTask) {
 
 // +mustescape:builtin
 // +mustescape:stack
+//go:nosplit
 func doMarshalUnsafeDirect(t *dummyTask) {
 	var stat test.Stat
 	buf := t.CopyScratchBuffer(stat.SizeBytes())
@@ -82,6 +84,7 @@ func doMarshalUnsafeDirect(t *dummyTask) {
 
 // +mustescape:local,heap
 // +mustescape:stack
+//go:nosplit
 func doMarshalBytesViaMarshallable(t *dummyTask) {
 	var stat test.Stat
 	t.MarshalBytes(usermem.Addr(0xf000ba12), &stat)
@@ -89,6 +92,7 @@ func doMarshalBytesViaMarshallable(t *dummyTask) {
 
 // +mustescape:local,heap
 // +mustescape:stack
+//go:nosplit
 func doMarshalUnsafeViaMarshallable(t *dummyTask) {
 	var stat test.Stat
 	t.MarshalUnsafe(usermem.Addr(0xf000ba12), &stat)
diff --git a/tools/nogo/BUILD b/tools/nogo/BUILD
index fb35c5ffd3..9f1fcd9c74 100644
--- a/tools/nogo/BUILD
+++ b/tools/nogo/BUILD
@@ -27,7 +27,6 @@ go_library(
     deps = [
         "//tools/checkescape",
         "//tools/checkunsafe",
-        "//tools/nogo/dump",
         "@org_golang_x_tools//go/analysis:go_tool_library",
         "@org_golang_x_tools//go/analysis/internal/facts:go_tool_library",
         "@org_golang_x_tools//go/analysis/passes/asmdecl:go_tool_library",
diff --git a/tools/nogo/config.go b/tools/nogo/config.go
index 451cd4a4ca..cfe7b4aa4f 100644
--- a/tools/nogo/config.go
+++ b/tools/nogo/config.go
@@ -122,3 +122,8 @@ var analyzerConfig = map[*analysis.Analyzer]matcher{
 	checkescape.Analyzer: internalMatches(),
 	checkunsafe.Analyzer: internalMatches(),
 }
+
+var escapesConfig = map[*analysis.Analyzer]matcher{
+	// Informational only: include all packages.
+	checkescape.EscapeAnalyzer: alwaysMatches(),
+}
diff --git a/tools/nogo/defs.bzl b/tools/nogo/defs.bzl
index 963084d532..4804380474 100644
--- a/tools/nogo/defs.bzl
+++ b/tools/nogo/defs.bzl
@@ -50,6 +50,7 @@ NogoStdlibInfo = provider(
     "information for nogo analysis (standard library facts)",
     fields = {
         "facts": "serialized standard library facts",
+        "findings": "package findings (if relevant)",
     },
 )
 
@@ -59,18 +60,18 @@ def _nogo_stdlib_impl(ctx):
 
     # Build the standard library facts.
     facts = ctx.actions.declare_file(ctx.label.name + ".facts")
+    findings = ctx.actions.declare_file(ctx.label.name + ".findings")
     config = struct(
         Srcs = [f.path for f in go_ctx.stdlib_srcs],
         GOOS = go_ctx.goos,
         GOARCH = go_ctx.goarch,
         Tags = go_ctx.tags,
-        FactOutput = facts.path,
     )
     config_file = ctx.actions.declare_file(ctx.label.name + ".cfg")
     ctx.actions.write(config_file, config.to_json())
     ctx.actions.run(
         inputs = [config_file] + go_ctx.stdlib_srcs,
-        outputs = [facts],
+        outputs = [facts, findings],
         tools = depset(go_ctx.runfiles.to_list() + ctx.files._dump_tool),
         executable = ctx.files._nogo[0],
         mnemonic = "GoStandardLibraryAnalysis",
@@ -78,12 +79,15 @@ def _nogo_stdlib_impl(ctx):
         arguments = go_ctx.nogo_args + [
             "-dump_tool=%s" % ctx.files._dump_tool[0].path,
             "-stdlib=%s" % config_file.path,
+            "-findings=%s" % findings.path,
+            "-facts=%s" % facts.path,
         ],
     )
 
     # Return the stdlib facts as output.
     return [NogoStdlibInfo(
         facts = facts,
+        findings = findings,
     )]
 
 nogo_stdlib = go_rule(
@@ -108,6 +112,7 @@ NogoInfo = provider(
     "information for nogo analysis",
     fields = {
         "facts": "serialized package facts",
+        "findings": "package findings (if relevant)",
         "importpath": "package import path",
         "binaries": "package binary files",
         "srcs": "original source files (for go_test support)",
@@ -203,6 +208,8 @@ def _nogo_aspect_impl(target, ctx):
 
     # The nogo tool operates on a configuration serialized in JSON format.
     facts = ctx.actions.declare_file(target.label.name + ".facts")
+    findings = ctx.actions.declare_file(target.label.name + ".findings")
+    escapes = ctx.actions.declare_file(target.label.name + ".escapes")
     config = struct(
         ImportPath = importpath,
         GoFiles = [src.path for src in srcs if src.path.endswith(".go")],
@@ -213,14 +220,13 @@ def _nogo_aspect_impl(target, ctx):
         FactMap = fact_map,
         ImportMap = import_map,
         StdlibFacts = stdlib_facts.path,
-        FactOutput = facts.path,
     )
     config_file = ctx.actions.declare_file(target.label.name + ".cfg")
     ctx.actions.write(config_file, config.to_json())
     inputs.append(config_file)
     ctx.actions.run(
         inputs = inputs,
-        outputs = [facts],
+        outputs = [facts, findings, escapes],
         tools = depset(go_ctx.runfiles.to_list() + ctx.files._dump_tool),
         executable = ctx.files._nogo[0],
         mnemonic = "GoStaticAnalysis",
@@ -229,17 +235,30 @@ def _nogo_aspect_impl(target, ctx):
             "-binary=%s" % target_objfile.path,
             "-dump_tool=%s" % ctx.files._dump_tool[0].path,
             "-package=%s" % config_file.path,
+            "-findings=%s" % findings.path,
+            "-facts=%s" % facts.path,
+            "-escapes=%s" % escapes.path,
         ],
     )
 
     # Return the package facts as output.
-    return [NogoInfo(
-        facts = facts,
-        importpath = importpath,
-        binaries = binaries,
-        srcs = srcs,
-        deps = deps,
-    )]
+    return [
+        NogoInfo(
+            facts = facts,
+            findings = findings,
+            importpath = importpath,
+            binaries = binaries,
+            srcs = srcs,
+            deps = deps,
+        ),
+        OutputGroupInfo(
+            # Expose all findings (should just be a single file). This can be
+            # used for build analysis of the nogo findings.
+            nogo_findings = depset([findings]),
+            # Expose all escape analysis findings (see above).
+            nogo_escapes = depset([escapes]),
+        ),
+    ]
 
 nogo_aspect = go_rule(
     aspect,
@@ -250,15 +269,9 @@ nogo_aspect = go_rule(
         "embed",
     ],
     attrs = {
-        "_nogo": attr.label(
-            default = "//tools/nogo/check:check",
-        ),
-        "_nogo_stdlib": attr.label(
-            default = "//tools/nogo:stdlib",
-        ),
-        "_dump_tool": attr.label(
-            default = "//tools/nogo:dump_tool",
-        ),
+        "_nogo": attr.label(default = "//tools/nogo/check:check"),
+        "_nogo_stdlib": attr.label(default = "//tools/nogo:stdlib"),
+        "_dump_tool": attr.label(default = "//tools/nogo:dump_tool"),
     },
 )
 
@@ -270,13 +283,26 @@ def _nogo_test_impl(ctx):
     # this way so that any test applied is effectively pushed down to all
     # upstream dependencies through the aspect.
     inputs = []
+    findings = []
     runner = ctx.actions.declare_file("%s-executer" % ctx.label.name)
     runner_content = ["#!/bin/bash"]
     for dep in ctx.attr.deps:
+        # Extract the findings.
         info = dep[NogoInfo]
-        inputs.append(info.facts)
+        inputs.append(info.findings)
+        findings.append(info.findings)
+
+        # Include all source files, transitively. This will make this target
+        # "directly affected" for the purpose of build analysis.
+        inputs += info.srcs
+
+        # If there are findings, dump them and fail (even if the dump itself fails).
+        runner_content.append("if [[ -s \"%s\" ]]; then cat \"%s\"; exit 1; fi" % (
+            info.findings.short_path,
+            info.findings.short_path,
+        ))
 
-        # Draw a sweet unicode checkmark with the package name (in green).
+        # Otherwise, draw a sweet unicode checkmark with the package name (in green).
         runner_content.append("echo -e \"\\033[0;32m\\xE2\\x9C\\x94\\033[0;31m\\033[0m %s\"" % info.importpath)
     runner_content.append("exit 0\n")
     ctx.actions.write(runner, "\n".join(runner_content), is_executable = True)
diff --git a/tools/nogo/dump/BUILD b/tools/nogo/dump/BUILD
deleted file mode 100644
index dfa29d6519..0000000000
--- a/tools/nogo/dump/BUILD
+++ /dev/null
@@ -1,10 +0,0 @@
-load("//tools:defs.bzl", "go_library")
-
-package(licenses = ["notice"])
-
-go_library(
-    name = "dump",
-    srcs = ["dump.go"],
-    nogo = False,
-    visibility = ["//tools:__subpackages__"],
-)
diff --git a/tools/nogo/dump/dump.go b/tools/nogo/dump/dump.go
deleted file mode 100644
index f06567e0fd..0000000000
--- a/tools/nogo/dump/dump.go
+++ /dev/null
@@ -1,78 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package dump contains data dump tools.
-//
-// The interface used by the package corresponds to the tool generated by the
-// nogo_dump_tool rule.
-//
-// This package is separate in order to avoid a dependency cycle.
-package dump
-
-import (
-	"flag"
-	"fmt"
-	"io"
-	"os"
-	"os/exec"
-)
-
-var (
-	// Binary is the binary under analysis.
-	//
-	// See Reader, below.
-	binary = flag.String("binary", "", "binary under analysis")
-
-	// Reader is the input stream.
-	//
-	// This may be set instead of Binary.
-	Reader io.Reader
-
-	// Tool is the tool used to dump a binary.
-	tool = flag.String("dump_tool", "", "tool used to dump a binary")
-)
-
-// Command returns a command that will emit the dumped object on stdout.
-//
-// You must call Wait on the resulting command.
-func Command() (*exec.Cmd, io.Reader, error) {
-	var (
-		args  []string
-		stdin io.Reader
-	)
-	if *binary != "" {
-		args = append(args, *binary)
-		*binary = "" // Clear.
-	} else if Reader != nil {
-		stdin = Reader
-		Reader = nil // Clear.
-	} else {
-		// We have no input stream or binary.
-		return nil, nil, fmt.Errorf("no binary or reader provided!")
-	}
-
-	// Construct our command.
-	cmd := exec.Command(*tool, args...)
-	cmd.Stdin = stdin
-	cmd.Stderr = os.Stderr
-	out, err := cmd.StdoutPipe()
-	if err != nil {
-		return nil, nil, err
-	}
-	if err := cmd.Start(); err != nil {
-		return nil, nil, err
-	}
-
-	return cmd, out, err
-}
diff --git a/tools/nogo/nogo.go b/tools/nogo/nogo.go
index e44f32d4cf..40e48540da 100644
--- a/tools/nogo/nogo.go
+++ b/tools/nogo/nogo.go
@@ -40,18 +40,19 @@ import (
 	"golang.org/x/tools/go/analysis"
 	"golang.org/x/tools/go/analysis/internal/facts"
 	"golang.org/x/tools/go/gcexportdata"
-	"gvisor.dev/gvisor/tools/nogo/dump"
+
+	// Special case: flags live here and change overall behavior.
+	"gvisor.dev/gvisor/tools/checkescape"
 )
 
 // stdlibConfig is serialized as the configuration.
 //
 // This contains everything required for stdlib analysis.
 type stdlibConfig struct {
-	Srcs       []string
-	GOOS       string
-	GOARCH     string
-	Tags       []string
-	FactOutput string
+	Srcs   []string
+	GOOS   string
+	GOARCH string
+	Tags   []string
 }
 
 // packageConfig is serialized as the configuration.
@@ -66,7 +67,6 @@ type packageConfig struct {
 	GOARCH      string
 	ImportMap   map[string]string
 	FactMap     map[string]string
-	FactOutput  string
 	StdlibFacts string
 }
 
@@ -111,14 +111,6 @@ func (c *packageConfig) factLoader() (loader, error) {
 	}, nil
 }
 
-// factSaver may be used directly as a saver.
-func (c *packageConfig) factSaver(factData []byte) error {
-	if c.FactOutput == "" {
-		return nil // Nothing to save.
-	}
-	return ioutil.WriteFile(c.FactOutput, factData, 0644)
-}
-
 // shouldInclude indicates whether the file should be included.
 //
 // NOTE: This does only basic parsing of tags.
@@ -200,9 +192,9 @@ var ErrSkip = errors.New("skipped")
 //
 // Note that not all parts of the source are expected to build. We skip obvious
 // test files, and cmd files, which should not be dependencies.
-func checkStdlib(config *stdlibConfig) ([]string, error) {
+func checkStdlib(config *stdlibConfig, ac map[*analysis.Analyzer]matcher) ([]string, []byte, error) {
 	if len(config.Srcs) == 0 {
-		return nil, nil
+		return nil, nil, nil
 	}
 
 	// Ensure all paths are normalized.
@@ -283,23 +275,21 @@ func checkStdlib(config *stdlibConfig) ([]string, error) {
 		}
 
 		// Provide the input.
-		oldReader := dump.Reader
-		dump.Reader = rc // For analysis.
+		oldReader := checkescape.Reader
+		checkescape.Reader = rc // For analysis.
 		defer func() {
 			rc.Close()
-			dump.Reader = oldReader // Restore.
+			checkescape.Reader = oldReader // Restore.
 		}()
 
 		// Run the analysis.
-		findings, err := checkPackage(config, func(factData []byte) error {
-			stdlibFacts[pkg] = factData
-			return nil
-		}, checkOne)
+		findings, factData, err := checkPackage(config, ac, checkOne)
 		if err != nil {
 			// If we can't analyze a package from the standard library,
 			// then we skip it. It will simply not have any findings.
 			return nil
 		}
+		stdlibFacts[pkg] = factData
 		allFindings = append(allFindings, findings...)
 		return nil
 	}
@@ -316,14 +306,11 @@ func checkStdlib(config *stdlibConfig) ([]string, error) {
 	// Write out all findings.
 	factData, err := json.Marshal(stdlibFacts)
 	if err != nil {
-		return nil, fmt.Errorf("error saving stdlib facts: %w", err)
-	}
-	if err := ioutil.WriteFile(config.FactOutput, factData, 0644); err != nil {
-		return nil, fmt.Errorf("error saving findings to %q: %v", config.FactOutput, err)
+		return nil, nil, fmt.Errorf("error saving stdlib facts: %w", err)
 	}
 
 	// Return all findings.
-	return allFindings, nil
+	return allFindings, factData, nil
 }
 
 // checkPackage runs all analyzers.
@@ -334,7 +321,7 @@ func checkStdlib(config *stdlibConfig) ([]string, error) {
 //
 // [1] bazelbuid/rules_go/tools/builders/nogo_main.go
 // [2] golang.org/x/tools/go/checker/internal/checker
-func checkPackage(config *packageConfig, factSaver saver, importCallback func(string) error) ([]string, error) {
+func checkPackage(config *packageConfig, ac map[*analysis.Analyzer]matcher, importCallback func(string) error) ([]string, []byte, error) {
 	imp := &importer{
 		packageConfig: config,
 		fset:          token.NewFileSet(),
@@ -347,14 +334,14 @@ func checkPackage(config *packageConfig, factSaver saver, importCallback func(st
 	for _, file := range config.GoFiles {
 		include, err := config.shouldInclude(file)
 		if err != nil {
-			return nil, fmt.Errorf("error evaluating file %q: %v", file, err)
+			return nil, nil, fmt.Errorf("error evaluating file %q: %v", file, err)
 		}
 		if !include {
 			continue
 		}
 		s, err := parser.ParseFile(imp.fset, file, nil, parser.ParseComments)
 		if err != nil {
-			return nil, fmt.Errorf("error parsing file %q: %v", file, err)
+			return nil, nil, fmt.Errorf("error parsing file %q: %v", file, err)
 		}
 		syntax = append(syntax, s)
 	}
@@ -372,17 +359,17 @@ func checkPackage(config *packageConfig, factSaver saver, importCallback func(st
 	}
 	types, err := typeConfig.Check(config.ImportPath, imp.fset, syntax, typesInfo)
 	if err != nil && imp.lastErr != ErrSkip {
-		return nil, fmt.Errorf("error checking types: %w", err)
+		return nil, nil, fmt.Errorf("error checking types: %w", err)
 	}
 
 	// Load all package facts.
 	loader, err := config.factLoader()
 	if err != nil {
-		return nil, fmt.Errorf("error loading facts: %w", err)
+		return nil, nil, fmt.Errorf("error loading facts: %w", err)
 	}
 	facts, err := facts.Decode(types, loader)
 	if err != nil {
-		return nil, fmt.Errorf("error decoding facts: %w", err)
+		return nil, nil, fmt.Errorf("error decoding facts: %w", err)
 	}
 
 	// Register fact types and establish dependencies between analyzers.
@@ -404,7 +391,7 @@ func checkPackage(config *packageConfig, factSaver saver, importCallback func(st
 		}
 
 		// Prepare the matcher.
-		m := analyzerConfig[a]
+		m := ac[a]
 		report := func(d analysis.Diagnostic) {
 			if m.ShouldReport(d, imp.fset) {
 				diagnostics[a] = append(diagnostics[a], d)
@@ -445,22 +432,16 @@ func checkPackage(config *packageConfig, factSaver saver, importCallback func(st
 		return nil // Success.
 	}
 
-	// Visit all analysis recursively.
-	for a, _ := range analyzerConfig {
+	// Visit all analyzers recursively.
+	for a, _ := range ac {
 		if imp.lastErr == ErrSkip {
 			continue // No local analysis.
 		}
 		if err := visit(a); err != nil {
-			return nil, err // Already has context.
+			return nil, nil, err // Already has context.
 		}
 	}
 
-	// Write the output file.
-	factData := facts.Encode()
-	if err := factSaver(factData); err != nil {
-		return nil, fmt.Errorf("error: unable to save facts: %v", err)
-	}
-
 	// Convert all diagnostics to strings.
 	findings := make([]string, 0, len(diagnostics))
 	for a, ds := range diagnostics {
@@ -471,12 +452,16 @@ func checkPackage(config *packageConfig, factSaver saver, importCallback func(st
 	}
 
 	// Return all findings.
-	return findings, nil
+	factData := facts.Encode()
+	return findings, factData, nil
 }
 
 var (
-	packageFile = flag.String("package", "", "package configuration file (in JSON format)")
-	stdlibFile  = flag.String("stdlib", "", "stdlib configuration file (in JSON format)")
+	packageFile    = flag.String("package", "", "package configuration file (in JSON format)")
+	stdlibFile     = flag.String("stdlib", "", "stdlib configuration file (in JSON format)")
+	findingsOutput = flag.String("findings", "", "output file (or stdout, if not specified)")
+	factsOutput    = flag.String("facts", "", "output file for facts (optional)")
+	escapesOutput  = flag.String("escapes", "", "output file for escapes (optional)")
 )
 
 func loadConfig(file string, config interface{}) interface{} {
@@ -503,6 +488,7 @@ func Main() {
 
 	var (
 		findings []string
+		factData []byte
 		err      error
 	)
 
@@ -510,15 +496,50 @@ func Main() {
 	if *packageFile != "" && *stdlibFile != "" {
 		log.Fatalf("unable to perform stdlib and package analysis; provide only one!")
 	} else if *stdlibFile != "" {
+		// Perform basic analysis.
 		c := loadConfig(*stdlibFile, new(stdlibConfig)).(*stdlibConfig)
-		findings, err = checkStdlib(c)
+		findings, factData, err = checkStdlib(c, analyzerConfig)
 	} else if *packageFile != "" {
+		// Perform basic analysis.
 		c := loadConfig(*packageFile, new(packageConfig)).(*packageConfig)
-		findings, err = checkPackage(c, c.factSaver, nil)
+		findings, factData, err = checkPackage(c, analyzerConfig, nil)
+		// Do we need to do escape analysis?
+		if *escapesOutput != "" {
+			escapes, _, err := checkPackage(c, escapesConfig, nil)
+			if err != nil {
+				log.Fatalf("error performing escape analysis: %v", err)
+			}
+			f, err := os.OpenFile(*escapesOutput, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+			if err != nil {
+				log.Fatalf("unable to open output %q: %v", *escapesOutput, err)
+			}
+			defer f.Close()
+			for _, escape := range escapes {
+				fmt.Fprintf(f, "%s\n", escape)
+			}
+		}
 	} else {
 		log.Fatalf("please provide at least one of package or stdlib!")
 	}
 
+	// Save facts, if an output path was requested.
+	if *factsOutput != "" {
+		if err := ioutil.WriteFile(*factsOutput, factData, 0644); err != nil {
+			log.Fatalf("error saving facts to %q: %v", *factsOutput, err)
+		}
+	}
+
+	// Open the output file.
+	var w io.Writer = os.Stdout
+	if *findingsOutput != "" {
+		f, err := os.OpenFile(*findingsOutput, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+		if err != nil {
+			log.Fatalf("unable to open output %q: %v", *findingsOutput, err)
+		}
+		defer f.Close()
+		w = f
+	}
+
 	// Handle findings & errors.
 	if err != nil {
 		log.Fatalf("error checking package: %v", err)
@@ -527,9 +548,8 @@ func Main() {
 		return
 	}
 
-	// Print findings and exit with non-zero code.
+	// Print findings.
 	for _, finding := range findings {
-		fmt.Fprintf(os.Stdout, "%s\n", finding)
+		fmt.Fprintf(w, "%s\n", finding)
 	}
-	os.Exit(1)
 }
diff --git a/tools/nogo/register.go b/tools/nogo/register.go
index 62b499661d..34b173937b 100644
--- a/tools/nogo/register.go
+++ b/tools/nogo/register.go
@@ -26,6 +26,9 @@ func analyzers() (all []*analysis.Analyzer) {
 	for a, _ := range analyzerConfig {
 		all = append(all, a)
 	}
+	for a, _ := range escapesConfig {
+		all = append(all, a)
+	}
 	return all
 }
 

From 307366b64e7dbc7526213d59bf907f4c65f44a40 Mon Sep 17 00:00:00 2001
From: Zach Koopmans <zkoopmans@google.com>
Date: Mon, 31 Aug 2020 17:15:14 -0700
Subject: [PATCH 123/211] Fix bug in bazel build benchmark.

PiperOrigin-RevId: 329409802
---
 test/benchmarks/fs/bazel_test.go | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/test/benchmarks/fs/bazel_test.go b/test/benchmarks/fs/bazel_test.go
index fdbbfe280f..ef1b8e4eac 100644
--- a/test/benchmarks/fs/bazel_test.go
+++ b/test/benchmarks/fs/bazel_test.go
@@ -62,7 +62,7 @@ func runBuildBenchmark(b *testing.B, image, workdir, target string) {
 			container := machine.GetContainer(ctx, b)
 			defer container.CleanUp(ctx)
 
-			// Start a container and sleep by an order of b.N.
+			// Start a container and sleep.
 			if err := container.Spawn(ctx, dockerutil.RunOpts{
 				Image: image,
 			}, "sleep", fmt.Sprintf("%d", 1000000)); err != nil {
@@ -70,12 +70,13 @@ func runBuildBenchmark(b *testing.B, image, workdir, target string) {
 			}
 
 			// If we are running on a tmpfs, copy to /tmp which is a tmpfs.
+			prefix := ""
 			if bm.tmpfs {
 				if out, err := container.Exec(ctx, dockerutil.ExecOpts{},
 					"cp", "-r", workdir, "/tmp/."); err != nil {
 					b.Fatalf("failed to copy directory: %v (%s)", err, out)
 				}
-				workdir = "/tmp" + workdir
+				prefix = "/tmp"
 			}
 
 			// Restart profiles after the copy.
@@ -94,7 +95,7 @@ func runBuildBenchmark(b *testing.B, image, workdir, target string) {
 				b.StartTimer()
 
 				got, err := container.Exec(ctx, dockerutil.ExecOpts{
-					WorkDir: workdir,
+					WorkDir: prefix + workdir,
 				}, "bazel", "build", "-c", "opt", target)
 				if err != nil {
 					b.Fatalf("build failed with: %v", err)
@@ -107,7 +108,7 @@ func runBuildBenchmark(b *testing.B, image, workdir, target string) {
 				}
 				// Clean bazel in case we use b.N.
 				_, err = container.Exec(ctx, dockerutil.ExecOpts{
-					WorkDir: workdir,
+					WorkDir: prefix + workdir,
 				}, "bazel", "clean")
 				if err != nil {
 					b.Fatalf("build failed with: %v", err)

From 08ab6cbdddb4149b6ee3e7d9c59698b5d7db2028 Mon Sep 17 00:00:00 2001
From: Ian Lewis <ianlewis@google.com>
Date: Tue, 1 Sep 2020 01:26:10 -0700
Subject: [PATCH 124/211] Use 1080p background image.

This makes the background image on the top page 1/3 as big and allows it to
load in roughly half the time.

PiperOrigin-RevId: 329462030
---
 website/_sass/front.scss                   |   2 +-
 website/assets/images/background_1080p.jpg | Bin 0 -> 344285 bytes
 2 files changed, 1 insertion(+), 1 deletion(-)
 create mode 100644 website/assets/images/background_1080p.jpg

diff --git a/website/_sass/front.scss b/website/_sass/front.scss
index 0e4208f3cc..f1b0605600 100644
--- a/website/_sass/front.scss
+++ b/website/_sass/front.scss
@@ -1,5 +1,5 @@
 .jumbotron {
-  background-image: url(/assets/images/background.jpg);
+  background-image: url(/assets/images/background_1080p.jpg);
   background-position: center;
   background-repeat: no-repeat;
   background-size: cover;
diff --git a/website/assets/images/background_1080p.jpg b/website/assets/images/background_1080p.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d312595a679bcc728b131fa3b31e967f8ff40239
GIT binary patch
literal 344285
zcmb5Vhc}$x6F)AU2nj)yAkn+%(ISOqttHy(M7L`6L`^~nR@=q0T7q4@vpR1{lvQI_
zU%jv1d-?i&f9LxL{N_35InR0SxifR;+_`6FUURQTuV%;|sRMujvKu#Ul8IhFWLHG8
zcVxG2{*V6mzI9Ev@813&k>9<0=Pm{L{reQ;6cqO#JbZAUlA4l&f{KQU`r)HTG>`5-
zpnXjH=<)UUqyI6v@xPh3Zj)b6d_+k>dHwhQCtdwVMniu4<-ISrZqSh3q`7g6=El`u
zGS+KV{~wY4U%J-(*6llY?~zkn4=X>qL3Z<6^v&D1@7%h5lZ@=)jhi%Nw9mzEz0u~d
zy#3hy>mBj<UvJA>2e$8W&fcTb*&&y(qJQVXp!*H}go|6!v*PzP7nc7g;l}?bf{g5%
z>;DyUJ^1okn``>t`flC4cZ2N49kOc~&&6opXy3XPA@2V8>-O9DUw6vy(&@Ag%<fom
zO3*WSNM4PTJ-BtvhUOLxnG)F*Svv5&0I1tw`PJ`+ddUL4g~(hNk@^CW2DeLpn|z)z
z)wZw$ZYh>~)8sFQyF_G*VM(k2v2+<r{hIP9XrcK7nJyV_2e0k#xcC;Qok$%cV;KRU
z;?qR}L-mSV19%}d3g_>5MK+q>mqSiJ&4o6g1$L-(adpalF$wM)6~as_r?pt85_p&a
zbulBSi<^Za(u7DRuW2>bhNqc}OBb?L`B!9CXhSBi#S>dL(65=Ey%vwLyy0;%0xA9(
zQM9W<I<TUuwV}(4T+`qr@Y~V6SeX+gC*C?gy}e_FZLG#5K6Qq~tP{a%2Kq61qTeA0
zD1=yPhgx~{y_6=3X9@t!Cq?0EYx~&&yaye<Hh56y!S5m$X(vw!iVpsQC(sY}ZaQx7
z8w#ea=~1Aw+n<V`GkI(_!FpQGvpsw^!s^t8(h(mBIMgg(g9N<`=J81lrb*^)(^SxL
zRZdZt`bwP|fmV_vn0fca)BVgxyGpC489n~~VWvx&6C-1*jGo5nE?C^j1=(@bD;QiC
zlp$hF1H-610lj8zZnV=ugYE<hr*N$%MV0%`xY@J*1fttu+zeb%k#|NI^Y8%x7tv|<
zmJ8THwrn@P24}jY^B+1cx=&qb|1^fGrTOJ|o-&F-ix4#gGxz%j^dgS024qvJBj%?w
zg)+z0G@lqd?do)4U2{^;FWUfDWTv%M<pa5r4SZ2K%M;QXk6qQ;uv?W6>g{v&o&QY<
zD;V*6Kstwblnap7t{R_mRu4tQ9p_9#lk$%WYJzjIo;rTp{5i*@!*>GZ6$S-`)r<5&
zn-Ma;&!fAcYJgJ5;xvNOY4BYHa9884|9%32?ab}4&A+LgS>LHF%~G-V_iAk;M_Ne|
z*n*KT#us;IT>FZw8!Nd{cRLr-%|DEjx)X2G(sIxt=F<a&hW927{BZpue<8D%0-Qzl
zMd!l|`*j&N6Md0S_??|79y@3Mr@#vvUWA|N`?TQR+03(QETo;;{?TepF6jM$qve<7
zv<iIHRH@*R&&mnr=LG0-@SB`8>IvR;=f2*u`%ZFSyahouvQlo_Pe8<<@#0)k;!PnP
zu)rECxD$IFo|F6*wm7G0k9yR7T|i0XJuAvb0y5Vv-paw7=hZ$W^Ct;f>8Lbg*XD)f
znm(<Q(y0_MjiW)DNx2m!t+`<op7KVsV6aBw&ui(zeTf1`Pw`M=4%*R>|E3i-nxU9f
zH(Q{1C>QCbxX4s+pM-$m#yvKiWV@3_%0_>1>3&=LFjKSaf01{G3cu_inUB1T>NW6%
zk%a;Um!m4WRY1JM<Xih3tN2O5%o3_sQ$QIauQoS659?-->J)|H=P6OMCVy4@<c;-c
ztJwgqw#}W%YtFmnxh64nrg_>^vL=x69*7CIU+uOL<*1oT!OK|1wWK62^qr`i42PnC
zM&g+z2acNVFeAyT;hxq(cp`w<+^7A!Q^#*i{5c~n_^l2)C<R+AWmyePw`rfs)ig0!
z{5&u5#1X|u_;XW6qOcB|LwRPk=*OZt_katqrUUzS*yG`0PSGqUAXd&X?K`o(G*=nd
z{aJQ&@bu$`-f<63YD@)UQtm<-`HX%+tp6P(fAMyTV|=b7u}N_oIE0adghu5je0r$h
zduE4dl|JJMegq%dU7!G~hl^!Su&R>Jv;TncJ%1jcHrCd18opWgpjqkzuU7cMALNmQ
z&%aCtnukb?e9VcG7)xVlko0YPGwC(|iwf=BE3yyB`bH!4j@VnihT&dz4U7CZsLBj`
z6q6YrYx>b|VIP-eW2OL4QVi0CxRb1xo~5_V{^}JfEP15*mBw1{kc60{V3Ytm6BBc@
zjwr)=N(Qh*XfS`P!Y`ACyeEYrJxdq^1;1!84SEou=S3*RPu>go9mH^hCo%ar56GFN
z|M%-y(O~}UPyb4G#@Bci>ZjsEsXb%v$9WowU?%xk`e`ZpZU$SvY7L{)xt#FS0DJuv
zd+POxtmUk}V$}Pan+++*mF5&tEiO2zT^p`m@^#u7O~MIyTV?x83`Y0f3VMLwU;8jO
zW%7LWtvMpMg)?x^lqIH@4_A><!S`Ru;?xzH#&eP+*6vlh)vl<H1ow-#EdjxAinI>d
z?!Ou;k9{AgEzrbkS1Gn{W@2-}4f?TPUk3Z7?Y|LAvbJp}X4wwo#bPxkuh}^NZIV^x
zOS6cQ?U)<oYxS)O@)W%y!{5gLQTa1B56CMWYONZsxUVj|-rp|w_C=h?xeYqJn+rZ|
zEgEx+z;tuS2)Gr-f(ePT$z%q7_FCvZtrNJ2t}|zfjUkl}yn+vJbir0?ugI(^phfgo
zWPX0%$Y>umv?}qWD{!=IPA<LM-9(nn_HzG>QdVN2D5?VK{_(fUnTs}ad-pIp)Jlbp
zTHgHC!p5E5>H(eOzJvuK?wT8Ac5X~YBA>B(S7eWDSw%ICzf6AGiG=$bvH@)=cMi~g
zt`pPl%S>u$uD;--5aU@vSSkQ!HJGT#tlK6m{%a@ac?&r1v(Jkb@0B!y%h#QcuxLfO
zmLN|}@28@;yncp5)8I!i9@dw976DIFZx7jYwC{hl#v@3Dwp)Dyt^`Yls(<XP1Z!uT
z%~Xtm3m#WBy{^0S;tPr)-$kYyZu$A)*(CLk{Y8#rbMZw;uZIErifpT13r*5&V30z=
z(u@F34(u`9v=%};L|8AOjuOeeDVlrym9g_5TxJ=GbwhH>81r79KZCI>{z>ps${R|&
zBKu6hhjN>B#5zMZn00m|k#jGc{X9xvDOP}szCmdNd{s4KlQ5$m3-CY|^V|c7%JF7G
zZecDVIhd*@(x-T<%(KzwdR1Xrx}*aJ`8HfPQO#O1HHUZZkzt>rlE-CjJ9vC0XZyIQ
zoOJHx=qcw6Jkj*j`CNcghrk^MjxQ7LhXdYjoz%8U3IqVy*~$BXOhL6+6jt38bQjs-
zm0cjVqP_^hJ&|0-?Ud^RKh*3{r|HK6YwRW~pKjUYl%IF9v)|LOA3(6iyz}p^liqeZ
zkhd0%)#tt6Eb7-Kp@+b;IfBN+EF!!zd+`~$zjcw^k=$J60~VKJE8V#*nkI?+?No|#
zbTcU1Kc#o)MNkp8yiAjYF-!yM&G_scF&H$e<ssJg6x?{T8PWpNil~!%qQ!G?*t?T$
znyxEz(W<0NVRQnWfARn?rLSL+JrYw}Zvs9V-AOS=7$8IplC+1Z_8apj8u8y7q{26x
zYY$?Rfzcp;s@M`rLh;Sa3jk8TiJ6--`rnz&g=|8GFo)I;B-j>z>?6~DgtZGe9(pLJ
zhf|^n<wFAe!UNANU2M1o$6g$Fij*QngmQlC=-aEE<_i@i1U&<&z(<`tU?ZzMFeRPc
zfq`=aw3A5M_<hmL68ni>e#|cW(Z$QI{Z8ppZ3~TQuAwI~c%~q3M%XLksbTFthMZph
zd>PJn^YsEs&&?+bpUC~;UR`Fs_StqCy62Xyi1^&qT_Y8xALXTo=;|ZcKk2iK9wu)d
z#^##fld=p*;0XdeNluO*trooXvnOFz0Mzt4JRo6&Aug!MF)#0ulW-uauD7!)?XCml
zi4H2Z3c2R2C=L`!SRmyN#R@Wlc|$F($Y7QNw%ff1G7Vvmy8ItrkukoVG?UMD1gDv<
zkq2uVW=6aXO5U%PZq$o9;5VaZk0t4{<!a^ULOuIT6M%lDRCJdDDkHn;y)`e}z>-b}
zPpmHa@x%+vR^aYpsv762kR129Du&DpQ#L<nAQSrvKZah#ltdxt6lH~CU`<-kheT@i
zd`|6yyx?=&yK<HTZ%wc%)smw@MY;<EEn9MyT5q*WdZM>40Q-W2xuMoO)EP;l>Q>TR
zO_l0et?CZ=<4$OHPDsP*WPmM`5#hkIyR!7+itL^K{;AGrUgIz|q{8DuOA*2T_f#%Z
z`QLd}s$Hp`+kuP2{|pqBN9tTEz9N&iSt9;I=B1b^{q++&0HI%2Y-eI1lm9Mrfgh3s
z0)$Tj`Q?ZEnM%yKpr^@QqPaUdt5EjvRn?&WCkHqSwH6~YR`iad6<7AZc&nIf$zS{m
zzn0mnvWvMzbDf$=!KT&}IX!!YIZ9$ap;lCECqt*;jnoSjU-5%i7-{W4&7K!ZLM{3N
z#S#HHy?22!f2Qe4hshq1t^*mrnDnJtwvc!6xMpYvU;1m9Rk+9%89{Qkv+l@<t<*H*
z)Yh%g*BO~RP`wIy)IyUbMRUtfW~zMt!$M9kd!(NSVt2rIX?rFkx_fZp%W+?tlBoKb
ze{Ctay3C^u#ZE^RP-pP$2-jNUW|x-W0op+&z0N4obE2Y!M`EL1J=-rQ)qfj4?2ZBU
zR&R@T{QcH0-pn2T;2Vm-b-eOcM(p6DNAT8*<C7;ZD{_y+HIvoJL7_gc#$@?M9*ej)
zzhVE&!IlSv-7L`mmR@Cl@We#2<q>JPCI>lwmf|6ETD>8095F~#8LJHNpFzc__(eQ_
zCC>bKQC-hP^CWlG5Hwih#oIysSNMu-ucoLA)p+v&KK}4*bRdvvXmeF;*@6eX+D`g~
z>`>j+`X&E)x}E<p;xwX={dy0>Bh2_sjRkoHY<p4cY|wZS_|*9<f6@C=ZJ7{uR<=wq
z2_SCOuz7Q_w`=I$Di6UyE9M7W&J8oUyU6-vb*GKx+EpG6xsw`nShr5+d(nFA`6ll~
z|7I6t<KtMdMDe3{T}cr^GP#5NB!h}8vW$~lx!{}2pVj)?ysVl1BpL`l5Oob}vCNKC
zfOcYd#z)xvCzU=yz^hkc5wc#(kW|I;r6gJjoaLe0Stld}T^+L}E2?c)>4Js_Z+Vf_
zpoOwaI~STJ;%H_l$29DFcsHVHXxJVe;n^O3dC03NmY^?OxOmjA6ZJ{_LQ@mZd3j{(
zEimF-tSH18fdBk9z?6bOLFFH8S~qKaN5PC)i3vYs4;Gp<t<x!xz&V0$znk-OUv<IT
z4DWC3_sTZQ05bFa1#@>kHiTu@;`n-Gmr(!7nh)Q*wgR)6^ON@*9i+tE4Xd2eAHh>9
z7)73ATO*GmCeKD~`@gH$7x8(Nx+}71Qa#2A0gyYq(<>Sz;Snt$M8Bs)&v!KK_wnPx
zCQdSi*Hk2H6MIE=Y^Y8#3$L%RmuFPQ$p72;lvh0cpW;|deokJuhOc%Jn?H`5KJ3j1
zX@AQAOc7-4bGF2FTra^-9O%DXIA0ersBh`MCsnEL*!4=}g{q40t$^M-Mjb67L@A+3
z@`Uo@p44H1T3v(f{w0TaJS%U$aA8jkjMinAJ;d*Qb*6@L^^Wk~Y}cbw=y4uT()zN%
z<J?@+oQ)>8EyRBIli+ex)j~X8kuAXwN?u`et-FB+tqb8+F5wx#9j_WHeEPX#KWj+r
zbHCk*;xo0D6l3<iW=*lk^100cAwNk#w~xxD`LVkz0pMsc*em|v;Pt#iV+;rn7F<v8
zGOd4)0k>r%GBKJoZttE-8B6+oOi-cHCN^KEhZecUZxicn-m=Ow{f0!lwOx_*Y0?o5
z?`a<<5=iZ|Fe_sZrDd!|E`>_HR6CHsv|lg5O5+?HF+WY@JGJH079%Ocbc}6HTv}p0
zF8JDpK6Knh=(N5d?b{cK(fBGX)wT-_*@|CkK%Wbn0bRyBBz`j<vsCMU^>dG3BmG_1
zo+jR-=Sm58KZNhgxtmE2uQpwgElahTQy1rto3SkZM_wLzMK&j8v4?yxvuq^B5|gnr
zOa0M2lh^x%JrnmYTsa|HrrHF19F!Omn4MN{oYT(oLpQiv?&CM}w+X@A-DpAtg)Ylj
zz?7w*@5jpz>AQ|!R({jEp`q;ZGQ^GVnEWRb2<0Z~5Wei^pSL$swY3~fT|~G++1tC0
zaf<RTMltvIvICkLwUtC@W9q<?omTvYgS!7D;qF-(;b1CFq}ip@t7Iv<@;I2+N*t-e
zk)CCIq6H|&G;eW>E%x<YhO^FwUAs(7DPOyJw;v$Y53JIB=J{<uYf59MRh;D2D;BHH
zv-L6$odg98P2Kln28u@(`9C@A`S{JE7MHJZyZ!}6xv-O|lmL<{hfIoVg_S4DcsrfM
zilzl?ijDIOjb=V1mj+6YiqAhemYj8ZzH7-HfA`o(^w{#$G0mtis11x{OORaQ<+mv8
z-#Tbj;ms;2vPto(_o&3YSMu$(Lt+{(gTU4QnLYSX(!ss>TOk!d$KV7=@Ds|$<PiKC
zw!-hR{(3)G@cPfMK_#A!q(Tu2L8GTYc|hhnG`~m!hxuehEG9|lmq(0|7-_#df?(Yg
zZM-y{gR0aLfeS9~<A9u^nrBM}xlc-Z2>;HL`T8SROuU;d&~!zQ!P=s#Ir}S#u6SPs
zt3i{T7oTh5PR#g~5if{s=!IE^eBorroDoFA1sCv*b#QW@rpN;laD|gq>RI&V&tTIY
zl0b!t*E=%2)Afl6nbr`Mj@AuYkY{#$d(;s{lyIx~SyrE-nm&Gcl2>F?vKsd&qneOE
zHeGi`CYvJPQx*jW@t^|!L+l<-)t;UFmi%P{uQajga>4tj6adW_=t?^s!8MbF8hZ(?
z%UQ>w{MQ3!5>8lE>psx5hOGk<+?$>-VD4#JqSp@pEsB|(@M+4-O+!^X+1WDZ^LQdP
z7rHgaV`~TuL{mr?!mB%;8KCXkAl);>1s(UE8AuV<j#!&*tVNfBhd(~hv1-Ty1wGvO
z2A%6jsB&nR`dP|pa%PKsx<FUT-KP<~$Rxf$0giPZro~#i#sMDxcmXeiQolM$+%#Iz
ziE_<c*V0G9-_E%kB=Gw?)Rxz(Us(KRQCKLJlhKD*K7&J>Qv6xV-7uE&P*p&MW?dH#
zaRa`)3+CO}R<^-aKZJ}o*pvrGek&B<IZy0ynB4nbTHB(PbPvRM1U0#z%NzK>7*c>^
z0-VON>pwl*z4&O#t(HH{;Gh<H-mBTB-R|aRN2g!Ht5$B^n0?~=IJm^lM6F`sk~&xd
z(?(jo*}5r<sdOIHORTcfFaX*wHk37wC8LE?9C!CMk-0BP*!1wotxq1Xm>6K8oErC=
z6DzE8Q`7m(xtHY?S)HP0f?Q2uPTRD)bIY^{&18bkr^;T@u+{xVGcn}}4T5&gdTP|4
z*8Y4ZqXGfIQNT=7QU2odcPc-{g`1!DVtPv5<&r^o8B+rux@bFf81nC9Ki#MG6|Mv+
z=zFeB`mq<-eRdD5dS?W-|BCE8R`fGZD4i1leOIbG>P?nILU9Yh@vs|_t+~dh))rT-
zZ=ppH`-Ih2$vtyf-~EaTVpRALxAokA>rcq1te^*5{J`<E;Xm_FKS)}DiU&C!qy$ro
z9hiy@)b__v=lrb7LvQsH70uHIhqAL0i^MTzU7V$L^W!_<n)n8<+a}m=hkX&mdtp%3
z{#(t{*!yvqHX(hbBo-IS{P&&2koI<9?nag~9!dyek=3wjP_Av15IM~`4U1*MBx=i=
zhYAH}Pze64!HZF&R?^*7k9sr}4yjRdeY&Iv@$vTCyt3gd?lCvofFC0EmkCtbj>Bg`
zW9>m{a7>Krf{LSAFYg_<{Z_edK81kjvT3K?4|7SQTRcQvMpU)`62kS>-P+W}q3MTA
zFzib2>}S3oH!B3KD?b-8W2V$36WYxSTVw?_#{!GWZXY~!!kxj0MmGptkm!qDAd3;e
zxo*wr#0s^7t#BAB<(#Z;Yg_Mw7)6H%L8+Ha45fCp<tukD6#P$GYs&vzz$WFKT21UK
zJA0*fAID^^&Ie+<b`U<pp)tv3yMaAPX7|4<kcTDxEt%{=LG)cpzT2eT&<E9Rpn#SP
zld0b0cPztz^vtDT-7)^IVr?@qf!kKPnuD8lI`GfTZgyOb9KH)(6Ym?Njq{NH8Lv{%
ziSG<s)!Lb3WT!o;7$)JT**e$puCs}Lm_-OciXoP&w#|5yN_}j2sCg)I4E;{)ip)?r
z$eGUH+E9q&0my4-8cSoulTLQGSZCCy<fOptCgW+eUyM()gkB=XWsO%Y?#Zx;J<C>j
z-7T=?Bf#rI6VklQk9ym%3bUUKj`X8fWTFyaDD&_CAbA_S_Vk*`LyPp!&{5E>K4DCC
zh^%@TxWjDVH&dbk`2MJVRmXLqK~mqCZ@)fUm{iF#99`-mx^%#)bCPK?ab`C!GawTs
z$LigbIPWwqr#iG7ov(v6^(#~7em<AB(Z^4aUS!NVj>42S9eG?RZ#U5G=B<(M=4&4K
zrnZ?dtO;2q`V-0;7qc9<O&e6*es0BvVS(~h=DShko77XL`0-s6t5aXjmS9G%94*F*
z7^J36iCFbcEPVlmKo<9QldM`4;a0qSim1A<6F0QzcF?_S8Yaz>L^s)%+~`=kUHh32
zf0Hoz3u0GfNG%G~T+)qmMbV3aCv_pGegta4Y^Cr6=X^S3z_sU#%48key}DOK2303c
z-&xVD5~RjIJX4C$`RR`g*;iEF4#(0Qi$+ANE6NJ22b*T1SIxBx{e+LTe!TGmvwo?0
zIDqmKz8&<q^rYFev}`B#x;~bxoS4WSdYn9cCO$Fk(!-8k{Y0syoJlq8{9L#xZG^(X
zI5&_XK=kj6fBQN2o;>ZR_+<U*U&l<dz%wN(2}ecQ7JNcR1V?|!aYSfL!saWD7EKTx
zG-^ATo;ziGPZ8Fl?w2hFe11h%c8nvihb6ckRk}Hk?+0q#Q#WY3E?&)T!4h+Uxzn%~
zfOn|&lxEAN&~ZiHkA^KFgI}^X=?ed@!_>k#9MJH_71=D^(g>k1nx`O2>A{*e%c`dI
z_8lVXiYyVS(#;SRKC8xj+Qs(30)LL={+Ai%6~U`J$6cMHK7D3~BycBmou{+^31W;!
zpvoDhhs&>xt$FRM>|c)!L%_Vg_8U?AoHJ&J-mh4G<*}u?%yr+|LcNc~3#@YP&Sd;h
z^?NC@mla83E*PsS59BgGhdk8*-9;27vTH<Z9-*L;k{M@Yw)FQsDRQENG*lq^#w`KE
z6rY{{R_och`LnJ`7*%O1m{U+j6}nzWnB%>u!WU1|&u8!sseWSUC?<GKjbB&luiVgE
zkFq3p8v3Z}kC=0-G)ut<(KW1PRr)RHOn5#cUMs`b`0}_ELTQ!e<(g?O&jM9A>H!VL
z#EmOAeO9wk9rgE23Bo`*uE=)XQu!(VoTbDLy3F)semO}8-qHh(A2!x?`)GnUVzS?N
zg?w)b=*0phi&A&!MW(4ux|s~sS73e~%UH>kV2|XQ{G~?YTAnG2dP!?r3%~Z`b;n&J
z^u8)6U~&d2R0egqyH_XAZ(oiaCow?CDggV9ODG_<lbnE@350Af5z-Y(-m%Bf@C$3w
zOZAh2CXja2r|GhGwkiT|dxP&tCo`Y{SCiJrbn`jC!HB@N^|szyu?hwj3s^7lm(%6X
z!2yh*g<r2^yc8^vZ=><;lw(}0RfP2|`3Wu)RRx>I&g?5PAU_Y94&Bu7vY@~UuKD%b
zm*74ILYHLDDL>5E@YI5(<lOpjrCa}3o}5>8&Aky=A=})iaY?;EDFG9uK2ODEWrT8D
z{XSy*)hOEO_I%e$-KxSgIb*vZ0Q&E_fy!3j(@H??zG(jB99Gu!<Is1jIFS3J?0%&Z
zd;Wi=u1sHGT=Ded=NqW%Y{_(^XVCq>NOosJl%Hfpvu6AXcr40_+dY=l?WJI0QAfzd
zKYwCSppS}0Gpy_Bu!{Yv9mG0G&x39wStEMulWPDXsMeg}_~aiOe^$?IN&`bp^c=c!
zxFR3+Eb7sxA^1W0iaJi9wxz<z|AvGf|B7TbI}{DbtR-OeBIqf2kjc@OASb)eTzzJS
ziS+ksQv28{iy2V{A?YArJJa=zwDf`0Du;D>v8k+Gh-PZ5)F+L9q^@SYE$|pIEr&Ed
zh6In9JybR4DlthFeOc2Dx|uk39Ff$a7rB^dAiJaQ9uR-rlM7=?c%8L4>2D`IrdxH&
z=*?^S5>8Z^W>dmI+l!vU*TKS(?kDJtHe0^hXh?8*7J!@wg%u?BX^B?~C4ZBX^Bu<@
z?V%8*4JT))zsN`dE};mrTT#UqyG>ZzHj8Jll+*(z?&->d9xIVNbUHIn6s&i@p=nxa
zxfzNpmJ@JlnWusMqVT>6Y6xs9AIcplwg?p)gzf7^5}TRZQVLBtbocAC$kx*u+~Iu?
zCN#tDXeaq!^+(aU<rjOR;?Rnmp(Yetv2MORdw}DVWMsa!_Bo7$CegQ5Xcze=JR7A>
z!=(;s^j(%=;tjKh69w#hl!%2}n$xEG0BJ8!tRdgOQP_K#E3#K9NgG8^C)h@8(q((2
z?)tJ8iNm2)y(Va@uG=k<EL$d92`;nZ>If<I1HUL@0BUZfSOy@arPJv@f@F1&AV`$F
zU6q+6Hrw$FujBo+FBjXn-g)S-W<XmHQFj+IsAPj5*ziq!Q(78BtI@g%CGVpJ5M^6!
zBj5G}Ox?@tWUSxoCDiy9%r+m(Dpg1RUijuqNh)95$87p&#V3#V!wt}kU0u|z28%sA
zzG`O0$8wI0x9SYA2lL|d9k1uM2{M|BlGmF3+}Xc8#?^dtDQy);sb%D96G>YsF2E1a
zHvHp!C%jGD`ic2iePa=-h{yv-y|K05O*a_|M`SH>QN#hZ(km&(kMH0Vz64Y#tFz)a
z_jC|#s2o}_30&%u-zHE3(A8>$|BeA4Jr3N4m5oah9oG)|bhJak{rV-)!crbxZ3AJZ
z2<mK_!K~Hdnx6deR&3VLuap9P%%5_s017OZEn0=E{mV3xx8KM+!ZWe29Sa~(sP3tk
zgk6*yc=enjg9yqA7an?%qem~!G?Ult4%E%^Udj;9w~_>k`WcESPyFT#A$<_Uf9=8o
zE4Hj+V0(M(Sm%C$KVn6URz%^vu|iLmHcL(;r-fya1g|wNfXAf#tFf;4I$rU7yFOzS
zawq9(aI?_hfO)>{LVCB&XD41=y0Sqv%hQq@Om)EX+Z<kD9{Ss#Ke~IHML|eng<MY7
zsw^qRT<QvDoILq}_>pt7@g5nAV~O^1zsnE7u;=2Nho8l3+oXHjr#nRs^`gg@wwlF{
z1a0Qi#Ap);-qZd2y+HMiil9gRgy)<be~FnG8X$J+ohSo6A4|6$T1yFQOZ?r+jk_dE
zU#~XEb=64Yw|QLJDa8FOu3~zk_ht3eeX&HCqN22h(SvH8JtBX=lL?KD6x%@mPpw7%
zDtsl832#H=1#`wH;7vDOnMU7a<lipTcBvC}>!QmyvgT9$Y$D!%{9E!=U~WD6t^8Du
z?{e6lkxQ9}$Il@1Z6M2Waalp{y{KFGpY)s|eNPkZt>!=5;X!)dB*a;U*jbg+;zFMC
zrBuau$w$O``b>7=&s*+ef`MlXb5nZ&CnzG@YDgyNL{_rQ*5B->w>Wv>8)CIJIYH%n
z5@#N;$RB_W3VRs60##*GyA$?{Z-X;nNSgGZTw7JGk6A>E!n@n3w>|xtTW-?hxIem#
zdCyy3c>>*iRk*oGGniMAjMI1IFH;iWoo^r&YPXn&TKIw*mWsI?wb%Kh6a{@aur0*f
zgTs-s{a;5Eja6RPxDmFZFy&tmAiobur*nVjqW=&n<OT}Sz+k0ARfE}j4@a&qEYi2`
zHeu<Zm9ZB!dJ}abe=eNvZCrfQMmIerEGFO(vr0+A9?so0l|X^h)W(fz@lBC`ZId|~
z+2{NYS7c{n{Kgf(--^Y1PO@uWl%oe;O#sJLzHye1;y_sWvDo(`zV=vA=9Qevukkt}
zLjrxH+~I+-0J<e<mdN15@Ay_^R^aNlm8D8Zyd<7)zmrPw_Z3-^%{3IPCii_;#-apR
ziy`pMb9WRamK<%Q&#%rcUi7x>`H;Gv6-BVFVg0PR6|yx^wUug7jml<gVdcGu-@hu-
z-}2ZN7)mB8Fx7D~4!#R4YZuZtd*HQB@)6oWVUifJRey&X4JO(rSX0oVxQJU|gUU}i
zM<4drjpUN5@>OO{(?9VRF?p+_S)9tJrzNvBBg^!n-|+@bjf6~6$|{PWOv6ryVoK@~
zuji|NAb$fZ9uJMlFY}*D>Dd;2SQbf}6_yA~n0NI|r~|Bvr}KzM2#ZyHTv#LTGsa2d
zFz(aosxL_&hWJtmIpZ}O(|pU$GP^otR8^Ow3iY?s5lxP(A3Xv5_IC?*`(hHJ0ik>6
z+dzn6zR$P0G2xItN6k5{NiN(FCRz}CQQrL){q@H$>3bp<YvgC<>Lc)M1McVy1#2zm
zg6aRd#G?t-X|9W(s2U>yIwGl}T;msEm0_UAKdKwRJ28ijXQF5i{_@{vcT=+S3<8fP
zmRZ(~i%`!_OQSbSSxQ0#ZT(cBBSaFsFA_#GR8Tv8ub!vS&gQ8Aq>tZNrv0K*M{5zG
zxj$EaA~c!u*7sTBVtXQodfQ<49&`#plp1<KPv;af;*MAjXuA&zMBPnsyct&5=R$Cx
zSB$QBl!WM=f$#isEDMd|bvYbwDRGe+Ggx<M*Pc^K-TLG*fZajd>1tH01bzOHn2nyg
z$Pk)sT}`pH;iNKYY%67w(#W4u)$rYKLkc&}9yy~$KaOZYwJd{0)5g8>hK^Y+_1P3G
z+oE~$Yz6eE{`liQ=mjuMJ4tV*&bM!OG|m(@6&xs`Z8j<l^?Df*cWyRxrxBn+iNg%a
zQ8`rSPl!S_Z+oDF<=h6U1o|Nc>+Brcmx)aIZL*-jq@r<`1rl51U)$A<YkYo9DjW_*
zJJ$y2*>KIYNl*4(HhVcC#^t?EFPJ*(cn}>2eNn?vf9?p4xu<Yyi$k1^`qL(C?-nz=
zNP3eJTfkc&qVxj#ZKkdEQhu)^E@$$I0Bi{1>%9@ClcOK_z*5{=SmC15%y1V>&pfCw
z5+jZs{+&+23gQiRV;d{VhVBgD=em+g-RFh&Z3R_Wy1lL&ZQ88L%iG)qL<+sWhY5f1
z7`iW(^{8-a2jLP!sc4>@npSvbyW75!7y3u<<dPF1xgihuw4)MFVDK|LI#Miecsl!p
z5^U0SB0qz0DCJPIr;9pTrXy)f!aLXh=%h*=9Q9VIZ#;k$CHlDCV-XR}N|(OF;4Gr;
z8|q@sH_@3DBYj2o5?AoA$i}Y1Utc$PS_*Ab{u89rU4&rU*8V4@r$<cV7Qj$cmwEq*
zxPN!bBgO|x!>2sB`YPPnO-?e2@9pcMAzia=?<&VQ0gs;>npAdS{1Rr8o1d31%H4xM
z7O3|?L#kAx1(CdQz^0U1{%4P!5Gfup>m$nQfqY6A@oY*jHo_$*M+r7pWD{Njz(KaZ
z4HupY^E3-p9+~ZLZ$|MP{w&344S$nte0<$mAl{Oc-%(IWIWiv_M*6_L18+DnQH>=f
zU)b%?YHoYJ5-gVG$o6pwIk4BGK2%aF>P)LoR}rd&)zu%p@U5_Aa^VMhyZKe#*VGua
zt&$SqJn^0FrkZ(iX!raP-W;d0fL*~CYiBbk!;9AE8aW1>Xw8e);?~bB+p26$Bs7Z)
zx<_$%sl#mALP=4&hGG3($J9<5uUj>iC8i@RzgY|t^laL>Rq)5x6^-2wNe*h+&UDjG
z^Y=!_rRTEl1zStT<)GsSf#fZ1?KE08`_d6l^r_tY95dEYf25;P$fqL#y;N0VoXm-v
z?(@ukZ>j-tzuI_u?GTl&(QW^_)#W#mdsBipP2F;R_=A5gr7ngz7LD7UM5G@3_7oV}
zxlVdr*DtvkhMshcNR|~#1k%x#0y1-*p?A3UUHB(+%fFenSZW&ySSe3W#!%Rf@uXU^
zYv>$Mx<1=Oh~)z!DVvYrF28`yU6gs)T{?&s)qv!1@(aG##fAQ*FR{aS%vSn~iRBi}
zl;hNbm8Q0{j63rJ<@07Rdgrm2qyCdCGA8b-ukDk~J${fQYL_;&`tO!iDOD(+dUzkz
zt9UME6T=F_^+5lnStcN}cF=u4-Bs}kvaNO(Kh27^H{8xOxi`t&I9-k;wD%F#j8Asw
zkc?-dObx()4Q_AvdN6c?PQCz0$?tK!9un7N(vTHWty|*;Up#9AH6`WU+Wkl)%EFKH
z&3!=bH*Ft=uE=7=3U@y9E*Ll@!>#$na$|%kyYlhOLzQZ3p|<tgo=3RJ6;Z6r%V<wq
z2+#{w?a%wVL3nD^dq&rdd~tBfJam708v8L7+as+J$cLMrZl0`!gJ+*^j^KWj;3ZuS
zFFXqVHNJp3{OfHV2}Kp%igX>0Trucpct={_N{z~Wo2bAZp5Qi+t#K<P?YN|NcHvx4
zBI!Nt71`RWZ!Rg|p)9bjwNs7|X#Ucp#*6-w1B0V_v`XpVVV9UX;T_QJitKB?NjO+>
zg!q)(8LlO&qP`EGX@baKY<C|<wW&PL5&8B5Pu+etPdA?P@U1c+?}p4wPlH}k7uYqL
zVfLjw^~%wyT@I3a!754gISXlYe{F0+4DIbF;Ty%IsMfEOq-v1*hidkw>Bq!-5MFdB
z%T|kQYHvC}(8DZhu%aHG{+_aV3{yOoG`AZ9x-BK85q)G)qBW8K*p-<s)UY=p>`Xo?
zQBj`>oNG(#SCT^MRIfjMzT~q$<<}*1(M*irA1ofF9#hw^sgz#Kh~QeW(Q49h9q~I`
zLO(T#t4e>UMW9t5Hy`DWk~}fv<ocW0UH7+v?V%DDG`_v90M)U36EBe0CD63iXAHg_
zD7iEGI<DgV)X+4$%ZI=D^6xz5;P@)8^petcJ24j?N8%gF43uNO%(9$jwgqRKv2$&=
zzDhi6I)_VInu7_{cnpI}P+O-XN!Rb8U}nbm7mA(z)Zj2{P>WRa!-t3uYfm#Y+I6Bo
zK1_p0*<o><BIp)z%3Oz`h%J<EwIx5#MoQ9*TFm~}>a!)t9RZ9HMri&%zX8$N1{G;D
z)VYiS?n%-<+f>{ELVD&mE0CB0Ip?<x*8nNS=GR@<0V)3BZ`q^4;q;6J)<qF784^jA
z(UoB7$_mpYDo*sEetg#3!ZSP8HiJWUumw&W!KiNh`n=IS`sKQwhn}2s?@-QsGnh~X
zYpTNDUk}e0NG}7It4}`W>+AKGA;!pF>}cgUo{R6B(hj~XU5n;|0rR>~MosM*+XmSV
z+V(;Yi|7jrna#2V3DxkegRXQz#^9l`>=$V^>k1HRVV)HADJD#7@Q30pDK;>Xy;?dR
z{<>X<v)XK9|F-~cwx0a&YADl#rd|Y-D$L`)g2l5u)^{4ogeLbGQx7b?PwTucnSos)
zchUel(y9%RKh&1KM@__hD_Y*=m8E%3noPPVOW+vci-Nk@%Ody+BUnpit2;Ky8||&)
zXjh`jAB*uCOQhC>dR9;{{Ch-{f$TbNN>4>vQ*_b#*t#i}0dpJ$=HO(~NU4?qvr2EA
zJCiigj6C#9Ot-cDTC!8kK*H)q=zl16ae?2HYuKmte(cpVinKCzJ1w5=_j?uA>we2O
zOp8n}&NIO0VjSzFJ!88=7jTa7sj&h#@}Y*3?kLPTj_5jB<~Z~!GPOXNWb+-cW>@Eq
z**pJ<a1!cYx~owl<JQtl0f`2Xuux^HtUbNM)0up6{hvnON|IeKv-`Ya^1Y}ty5O|?
zTg%n<(ie3?Ks;ZpMuf06n{6|xxt-de%6`&7xjP@vrA8wEPPO>~b(ffER7Gn@DZK8#
zl#c!iM?2f}zn9@L>q#ps(y{Qh<in|r6mW-@eKz{XI|jUA&dUo5fY{+re*Uel1OUce
zrA*H3QZemLS;9#KM7gccPE4y#*Ff(3ucaz^!se!?VY|AIB8F~e$ovM9R(^wfv!U@0
zZA8nt?b!P4fkI3#gECXsyXxk#B!qe#F9Qkn0;!k9Irzzq0sos}`~qfbspHk*>9@4=
z9+TQxwXmLxMt^<mWMUq8U2nIUv^W%Rxm0y}zQ?4hXIRr<YiU|m6?rVrBMt-YRbMwe
zjJf7bnepi5lDCDrOt`M`y&2&|^-shA89+1%!HA6*W7BBhTuPVO%>!0-Zm9pF5_l%R
z?USrl?n@!YiwE6$<>l_V?aI#Wqj=Mz<vR>yEFn(<`(FxC>r~RlzsX%c>Z*(%HiS+0
ziw24P&Q68&aAn+b?x%a_{PmIflqYnj`qLA$udA-lo(4YcJ)xGauvdPAx1cn!DSS(k
z<7T3uA+(R?fJDadIevMkuf#(F%e8KLTYaTJJ?d8uH1L-Ah-yr~5RbVIyq0ZFgcMjB
zfDzhLiyMjNy$UmqVy-$IpLds4!g{*EDDkxHJ()As^V#W!ccR~%b{mj=Lq=Bo<wXgO
zNE*iB4!06Rhu>&tKX>o@5#IZ4`r4RDn_MeShEfpNZJDdiGQ_LG&6HDF-PnI4%B>8j
zkX1FH{^KI8l)*`!QVpc*+afP3)ENHh`3k@vgzsoQTeA3dZ{X$6AbJ~88sin2!d}ap
zB9#;<zA4QRgdrh;51(G_iSOIqud;(g^B8}jYFPS+Q?P;SNGBw+P><Gs=4Qbe&#{EK
z1>8Mupqwl$D|P&%M$pRU6y)Yu*{VD06|xN{X0+!%Z~D@L<zFc0r16OgFx<6;+~1e+
zm`kd|RRakBVI>o%q*}i}U&S%;;RH!%OUPCo5ebA}-Fo_V$qLI{nOq8o9BP9oUW~|g
zpsX_z^vn)WnPgH^Uyyb)AKgp?G}sz5E7wc=C%YBX894W#nZ&d}ub;~Dp;3an_U2eg
z<z3&lBC1#IK?yOH8{cXY_>ILXOXPiOuA4p+b+T#%!JO^FCOCOJa(;$9r;Ly4i512t
z!U<M7|E7>nHhdyCbY6pgxK3ZbY|t3;`pzqTcJH2uwqF@gTB^YK2GdhpI$VJt*=w+V
zyEullzOts``i0xPk78IM<@jyCI_eF0tr?oL`W^j=PsE1ft7WrJPBh6_3VPnY(dawR
zT>VAcjhidW5$01+gEjCrmf{h1FH3Bv@aUE}Tu<kq;3^GR$!s0SniJG_%alzRVvN4L
zV|~tMA-{Cp+F}iq%Xz?+{m2P5FXThNP;Mntx!;JIi<U3E#=S<(a+%~mOH~JkdZ9VO
za7XR@JjuF#%>K`9W#B`RHR&k1h}+*kmvx|wne^$QUgZjmM42GUSd)B$!>ayPL%Ul-
zQVI&|{fc)^+Q=-?*wdm=t`gd9o5?cFAU<Hv;)ybEjA-Jq0L?>^Xhf4LgG&XDI7&eG
z4Cb@hH6n`>b|MdkY7CjRuCF5OgsUt`RQC9v*{iIzHlj4kIg4J&XHRBaPSjpDvdQ0%
zQd>WF``xu^<CcC6SfqT9*4MUg^O8M5=8Yfr2GyV=Ed#@~<}R5nkOen-uaN;0!meA$
z`ue%npFVNTqBqMmq}bI~pQk$Z@8;^6jaTWp4BD0Q=@U9)9ke2kt*r|_Uq}RuIMbPr
zW$y&v*E#sbaWeZcVcx)6OGW9)c18VahH%HhtiQ0(buKXf@cVc8+8;-?ix)vfA_&H!
zD!pc<nV%-Z>^}z%aemu|8u;qC;K#>acyGRvTik1>?%lR+ym836U@FBh@Q!xtg({{>
zfHhWkN9nsSr~D_uhbKRV?)rEt(XT|eKX#f9##mok$?7@g8!25%4CmssY&$M(xwG}h
zB+fx4M79K9hZ^xq^f7DPCFgM3zkkL?v1{E~pN#mAw|@NMdfx<FKHYQUf)B7CS<p)D
z*%-jff^rtp%Rl3~m%MW}qzWekmRPUIbeZkA2Wv~U<El;?KbDW7E2_LYT+Za}T#9Km
zTW<VfURPkpI-b)aPhL>2zJ3Zt^@TOL$e2}1ub#uBtlP67W54HvUO2)9H4d9hjxUdM
zqnO)b>+4^RPJA>s6q2W5B(Y`^*@yzYKmX;cO$Re#n>WLE<ywE)0;W8T3O{6T+e#__
zs88>aJCV^Z=lG}ZrVn>nNjJYMJ>)U&Bi><WPI>?Y2&V$ew?0F30i=D0ktGgMPsv1v
z+cvj>??1N+j!8M=%eC`eu>8DDbdH5te_Uj|v3<5^yNcT?uMcHrB*6bl?<SzVx;<8V
zEDPx$d{9A{hSNtHe4X#Oqn8RZYmv5RVdML&Rxi+C;Y_gET`%r*-ab71ExpUwkp0N8
z$lAN%q9PVom7hpofPaW0iVrE=`ca?dh&R~nm2r$ri)Oly^qC<P0BWm>fPjbDE~e7~
zBP^pYv5z_ZFdGLg#I<i#dH6mB)P-cb^kJh(g|@%c5RGgT=d{4Cf`cvNSC-tsx;6(v
zY3O^EpUU8xB3eHI#JeZH=|^WXNHuu1Ln)G{KGc=hGi{AgCiKPrdY%_#Iq%0cEX&iY
z1YW6om#w;#uM2?s0fSmkohq^!3EJ4zEopend$ge#6Ngl#_Xeu|r#tRBPnN##=`}Kl
zIFhG3)OCI<FoNKo4T<#Qx!EZooMjHo(M$Sf!2SlDG)4c}CVM7%OfKRi_GN*S;$xo*
zDTCaGip8$+H^QU$@{-L?KWg1>(2)Bo!qHwc9lQv0P7*hhRGWy5_VpO<!E7mpW2(N5
zL^mG`LfW>t*2d%a(=4TvfLORqe?gBmZ58s&o<dW{o@ZQtp!b%C_QZJRY@ZWfcgKtz
z9LW-fi!9E5gBB2TZQuS|`O~RWyl#L<KyQW9Lu?CFU14aEUsR3gAe~B}aCsyqh(#lt
zIl419b}RF~2qAmN75%+ZobSgUy_)*bUcUcz@hD}UNO(p1P9~_jAkG{ABo@TAHh8`E
zT2Q}<b>yp;Um8pc{s1)gv8@BRQ)NNf*h}pAn^@)-WS-%oZ?(ku+bh@Re04W$Ob@a`
z)&2-N3C-s_^!#N1D19|^DRw(@<KknY6ga`yqjU1h)URwyoO&XJ%6M*#lP~^?%pI21
zpufGyfGGayQdKC#YjX>-xGD(Ksp>WT9E=b~IS!j1cZhelMo3jzRnx+}#bu0x>IxcP
zMe}4ovEJkG%VF6GkVWF;Ozsv#aBzUVc0x&M<TTy!9AI*o()ELcmG4pO7SD&{8nf?R
zG&Q}#I~NsjE#db9!QAyhX5%NWnKf%ECLLCFAoecVO)vZb^cQ&ZxW)Ms#5RhpFDZVY
zZGEg``kp#41d;0yYH@hL&tAQhv)8GMK4E-N^N$6}YPzsZxAV7K>|d8su4_B%ot^rT
ziw;$=>(lMyfMKA_R}VM;n;Hc+&!NjLST877`ZW}!fi_4LAIM$ui;n+%c8Re^)URz*
zeJF(43rJHd)Fq@<H}^%T7nd%deO*l|wp%)2duq>JzzquSKY!qtpjJQtv<2?*^DQO=
zYn%s+UR8n?W_!D`8Y7WU>}u74terb7mnq1nhHcn9gfh3=a*6EqG5o2vE+8ebI{E>J
zV<LyUR>sG_X7|DMR$WrL#ws<|O;#8Ew;1tN4mpb~Fb9E59v$`jhu!P_HgOYN;ZYOI
z(kQ<HRDy>kx2%wWl-`-0AiQt@1Mgiv548s)$eRmy5QI6=iQQ<PdB+EN(TypkP4l<5
zVE}=@6h94*m6ANWATWFZe^lMwIvSy6Rw=7Avlp>?@5i(iO!nJ9mVvpwlsSi#2^#6O
zO)VJV_jT?N-6wi3RX(kjx&u6=95kD>*XXzAl1n333a=Y+;CAjK5?nH7!)&3v`W>HO
ztF|1>o+nG5zSqKyDZP}31-P6Q!ST*=*{661Zcv`cYlpCL1<g(FuCBNgJ!Um1&<4ne
z+MiF~ZK!-nS^&BhWw~@9nA*52K~X<cVA+_rH1mY>Z5KnJjFBv8cE8i|`nJlMg-a#v
zx6AmdsUHOCt&G!1QLN=pc(jiHJ<Ys{H!=Z%53D_&Y^>w7EeoD)x+FtO9XyH|wNQYM
zeEZn#;PmodL&<TUcWd$daAFACapW*4!OwZTT6nU)hfsuYm0wqq0#IP&X=@k>PxRs0
z=!};4F+09RY5FtWh<RvSx5`Fd0DIz}Sg!aQmyttBi;<|MQZ@xmH=kZvjmex+t#E!3
zL7uEir{MlgC@7ynj;oN~g}AnryvFTw9Fwwt>KAWLAN^~YC|A%d<jW(*B%fn=w3m!p
z6~Kvfg5Q;_9^8Rw@(&1Vu01GrA1VdCtKTuiued1HR+08|Zkza0&Wm3%xqo&UyWfQa
zxr8=fI^CmNR4X_X!5G}Iod~d|%lcMPL<U>SZ@F&!x_F@5C%7#+Qx?9&MgPXfcAnSm
zK^x?C#!p+u4M@{A+-dh^_pXW9SNaODt9#1K6l5UzyfTFi!6Rk>nXuca9SPCY+=%*#
z0TxUadTx9Rabv$dlFn@7aso@@z2`g4wdc?c9FMvpyS>81y}3d1!C?_{9>%K1&(_(7
zP1>!BO+LTIz@J?w#NvinlZ#5Qadt+~=z3psGF%k0k=iVV9>_b(Gsk>KuoQ2!`6oV~
zZwKE^5zBM;p8dF4XvrLdOXXL8WXVYK0icdr>$_~_LAG5&-kmM_EJ_KuDr6@SB+sG!
zHBgaT-*xM#FEcP(oF|x)$q}ik6Ekwq=K%@NRYjSJ<Z#G=<+*)#K1sDLyb?+vuqz)m
zzY}{ZN4*j);naMdcA+r=H>PWMw_QytWUYA-TmB&xoMkPuyS|~~8M)hQwQKS*Y#7vR
zt3JsHm6kvBjn|*ui(&f>q6jtDT=O{*_gNh)YD|<(8Y9LnlaS2Zg%6QTx-|8(<K+#7
zOva<PBRD`oY^HdtX=wWMp&X=PQg*=BLr57EteBpau4QV&O$e;a`jl;&m`EttkF8$H
z_}CX2THI>u5?V3ub_19DwjHqWO{(mQObCCt`2FGbd20)|dxDOkc;Ziv>gEzBQuI_`
zH1D@2@tYv6hfaSi+)+1a;pw2rkI7b=Z9@VBfE`<o!Syjz+c7VSzYa8He<+?ikTBj&
zUe$G2GQAVa;(+&gFQs8}nMq{b2&2h>`Az)CE*m3>C=U5XlvSc=#@An(pJWcic&c3A
z<?Juq4m7MGkAY+R#7{r-Pzv(O?$?JDhlbuK9`>kH=VwdKhcQPM^ZE-S#R+hStWq^!
zZ-DC8UW2E|NJ($Jz`Y<X4A2|^HVW4Z6a#I9oJbZcthnS@9=I{tYxpy|-AJ{b_iI!6
zc}~H~tV5jjxFWj<aT%!QDxqFxI{_4Q@CQVze*^l~9{%nXX#C;L3T8QaDdys(S8mrd
z4xdH@v~X$`)0YmRrKu_%>;1?mTb{b75G2cXA<60gy_r6>{@mWS?;-{{MUKL4T7AFf
z4b=M-Ubsi_#*t_HTAuowgSb*{;|b~ljS&4vKxy{l`@aUSEnj;gzz1YbWR2>zEhk`L
zG*S5upY~!5F-h%NEM4I3#w`T2q69`xo)DB=yfBFT3I`26Cr(S|8y9^8Cop4UhkCf%
zh$3Be*f{!MjT{7DTj=_@TK7<nXpE=Jhag6$5LSNN2_h{Ca>x9y5CYN=8@myy3RzQ;
zW8?Nl3^4-Hw%p}$Ll!x*op?8ZjJfLAywn@W&GwD0qaNLCAcQg>5GLjJbl7VFRb^n!
zQi`jrn89ldc}Q?P6Qim#_rPqcQ@O+xMsY%g2T&)XG11Qx_9>fm7KUt+?mvG%!mDT4
z)My<VE#>roC_3*)w%)goceki&)mCloP3>7~g&<Uj9clz2k(#l!R9mqVGe%p)p0W3r
z7PV`S2(3+RYV&#W`~~NSoRj;$-s5%M?y|&l2SMK~T1ZazNPPHcg`S)90ymn=7))ZT
zn-G&hxB!O}2XSLdJ=suww;XGDk`PNTw<Y|zM=VW90;Ubvo5wWc+6jhLd}jk%zWS~h
zq2ZA>+0v7(upW^SDP6=QdXO>j$(*FiM(F1=**T<`lITlniI_fQ$(~3u1k>oy{5ke-
z=jKz-UtP)XxXc?<>gpzCf2|(T>zX*6`sYUv@I(;udZS@18=!%f46C7bNF!U~DSd6Z
z{AIzIn^w0qP{a_}-AXmB_&-j2^{|mx4-3fe!m^Lw%zVesS)pr_zpI7k2D?@Si+}Ph
zG>1!|v+EWaN@rxAeE#uqaIo_UMX;$qkFfHEM6F+c+4RQ*nhx5!et0!4%H})OW%$~@
zquzf}rt`TDNE2gbdUP^icjQ*yj$3)&?b_0$Aesc3n$e37N&oYsFilIwjIcT=YE&j&
zAt*8RNB6Bsg!H+PVD{Qd`qe2RnO?t#SJ-WpS3SjLnC3cEnN_5P77RP<p-tcMw`-<%
z;bxMJvL5k@hbG;)vmYwXJTJY!HaZxseS$;<SohV$Mm1^>K5tMAZU>>IU3q;NyI~68
zXeSzqz!~0$&+?^1mF-jByNPsm!EErGh~JZ$5q##idDs;ELl+qe#^taA8Eul9Vt-{o
z+#qI_uf3lf(0pfniD^di2C{dG?<OH8m_7$UlmH(tXGdU2JEmUTXwAe4<lEvfI8w0p
z@A~4sJ_bx7l~^uFnlo@R$S*(KfaaS8+-*#Tlp*Dk2rrWToFOARDA3*f0CA7wRg?V*
zyr*%vsFXv+{KvhmEkCW*>!sIvyZ&j-T&z^Xi|B>N;Ci!fL&!J`1ZJE68fA6b{oC~e
zCTDa#{avdtKZ4$($LJ)G6^yFc3OOyKZ`8<SIq{ct2b^1OJ1wruRe?Vt>Mx`<LQTgT
z(rqxMVG&%7Pyy}B8XjH}{trvB&LmBbM*+2_%VDfrtX|BUbRMj~|AQ%#x$Lf7o@Mf)
zbsLGg+FqF9KCl<GSHbEA$en$S=nF@74}zpi=}lv(0naDdeS39=o>+vBug|pg^`5?|
zKl*sYFkV9w_UezXm-}PmP$q{?Z;4)34gIp~!1=S^nSat6{ptCNed+E@CMVrBf!`Rj
zN<w#Hp)V3dc^)E`J0{hDFzlE^v-ghLY`ZeXIz|;_iTsJg<U9$=AdGRseS(h$QDcTP
z%)etCn)Ap^6VpAjnNqa&B_p+pz76btUev<dQa~8q=+xTynb$?L<f0)Az2(3`W|(f!
zDNfUUf5K;EIg*b|9}t1Yi8zizMQ^1xDN~%1lI*N@SZ&{$mHb3c;FneT(&`@Q^a=1M
zx1&=^rT;DgI2-q4Tt)c)L<mw#YZHYx^X{d3oNNC)h23iPLeluyLpn>?p}G;-A61K2
zcwvDCaI1)x_ZYt_q@{+`K6Tr{AB1)pHP+*y79i|%o7I0;R?bR^rlAce&B!^LeEfFH
zLH2k{^oup`Oi?an1o|$ur|s?Bmk$$RZU)12d?$5&*%sd=|6;7IrXPokK&cn-CVJU`
zLhp5o`CIXHepMk;QB6R8nUN6iapF!c@BDlhkO2Q+TsvLE{WXEJY`oPC6ELMq8)({-
zT<zFsZp6`+1%vz=G3wFGc>!8!t}G5Fp4--s#+lO7qVxUTjpk)b?HJ|g>SuBV3k$u1
zM`NnYTz?h&u?$#puPf+(Paah$Mu})fd(Xy%;WFqAAet$%JNI9DV7}wgEd!T(?vg<0
z1Eg$#D&2{A^o7-;%H!Sgj;ou+d$z25WIu0}XE`HLg8v$^>Mw34y)IWs&Tbh<9-{bd
zV{K;&6g}g=Yip|u&!|ppFG2rZ)4Jlk$~yV-Ls9$3g`D9+Bc$x9Jx1VjM%ov{vABXS
zOwdK`#en!I!B@P3>`h+jrjgRe?=AEv$hXZXmf>hw$3=KEi)C0Kv%5)sA$4^rL)T(O
zKvJuh{Hh*TkvNaKrGq0-mebk{^*7y(^dr&n@Qelb%c#H5t>FW7Gk>{C0>@$L+p<ME
z955BcHS=|*_APhXUGT1$2wF;xmV7LXKm8&GLUxO1%pn}Mj&d~eHmF@opZ<3(s5ePu
zjm(EJ-!%EWpE5L0-6Ex2{Hh0weebEX6T|aS!VE!j`KPF+(Mh|6YqTBWSz1`nSn2sf
zs9B=Tn1NtHWQlQBcTx!MdxW%LO+s{>aW1uQNw-ORz7O+oDnM;?`HE*7=FVyduUU#b
zTx2XBK_6|!@%^nb`!G0UD#AUQs4J(})KP^@^T;(b{I;k1h{=|2_tFBOMEt<28S<?%
zrdbv!D(i*)`M}0EbZGR6MH!!F%-;%kY4p`ppjEAXx{m`P8%wWOm63t03N5tM6chl=
zByH6_v!3;cJC@?_nLIeUI{_#ip;@}3G$>OLR$WHMTx_Fnw^Cyzmgo~JUje|<I+3db
zd+F{u8I^Fku*-lnU4zb>vscp}Gm>~9tU_G>h8$e>+A`py2vnG&>+t#{b47zvkv(xv
zC*ZxPTq{zWnG2otJcH`dhwRE#bSq2*#u9BDdRO7lwMdRRII}7$+)~b#O~I8ss<t}a
zPw3kRAvdD`7gr?V$Ud`w(!J0xU5WjwMf>yfL9*L*fd;!OZ2}8h9}0W%dljXAkj|5p
zT=-8fxc-INusmN&9+)7Mk5_&=1>q^lE_NBl{$z3AK^=D%>6aq!6ynY69>X7@&sUO!
zi5NX6&XTD80dZetdC-VMcVz-EQ25_m?c%+%c{OvoW7Av%CHOR@;5Im&Z|E)Bs80{b
ztzmm`f*i5GENeTF!lwxIlu~-ZszLp4D;}9w_jiFa|8W?t#O3eA2$Zl{9wjM_YZc7R
zKBYKJF^(M|Er1BGX(zF1ZpF*fQgdr*nE#=Q^YFn=41l+f=@&3hCH3eUn)d^-Xuyov
zZNfOop+`E0f#4y3x{OTUXV_!Yiq>xW9spmmE-VrGK`G#Q{krMR6;X)T$B7XRK#LyB
zI}d#I))AFujy@R^-J}<Oza)QH_~XMDiN9lZyeFJf-uYP<1lYd#*Hxsn7i(MnmYGU#
zg~;hkd3!+L(D$FGj}&oj)tKGI_cDvf0g&DTy;`yO%VyTv;@QrEEl%2qYrd89J0BeP
zMP-ha?Br*BgseS%Uo2w)Tqnj@X>JEv2-?&u${)$?Z>Y#<uM5WUy15(fq<)82!&J<7
zy)Xm9&b^3pr%WEwbRn?zltWPPB0uoyX&P|AT<tug?Ms)gYepUQgLz8Rhd{b>YA<|i
z=ueK&db<O^XUwNA+R~VjTL0CN4fRZ(F0y5<aC&=ZKG7uEK%o8Jd~(*b?9Ll3qDTgO
ztNuyf_=`XbcBT!UtZhgr7LkAx%o;G`6E(6gKZ(RZDz%n%^JOS(ff>W4wYd>C%yZv%
zA(IZluPY8dRVZiPXtun=7YVG8relNq(Cm;Kk|V9<p!xLn#oHfu;Z&kVNq5fW^LZnx
z;JLfrpR9~HUat&u91#E6Oy_U-q9-3@yF($X*^(e@fT(X@t&w3GC!K1abg~tUU}sj%
z_|&*dIe9v-Nl3V0yxPCH!~5?~RdMcYxRxrYO4lTk<m<8}x+RJl6xSGM_=OkaA1UWG
z+*V_E;~BJ-n4WL)&%1C*FmoDjqrIByS{Mk53*ZW==e4>s0(w9~q;$<XjC0nQ7_+gQ
zw3X4cCsu~v*RDY~6X&!#4;>w9zxoBnI1$_Jqzu13AYj)qUbRjyKGY@nOb>?A{EB~M
zKCiUOzXs67B>hhLo!S`Uuo$Xp{iB6SwdNB|hr-Yw#l?!=EcNmYr%+vw?>|R}AFub{
z)q6NseD|sRSJ#T`#LVsxsdTqjWZrPvUee?JbEm@f2Ne#%i{i#hmqg6}Q1?qkXX87q
zWJjC`7+r&`w+3rf5G@&2myr=@s+O?O5KjFC#LU4>%Eu#5^zcJgQiJ&p019Ig$d=Hx
zt#L>li+ag&gNfjPEV9ph2Pf~Xemv<}oQcveXmr5}2IeFD^0Sk56SMjEk9v6ET7|6K
z1tVVF(PhJ^d-i~MH`mn+Q)*Q%mtV7Tizs<H9hB^~s;}eDPbES(1%-%VAIi23@U;;a
zNv}eXr{#Y?skv9Ew&x8M*tiN21wDScr@zm4J?dJ=4BIi4JLs8RzE~LlrM%Wp6DdJZ
zdA}-MfC!c@DLXe~ZETi?I{*TlSI~=FVc!zVxmkf&(cvN79b7xFOKC22q$Wwaz?B$Q
zK55YD?u2){X`bs%MBwo9tCH0bjsFAjP_5^9)lJGk??a^66Got@%5=RZ+40R#)iRLs
zPTDknJxMo(dBDT3Ev(pzG5VJHms&kOY;auPuOn(<JxmI=#)NnC=hvdvkW;Yt05Ve0
zM@%vsbGDMn88U~F?p;>jYeRImC*wQQEQ=kF-7-RqM)kCeeTG5;ol0{Q0YIUgKrhn$
zLwVSv=pxHO(fL+Ehnv+i9D<g0cOAEOu#Nds7cbLdbU68A>UQuZ6CfK&<L7i~HbZaQ
zx%BQ7^lEw^wB#Wh^~a!yJFr`Rt>;&1JgIN8?|dmcwxVmbNQ&wLq}Eoyz4U~9+nzRL
z79MwSjvgfesv{<aOeI1;Aq3@esH<bxvz5&GPFfn}2#EY)&m=h#wG4FH(xDjoJP?J!
zxkl+uCy+Jc>}dMf)Qdhuo}xf<a9rLljHGTPDrb%Gz<^3(eBspVBB(kA0`R^Y!v3~Z
zPtCra?bK*5hYDU+7|mOf@^h`=CRb5ykNW^bnN31(3PEC;_$4uFHGWs7Rn;>GVj)Cz
zEveITC+)-qi*|}sFRnWB($8Eo@0GLETb!r6gR5df<;R718L|rsn@D@!XafIXw{Y^x
z4$}jJ!e*@q-^%aYf)1J8jn0Bp8WadlC5ksKif+XQC2cq|)LurLGO|T25#NZ)_=%nN
z6l-Yd<UnmhYvFuHAB*i7)tH?(mvDjiW$j<8pWz!;Yr*i}U@-8$!^<_zh069uNV3Fj
z$B*Z4wnXTj7zH@NW21yldWEr1sg4wVG>IzbzpI4BRH*dOzN^vR1z>b|?ko4YK$e%%
zF*2BkxTl>;C`v_}R}b(3_G}T8@oZFt)*{snp$zrFM7hR~BMQ6&Fg1X+J11TAemaGv
z9CRQ5P8`fw?~r6$WDfv^%0|xZ8*zn)M>nI*vWN5UWFFaGmh0&B6-+Am4`_@4t%*Wn
z+fknY9P%5AnnrUwLF;F5T&znyfI;{5UAL}gMsR!1^>Pa#;{iu9?13kh>u?YQ3Rh)B
zc=rw{Ki-3Pi*mw$a}ctT3v>!3r+Vn^B4s7A=lT*e**ff;azAUgj4zL;xz_48dgZ&7
z1%1@>mCiB`AIe7({lw&XRIIl>Wf$E~3LV&lQ1|S<rsQ#@D`d2!CA#qxBWfv^KR{|h
zUg52rm)mMBOUvp>n>vMcRH3#(gW>|+|88ENn*@6AEb^3V1ZN{Z=V+0i^+|84-oAyd
ztZ?b?oqGtf(kE-=-JM1^KVH~Mo9R>NHRF4o+Zy=MGo=8x5PeYcByVg%a!f8nibgQ!
zrMaANrU|sLU5GG^(H7NLeQ_kr4+Fxub6b?(Zd%k`*nNzaacyUiJ$IbZH+>rM*y-b4
zan&-XQcKFA<DQ+@X+!?oDy~GU<P>)$*<|A;G9Ih4SgvO=5mQ{8^&J!Q)!g}aRx^Q1
zrnoie)*q3)5H6w2saG^NYQ>Hfl%0}zUu}_ZDLQ4To7J6?grF%hrm?tFuU9gbdFSk2
zA01-*&#}`yoUFiTcMZ_V0N<&VjBIv?L_y<m%$BJG3#s_0+;_Sg#0VFRHaL-d+V|hJ
zBiWgALbCl!Envz*=GAm!j%X#^GV)syePZh7#L&O~5jPH&W4`q1G$|}V=AnAgP?-df
zVhDzi{BwZB+~nimgK7CVvGgu#?ut(Fbh+TpL1ZtuvY*&9`a%(ZP5#0BL3&2%pH0{F
zoQLZYSjjPtlm3=T&7!TuU)F8XwPn=Zz&u^ut9zKPfaiJ1JN%&9PKtDd`v8`mwA+db
z%DOMdv;QVTV3e9ETOoyE*L4R$HCSX~G)O+ZBqnJYr7n=9-q51R{q?n{v?GNg&x|j{
zuLu{$m+V*B7yOr2Cv-nFa!&Gp&$ADoP~_!}LAIO+bpXfTmA)XgT34IHg#?G16V-yy
zeLaeAXZug;ziYVp&mnYsp@09o_US_EkHd*L0IC{sV*CA!j>6rb+zE}aMZGcg!ieSd
zVC9#$dv1#zhO+8#qwJ6Uq^iDqBL!vjH`wgUU(YommNs^Y4j*72stxM$t8+reYy6LQ
z&Kz8K2ZVixbG~}VuESf`2b~}J_RoBoRi!++r}F6#@{+jdhU44UDM{7zmEXfJWQA>!
zhM{`g>sG$vaii3R^utA#PKyURuNO%oGBE#zc+hZj+O0xl%Y}n3*6Qj9MPQ_w&E2(`
zCDqct!Rzn}N{T6q9_c3<gVuP;zU*BuE`V-SnAUZZ{Mm^G>+kc&*ys2ATeXC-P3=Vt
zx?peex63^X&4rO(TE$3_?1?dsN^loQ&LdZYAxuE#Qqq*1Qllc!z;+mJf;jFc2o%2|
zupfQM)e#*TH)=8Uj^!fX56(*raMqu9hABCjqz`{+P;Ec6J*+z$Exh`2!N1sgNA>Q5
zy36h&p>Y|jQz`lgWBoZR{fj;Vt93frpZwpoC*w^^)54g74*}6oqj)XV?k6(Y*fI>t
zamrPG#_d^`#i*7cC;^CS-7>v+le;~PM<cu|c_gFL_^v#n>fWCphkxmBde2><ocF}0
z9R-uXNm&^ew~4pHRn+leNY<1`zbjh3gv>rZ%_!byduGHX-p1e%k&<Is8^gx{j%;0J
z;UniNM9FCF?>Qqc_mMxcz?Gvc0aKO2IOZf%&n6dliR4t;?sVC9C)o<5ZmGo}^k$(b
zRdehRg24C+Px;k5x$wLj(|(sy*a#Ti65d$HR%y@HTI{&inRLtt8g2d%{AK<&(;@uS
z3I;%0yRNg5PL~$`ITcn!bmy8DjIIX3GSZHfjgme8^<W;%bvY^A?sTvnI!~nv@%#9v
z`{3MhV+Vb;7+jhp&?Un%o>H@SxgQS2)>hCNnxHu}{<j4^c=SJG=w{BJ5TTck7eWkQ
zuS<>iur!)=?uAT`m=uh<3i4^z`MeqpZs?f|1mNSeTqEfql~r@K{XIr$DvG>w5<I!u
zpSm-vbmeOwxmQ@1W=oQMQyjXSMIHOu!8)tCi6}h>?YLHW{@oP&aWIEL&M%=e-{r;I
zU1f*1z7M;&Ma9w$IfzT7g+mI{GE%kxJMR@L69wn=M5caBmWnep@<z2X%zSOtn>B;1
zZ}DmB6mBaB>TZ4Q{&y>0FhS6;Y5v>}qK+L)9W?$t0IqjCUxCXRNeC2IiRJ4%0=CP;
zWs)0y$yho857F4-4v(}>n2T?^mVwmgs=4QI!<RcdVOZqtI$<|Q310m-(KkKFs@>AK
zys6scL)R3rQG<yu6Y0$6NxF}p9Gv{d=kH-Pw`S0P*V17I3NP?Lqn8G`IQ{szT{8vk
znFc=B@>#wDoy4hL4ZDSX>-73RtZ>sVaB89Zj(8{nLKz{wT=8YiY;QAi^5MXl(dhA`
zLrMT8T<Ts*QQz{`<pJI_-JB_XD!05Jm4YJ)<WAN<Hb@iM<{bNYRtnyc{<^gN5wtlR
zfaL(@R%2`=T_q6m*4WMEu@eyd5tWM4oL%7M|CUYTl+SF1^qml9Aye{=V73A7G(NGP
zO2c7c#YE)3;>C~HJ*zzuV_STgZe$*yr*@ItSrRpj%YU<}94Ei~-?f|ICLxo~(Vj}1
z6Vd5RW%><8<hMW+&V60VFqG%7&)8z!-RQAGBk<Zt^qVBZ2dgYoS_%)UQwptCYWwFY
z_gniP(wz?OhgLgyQ0Yw&B;{ZV-n2zh?50#N*3YomJl{`d`GU7fZbTmffF!3<8SG+F
z^{=~r2T_Bdre6iwu9Z7I7~cwN(IOa$^(-Vym^Vd}!lDD!!3r0dBKO+hka8mq?mG8r
zkvFB7YdY=?^Wls!INyO$-={-ytWuEZ#4Qh8H;PsK9k?=ifU^xzm!MY*1iM64T07N+
z=t)=zJR9VC*^tAN0%Ia6vSn}Yo-XWTY8;XQKN*L-q$@;~gzYeDQ}aPgMEmJsr!bo2
z?4<$TbiVmMf#jXd6=M7<V#qLGI{FVA`*Htn>{8^pr^aruEYRTU8Efj<!}ZrDE|EdB
zNvwn;gBK~IUE7>MBJZv-6+nuCgi?q*^{<ZL0q$mcW`8&S?LUb6mGQQ#U{x#yO#iRL
zi0);DN0OC=Fe014+p_K=R7L#@t>3|<#Zf4!mmrRwwZpQV7D+GUCO>A{!Y^8G#FBOw
zMk>3ppOXl>$E#AkQ(u9+3G}>e`o;_U26zF-ZU>aJmvnYk6xHw2aO;iO)ldY(vqOCO
zwU3!PV^MOe(z#2!j~27Ts1p`z&sVHVV;yo_xH%H1=jF_&d4Joxu2*uLuHKssU^j$a
zA58zYm9)x3mT1g_xC9u=7a88PXt@3tV=x&D@0=MXRDRwPRx83S${<DXhSJbJXIuKx
zrWI2a{oILHXy0O#gG{bOU+c%zyaw@k-tFPM24Ss7STh)j5jf1BuH*QE01TFG_w4~U
zNefly7o0C5(g%KwefsK8_|u8kyDFt(S(i~E$#=6c`o0DVOnqC$JuSr~yA-;PF+HCo
zQEy@)F{#lo;f&@y>kMxNQ@e5#7;@)uK+m+N*k9;oU<5}wO~5yI4Eo<7-%!TeYQy$w
z`Wa|Uv^(&PS=~*7Tw{Jv#Szj3k_eBWV+0-bm9PTadLp&b9fA;Vo$vu{qK19KHbX}}
zmOuBLM(LA5DhloAo=98Rc1P)6lBqs#y0g4?-Z#D3c{v2BilFP6=so=B{?KKQ7Cf4p
zUf+S^)NmbNHl5e#G{+p;7Bph^z8y{6*|^pIPs@g?&G6336XPDV`qY8e+l&P~h6n52
zD@y-hWKl+BX?+U@kOc9Ii?_;-_GHLT^-*%2Oj|1zW`0k6?bYhTKF#xdiVeMO*wp%U
z=VH%joz47LQ^r_vt*{8w9c}@sSE}jOf99?i-y3CV**+T3dyqCvVglDBAEhrRmlAbI
zf^TdFmhfMTSRZ0u%{8^`1Dg-wa<!HKWi8<^tw4o9b&^o&b3oIEUl(M$)RCoW$W0#g
ziMj-Dp&@L;&21-6;wip&Uxt=OV<<&BsTo6oRF0T%Yem>t+v->~r|aXAC5lN=2YA;s
zJ8xUm0?!1@y<LReIf^{h)Tw2Wi#6#zES(s0546vT_7+oMNMMEgU0Db>=jay2Xp5lz
z45kSn>e>qaV7U#~W+yhc8#t%vWJ?F0sxPh5Ycn>wGNBCF$DPivaAbyfqFLZx;1X%L
zItwoeS4)EqJqv1mU5wVarwh5}LK+B7iZxT~T-xwf+^?@tLQfB9kK)V$^$kY0M`Ryy
z;M1cOiZ(r5-2hDv;>(o8yBnEz|8h)z>;2{XD>d8GGp80+HB()Q^-Q~6GJkNby#K)4
zXz!2M0Qy^w&tGk?V7qAdh`F6LR!d0>#9bj;tkyLFaonJrNb+50CvJnQQ#=FiyU;>6
zYBWmn7?G5p@Ftw!p;b6n@0uW=gav2bWratLQ{rQ;N8CjBq+02jJdn|^vDe*os4^B8
zIPzv5w=Z0-D#KGubW^%lhDusR;f0_1{cAZbq>3!bBoP|(njD=<UcMpzT4vqo+$jO(
z$1rZYE;uB85yN0?Ivz}E9t_$o^~I?*&f2H6_znPh$~q?^ha0VmCW9-Mr<F=*XoZa0
zn$DI+-0q->E{W<bT8YOJB~z-qE}e<y>00WsFA_p%AJzmB&VQ5EhMp$KvHSvB<*%nD
z{-N*i=S(b+C85_&1`Q`r+Jbr={F9<Re5Ko`NBuJH(_gT>H(n}5o0KMPF8B1l8bR<4
zl3&?$9`yy@WvmtfH=^6kE-zNBDhQ^)(d4`xJC!esQF#?KCLesqG{4Txp`<zvJF+#{
zW;ERT0$08yK2_HaG}*Bb$`SM)VC6~On3c`p%T1~R*_dy*c#kveTzRA#SyE}`jsI@_
z{P(@6Vo}t#vzAbTnjH7(e9y4pqpYv_W>1_~j1A&CU`l)(aCP2)T4SEbLAcyh4==8A
zB3c(xyR~-}GEI<8R-1_O(JqM3utb)Y?_@ggTFNkPE!@?|dQT_Y6mR<ef6ZP+a7wAh
zQtIMyW|Pe<@_S5eA^om5PAqLb8f>HR^z;*`P}agfQpzm<t>&{ymVDm<mFEeGU5eR#
z0S95`dCnwZC-43!MjpzHS9@B=M1HXTGU~=;zWc1|X`qu_%Ejr^g8rOmuXr5D2mT`}
z%Z_7UZ(x%vw{U(`nSP(19f<R@9Mh65)mBoogr~=^t^rFE(_&nflw_*~Qzkzk@<8n6
zxy7Kuv`3IXqksW88W`Zh(d59CR5$%`uB}qsAf8Dj-z&Ng`5U_+d5Gsx4jB|n|2YnI
zWa)V+V<ZDz3<5U!aE8u)?(&!-{p)Xle02p0WSi#XW0bZp_b~Y=zDAdVu6vzh04=$`
z!_JvpSiWf&RDPZQy67;)$Rgdds-^}lr(s)@%VJK-9i+OSXq7>!91t3MKB3Dn<eL<}
zb(c)VRl<yADXK1m+$$U<UD2Gz|D5+J&Ee%+$nYmazVQcNz&vRIbz@o6<ww$GdQLBm
zlDZFQubbPAKv@H-jf}5v#K+0}=`gMNa*0PVsmCA2Y=})<)f0Nm>=WxQt|UKy-R<+F
z#NZBBGDZ3{XU2>OcWBjL`6-mFEa6R_cpzATGJ=eUkt8{@4|1U5_D2`J1M^`gBsN0V
zr|hMNv7ZU-J#ys^EFki5^k%sk0k@%+yEW+P?=+MDDcin#q{){3cra%Fv)H?Po#`z*
zK@|2vNeB6Powp1vkd+&-(~6v*?NDs*p<Sfwvm&*^d19DDDn6_^0P~~!iu~w4nwBc#
z36KFt{Cdp4rMR>=KOT~h8#{beF)@3F2_7-8q&HH0i=!V?iovzSv=4aQ7D%<5GxI`^
zobW5dQPKAY7Lg)(kz=MeFPrpY##sp{@#h4A-nG7UC#c2I0B=UJ`$UIqpBAz$G{j>e
zyOhmM7K+%Y-!ULvdS%*(Nanng?$sCinPH3ii%rZHwfvv=f=Y}L$w&6m%d@Ywzh|#l
z>~@Xt;r^Vx>=aNq2sm1#rzBLj@b}c|Umm<nLZ`P5mJ*0Gi%_gKNz_riF6e5dK>PQ~
zD?OszJ-R2a^pRL%4!^CwP>Q?$D&MaRZId!(`j&G_O=Vw20QNePZ_VFkI!g@1^I-fN
z7HfA;X_7EAo5KeA?eVcz5wI!`2{G;qZ^fvf0Mfqz$J<-;PX*7@^3idd^cn?w{N1DT
zBB91FlAzBvP|^QgTVXyEUr;>>AEAW6n?u*NUtKt<-U^~Q`7V}~s<T0X$k#{a+xEMD
zF+(ea#8GDF!2tmo(3j=JQ9UXekP<ReIQe=JqKId0vi@uS;ef5T?nDnx(EE_65#ZL1
zaA0&CnV|(w=t|a)-@O^WkitM^6DVZqDz0BV?>>p^m|Avr1n{g1YUCBpL+-Xx!C&4i
zlcR9_X+~oU$f5^B7cgBtMP^>Tp1MTYw4inQ_U*7`W}iw$&yVG)KZ*a_Jn03==SEm(
zmd2~|Y(R>`>Mkn+j_H%N_2F@^bK`FM+NXO~Nd%#Yna|-UN?>@<+U7m5w21fhKgb=D
zSGY+b8)+9(6<6k!EmP?{FQ`(WI4t3+(g2K^dh$DS-G$y{<bUfM+D6wa8u<4+Xl?Wt
z6aaG)sickk&{099?*PK5h>QQORTAz+!vrR8*3O*}el^(Ewqw<NUcH;jn3KRP_ZH;f
z4FMPfE$stT>+ueD@co#jq5avlvCY%-<xc4TM|$~kchsovs}XI#6XtfePN-n3T52EO
z{7}~{YszmGLgb$O1#sRh07zmj*4yNb6M@b=s@vjurJglQPl`3}^=g#@w*G;a`5Fb!
zAsmVyT=JR@Y*$BVP%hja?QCh!`1C60>O4#f4;eN_<cI#81=!R9KyZYH_$aE%UB2oa
ze8woj#;S2EHRM-WWRl=YVQ7#sL9^2PMSUVQUGRU`zTb@rGZ^5I#m3#NW33A}t-e22
z7cte;zqg<olbZfG_QDGffBo`)>Ngg;q@Xu=+qZ(!74B(IQvp^&Q+3hqI<qZPzcfL%
z-%)p4@7VOU)S!3--QM*OJ_v1Zc4fzJeNTQ;(x+P(9Z<q+l$E+)<`bP;4fl*|aQsX6
zStM~Wgl%3?+%gl$VN!`JQT!tDnU}$gLaDXhY|(jndwNujJ0vG?YlkG`x(^pNmv#eg
z%#h9&{=0T3OQuS^W`A=9>n37l2BYQ_D(b~`NI}2hqqs(j-Gv2adc0HD7wEQXr%GU~
z{=Gsuw;Z`UDg0CL)VT0;wzfCN5$ThR4Tz@*Gxl2ZB7Y?TYr3ZR)sFt1);z-nrX7)q
zc{DTUVWUUAsb#N|BxJP8`!SQ=v|yNC-+Zg}Zv)o1XWU$q%q0q`>4LAqB20afa5GxR
z^S25eNAzc4!aP^?oud^+h(7vk-W?bUU;28@Ez$C_qIoH2Ia7R0rrTM1yFg{Z`$Ms&
zntXA8L7_Nb*4t<NU(3o8Mdk0;^H;O;o;(eU23kz@ivwtl)km#(FO;%t?$}evb!nl7
zj7Ks2$#j2+F`65rt|Iy=qj{N9VhYH|s^1%PDJaM>8_|;Pi;_m^q>Sa=&_^tl&*!D>
z&Q!Af**t%#((OIUA`9M`bfS>LOkd~X^6nbg@p;5E)jEOkep=R$4^TAI4zj`nGzE2h
z&FtNB()|d4^9g?qGzUY`&0OoNMC1sRnSuSYP)PlB;u$Ud@H#*n0oK;+2gLRNK+APj
z$CFDKQOg|VsGv~$m?Nar$F<DecDdlhx3s4y_L9e@7A9H<_RdwuR2)#^dKIPmQ_fdF
zQcUf=x@c&$PrC0D;_TgY=HFA}STvoG=qh1;(l}3@pJ0215eEhi*R@JuEuU^W0N0tT
z0DAKFU+iC9(Pk}11y`D+CXjW~eVZ!^DO}ZiW5d&Bq{}K>*BlEAMa>Z-QG<UwNgmz_
z%9}HFhaIs@JO4UY^}%%sf>aWfzT$|o3b0-_;aNXUp{kY9N(S=PGIgAE>fcRwqbh8J
ztLsv&ErUq@8d-m@QkERL$^?m~$upjJoz|AFD8!pF>f}YTSAH#<4C^Us{t8T2DJK4P
zkkP>rV+0yE`pQsu@FrBwG50|-ZS2fZipohWhufEc((?IvI-e&bU&;E`x*FQ66^UL&
zk+o}0QxC2v#gyL=t|d-`%kBN#apqoWZO;jQ(U6mW34fP(*XenRu2RjMQWC`XOKqvs
ziU%;kDFSSoMap&@L{hCj%)K{u{r3RMexaCOov(l>vP4P|Pjo7L!^sUUao=bmVl4r)
z9_nkAvvuLMg2cRv*Vmn+#NA%Nr%HtEfhk2Edp2$|uL_Iwi<4`(_UiZ2$|8hls=~t4
zmnnn(lQjo+p1yoOVPgHHMzmP(aA1zPSr~|N$VkDAg6x|y63A}dn7W9Wo+3BCi{a{H
z3cIZ!izt*RL9^W;<Hu+)?BY4m{q%%ql<_OKN47NuK2_`*BkZix;Yrd=wu;T%U*y@9
zv4B0Jop6*0Cy*&j<pNzO9h+k4)P-gn54?379$?&zB`)rA1XBH^n;vLWtM<0hi~IAN
zhEs`i2Y-3%sIz!;Bj_#=?wCd14++z;aPByK|KBxtRj+XA76CeMiBE=*N3kT-Q!jTa
zIGalgPw=ZiPP~P;jD44mx$*R>5Ml(V^f;ewr0#d?i4{T+hcrZ5LG#6<!Bvp5Pw*jU
zFgs%sxTlDa;H=~z@FR{Lrd!I)@XPd=ojb6;s3hIPZC|>l6i__%mM7zWJal|yM~fBa
zQ>m8=WBGy8=4HfIQ;RA)=852R-}|)8Sy=HDzd2mwcRTb#u?iaH&P6j=mnK>HgeOr5
zl>PTDudiMl_0QwP_<W>wbY%}(U5oE(-8PksxP``99lr%lXCT>fAf~S5-|ch5x?)o<
z5pgS}gA-1;>>((1K=laSa@mvK@d!mCOpObeyV}K9bm;mJ@cf`ws7VNL56dm~O=G&b
z&vg5JXLB|$l}ce<i~Cm82}6Y`p<EB5v6EKR_hQ6~&r*w3!1Gv2wV16vJt`-iiwzG!
zgf4d3ot3V$F1rsii|Y2&4xIEicmgbI5RqCpQfINtK`W+D@6X3O6i3|K|AgqW;0#s@
zFOoHe7{INj%((7N7e>}RaX4yOA^?-dBl%4ynS;-RC1W{QOTfIJ1Kw=SONng`vR4_?
zM6xBJ<y9I?B_I5~6QD(8keja+Z7kYlsC@IFNc2(D|Gw+Fq+J)y4)fjG!JG;@IK{s>
z{MJ?+{G-3vAgsO_(|aeWO;x{876zM<Q-3C2$Xw_ccsyX&2spaTKPY{{YzEd=<`|SZ
zpdM<s^4DhtM4D(Rg?_Py&XoKr_MP@Jm0@N4&bCzc!jOq6MOz+w_`~t2EmXcPotckZ
z^=%^2`P6~W+H555fFv_?W*RAu<Om4%BXY6>#de&}oeiurdq!!tzJYlf<I}|kDG=z4
z5nygiQ`AC&ooYgn<W5I2MJru6A)AHc-!5rpO7RO+?o-Y_dIM=6%s^SVU$>E};S>A)
zNGtbU=xgg92(M!b?-!h~P;v+2Yqy3vYIC7zpC~Rs)*EJ-0W}&Us~EQX1tzh@=8c7M
zifxDq*l=<Fr!aTQ7!BQwBCSUq{desJaI}%0sx|X^jV;~eVSjE>WNj9UR?EwQ?(TU~
z5ZUusGW1>{K!fiU8TmN06;?l!-8CJB#jlO7D>Wo%RH?Z+eHFz-{%>Wrl&44JBBAzR
zb}mi<jMEvlHVyr9BC45qhHh`@^(!gNhjL30Kp(V08`8w01MA=Oh%rl>iGCk8F}c=_
zO~1EpO6?w&iWSZRjqoyM7HBzqKt&$0dP9;us?ccOO>Ult><pOmY@|zlfDiVhwXByi
z+3q(UU^-kD`zC8IE%U)rw_t6?@9SX|H%r1+Q>b-rE?HJ@Y4FJ1Kx{}!L7=QHx^G-G
zw84yJ{`{SPLS<OTLX2Ee7%Q{0{y4k*&OBnhqp*d!(Tcyofc~YC2wk5Ug(NM|a$m9%
z`$@A$Ha)Lw%A<=ldCNoRHmKJ0FmE!^IBM#T$o=e04$jeNLbZ4LRcc-DE4+Kkeh(^d
z`|<qeowms?_;~_>=fGQ$>!1Ct_lKp^HioR}M&|7G#60gIz0ua0mWw4EhT=R&uK&$Q
ztt{{`qxi>5Al`H;4V{Q_0@}-Wqz-4EG>yE?hQ8vjyKt7B3And0_;zEsfQHTNVo}ll
z)q$z7ODS!zn`8zd8;DML-WV@lhj^;WGX}0TVs0)I>{t}CaG_j^Qbp2>5*qvqwUnb4
zlBtsSHYD=!@tIW*8ilr4V)He)q7YKPwP9c@M9$^U7gKku(i%q@jgqP5D({$7l6Yn-
ze<`~El%cN~+dpm|(Jf^S4cCQbi>Gv7*pK5E@Ju-ycL?9De;+t7BRFr!2><iUoGt&I
zvz$~k&@t&`J0vBjVw#r1ZhCl2F;LHZK31HUl9j5h=ZZ1P8jfSEyp)1E@`6^0PF=d9
zKZJ-L)ym7mgU}ZaD(Q8q`hVnY^#8jSp7^<PL~|@8Da^YNKR2ZrW0HdkJ#AOu-|`cE
z{WLembIW#>WxtrHI|^rh+a$;ZF;m@(Hb%4HkixeFH}2ajPok-#dLQVah!(Euxl~gE
zy=Ah8CNslUKCEtXS4^kv{5jAjs)RzmZ;P{@jaBlPDtbQ6ND4o(=G?bq)C5%j7ZCks
zHXuG88EOqinrM~0Hq7qO5NfPxBZ&b^9_)5mP66m^<cc~*a4x;B`%c2FfOjpOCEVgZ
z7CdD!3?EZ;Ol)i@pR(g+IbZ0-R~}NGD5UNv$SHFt$KvW+O19c<@|<4xaewUz)Mc<D
z26^Q<5K}JH1>$#4fW90MwPO4BB84H6Z;~HNZ_=q7>CL_@daiEBR14W?&CqKLA(ynz
z_zWw^#E%Lz*4XA1)q6l-=K2<j4p80F%n}kaLzZWU3h65JeRAdiZ=Xl$+F+0sNmN*k
zLlY<~=VRuB_cXi?TuSQ_$@-#C27Gs>uz&fD>fXc2zvsm+!5ezWy7E!M*??i-eqLLH
z$P#dKqabB#k&)4oZDkF@&3BWJZ<Wn6ml~I6`a@n!31it(VJO>SvoviKXDa)bf@NP7
zI(}V$j#U1T3t$iNnQL+^6FW4VO%l14v%x<iWV1bAW<HQMH>dWoA3Iuu#31dYOy}su
zcwii0fFbk6?5KAul_1oC@3S_czBdjOs@nRjp^opbw7$I$H(#)<P5KQP3cO!7X|4YY
ze|V4Tr$mmEVp717hucr-^XZ2&HCAuu9)pBLKNP6>RUDtKSb3+ylv#C3HU8F4jmFtm
z9Kt*8;~(zqU{O^@=!6el7(H2vT~zg|M23BHXSGZ8h-w8>Fb<g3_NH5n&C(j_Rx8|Z
z3zA=n$oG@}EyGD-tc)p%PMunxEwojFiMvW69lvt(DfeEb+h^vOx%R-&?#C<puGLD@
z6H+OlnLxAjlG&Gl1ir=W#XX7QM9YQ6E+3ngBCHIR5;7~ia3ub$U3n`#dSRV&DNCR!
zvXduPf8D)=Jbow9)ynknbl^=NDtkjQb)&lY%&{iH!{K2fQ1@?rYK<P-U9<Oc=E>7_
zqw}nm7IJ@BxD<aL{qEzSFwZ5pD+L4U8XR%vT5r1a>yqSbt-*FcA)Wf#u{ft$`rQE$
zuFoN_sh~IX<bqq@?{GIe<$_0=SrN93Uke{K=p^us5KBfKFL;cLqN39klzqF({TTrm
zRU6?!g<;s*S`v7Se)s5m%tEhF+f6ozUf`1n!Qc4KZ<UWO=B6k-7k%MT)MRiKsPTVw
z$m9(}`j^`W!vC8D5NM4GpL`1P%Q4cBTrQI;J!#otXq24jt#7g^J?`N)86~E#6yVfC
zhq{0bc8S3`EM4n6Re<U0|E{$or#d9i-Ys<<pmkc(&wrdF7>h@`GfGR@gla3!laTO$
z#v3Z29me$zD@#%E&EFvmcV0fMOV5|QWRXi0d=Yy81U=63GjM0n-q!F=xCv!{PiL^C
z#?##FxR~Pk7gt&PX@xdFRcT^J;w%nH8}i*(rcF%5&7nWRmd0g*U1LkFcr&6W>QxB)
zgya2hiexj%tCTSdqRz8-C-E&e`Q7CE4*7<2Nfw+ZhM1U1b@ZlJ_+ei1Yl4ROr@!>J
zIEk~6hJiD1DPwK;(oelG+hDP&z4RH;s-T8m@CGkG_fs!rOE}v)3m6tvlh3}eQ>z;e
zVyi~HRgp4v!&t(!p!2#X?YH5TrBZyFGTN=AKS9B*w8}zS;qnj&y43^ifRV1HcF=1)
z*^9|53`1jHAPd!A?oZb&HE}gMdKx8qmJG;Nb9>^rihF?R9=Y(cm^Y98{=4?0%K|-J
zCiK5Z%j&B%s3IeKNfMXDtl6i(Sc83GX7w?AWQ{wYFzd4(OKF<+AtlE-V2FM{R))|@
zcYuJ}(DLl?CXb64(tS~oS%m=|e`<`2n?SD`kZ;)CBkme3X}>+{5w77xzmIMZ-S-bk
z{j1e|e&1D6JSBJ@!M21141<VRQa8&V2)z`?hpp*pDd5PrTHcuibRfDN;s|Ge;{tq9
z`exBp5c}-K_?J>M+*S@P_$FUoS_!1DI0McJro}HuU*2jE5%e&`he_vjz@s#Vk7&V|
zMsHGf9x}z5M@&PA4zr*ZGh3>RTEAOy&{Lm!T9xK}y0ZsZT3e_4xY37vJoXeH`#puq
z=Aj7lf7jghM5Z_>Sl3m<(R9n(5QO3Tzr5?R+Y2qMtudB<A`~!p<EzgQ*Gnn4^`*(5
zdz<;Y+r@0oR?~GsX5WOrvu&=0uu3!psfh=W5Hn{1+YzrR?CZ9Cp98i{vjY!X{g1J2
zSK@24Fqxz+4TxVk;7-c>xQ-wNPDRU>^nt5H3NlBoDmUa=w&a6E_hxiYfJb)kh{*ck
z&)ERGl|e(+B0a8gm$i5yqfeq<t-6P9F%{x4!+e(JwMjN&@h^1k(bmUmpz;46#ozvr
zvUS+$yz5>9$R#({9jh+#h{?KifqlxlcVZLDWIiBYB(+I7JM*5%Xz7&ZUo;$dY+15&
zzGT;Qc<x8>7Mo)Os5;XiHs49N!{UDS-XqRz9PQv687*%JAw~Jo_qh@4+`*!IQ;WRI
zchw%5#M~Y3qhd3v99c)b=B`}ihvI;jc@g&{IF54=Y>06ii|P!C`S>zV)KB@AL;bo$
zn+qG^-(wK2!^$FYcQ+bh?h&%wSLxj0*Z5z+8zwB{B3+M_pWMxCC2h28v!Jg5sYkkx
z!V@?1S1wJj{^ZRKKU9z``JKboQ-A&Q57IopKJ4)e9)VH_H$c!>Dz)<Y`qujPNqggP
z#+<b7BSePlm#XA__atL3Ljws_+q@G6!nABUSFlmT!&h_U*6F@~ZUEx|?p0ma9fEG0
zd`M17+QJ?nd9b7GNG27{o9u-{@o<1Zy-JxiUTH-gGTpJ^CUs*y827x>BGVH5E{p==
zV<JJk(u{Ipo!WrIKIeDUdlA)}PXaZbruNIwsyaV4hy_=}y?-8X{wb}ry^+Z(%PZKe
zJCfxkcoR*)C5*xZkp;&q2i6r?ku}8X&PbNduPx_)$fpvWB|kffHq1TY6KZ9`Jc^Aj
zjb-%`i+y}x{hxSs@Io>EAne6H$qC9m%CuE<+#R#zH+RqKUQ_tgB157L4b{H%cO)(G
zurSS_KR01C+cjQv61F&ht7U?Q_Ss<O^4s{)W*d(Gt{s2DA0w-FGjg6>JVH+#_61VO
znE37CtRd!lG>Q_ukh!RQ*R#bMO)N=*Zyh`ULc7&ci+)3DStP6b03+C5FRsT-u=OZ;
z<~eH{ARO!5Y!Q9QF78;nwZsqUcA6Rv0{e}(`JWtGpIxeh6T>{>erebG7`sXXW%v0l
zCDlI{{7OmxfQ#c3TiEqxnevZOLS;VI70mXuF1+f*n=V#!?4-BzJqmfP2O-yp_M+R|
zI%wMNKq&~H!vTmd<xlM80kAO8pqw658F^<xtD30Sm|C=$?6kM5+6`XyHSzk_UleD*
zyal|^koZT8rkDZ(`lIS2&88cP&*9Hw^E23@bpG&eRKGKf&Fw!CZby$81)q3;7vCJ)
z&?G#~)YJD>S6boPxd%=vSQ};1%!VK#gK%jwD@tWCy*Y=oXl{oj2MnGD%ohK)lb&%7
z?l_W?0<QH{jt^s|c_>xo^M+4@3}-@~-EX|7m12}4_Kv!UGt*inbcNmmU7|7rfs1z;
zx7B*9y#MF@gXOF|eQWX7feE}f3AevUFu&UzTm^4|L|vqOix!P8w7N4pKxp|?Q%(Ar
z5cM2$mig~A5H?VQMasQ9jaSSB8JX6S;>v`CaEPqT$A-pkR@0kKiS^YcNtP`Cg!G4$
zhF>|c`DG(p-#PV9!n_Jssyu*AhTM$piYzKEQloiu^L<k01R|2~leN2$k$SZ5N$ZqM
zCz){%e&m9q>rZK_T|s`wLW!<4h(Z~+>Vhdhsb}8$cJ@5Dk6(v-4Ch<nGj+~@^jEps
zA8WNSNBy036&?oi0`UWhZxZ~i=kJVD!?`78X!j}u6%s4;2TWWz*I_Ki2IK=Uu*^_>
zUcBC6v^851s26iAJ5K|2=E~hp%eiAyYNhRTNu<zpo9SmHUP`Nav7A<UX9YewB;TuR
z<DcZWR0CT)w3k@tAh(q<r1uH?;%<pFIclO8F5YB@P+Rml#SV`@#0exl3xbxix<zJV
z>vbE1Yp1GfoJi;}bt4O`%g%A<=uw)<m!5!p))(0zD8D>Fc1Lr>g_mAZjEcQ3uq|l3
zSl|y^j>)rB_5GzqqWf-`Om)p`)ynYAF^FAa$M3;nAbBedncoR}0sfPjv0%NtE-R+)
zv{R2c<{Nel+$cyQ8Yk5u(<~@SQXr=u<SbZ<)<{t=j7{@1zHldEW#_u}+hKk~vvce?
z%eov*ewUL)JO7X~GeN+6q;PR3EzytXS6ge^BfbX(tpjt@siS_Eo#W@)`i?GKq@5I2
zktk2yX{A0Fk@xvEa#5r_rxce-8hc9D;kZXDPono!OslttEp^=gxZ`i&UE$b4@6NHS
zfYNh^loM9@UhLBh;IkBmadelMfzU7A(HwQDhl+NO7+Y0RwG$5ynLAav$D)%7Le)DT
zY<03iRqpqSkna|XotD^REg0EMz87?TaJNlCh;7|M3auv+afJBp%B_{EC&6vpW{y#}
z?0kM+RvFAavh>Ad#ThisY>-<jfZET<K{pz#oL?dlNJ3laNpHFO!btYFhv9<NN3ExY
zu#sN_x<d1s8<a$t0fw?|0=rPO+2;zbMj0eDuqzT~TJ~^t;c(zG3*kdo5~R6T52Y=~
zlquPW$c<6*HPG+#sfMss)Bt<CWH@-qe(hy~{HIh}a}TR>5}KksXZJ7|&$>hqVpOpL
zN{(5s+xm1{gp(lh?p1t*&2Z`Q_R3Pmoca0J^|<?lymK1T-7egwS5Q{>I1g?#c05FN
z^&Tm?zcFl4lz~2D1du*A@Wo+sY20a3uoLFUbE`<=AGYRd+R|dJu@b(Kh~Sue_BRd9
zBeYbqZ1XK151l%uZ1N17$@v=A`;tr%gdWI+#cJ$t35@Ts@0bfwgF-+ov>Mqt`KFgu
z*4aswA>+Y}MVx}bni)&KW(LUOo!lAt<9oodyLrlTz)YJ9I%d<iU%pFhn8PERi^CP(
zx{KDrYRu(zbI0AY<4`O@!})AK;j!7Tv4QrZMfXMwf5tGo)LBT$_-PW&yS&(*lKUBJ
zr<+B%u==%0rd^_*?O;o`O2)g6s%0?#^Ryy=+MmjlZDxvKphR`neeF>s_<*tz5Y`01
zF|l!3t2N1y1bXCj<Ps%xx(HTel-0s`b-Yt&{f=Vv2`RWrzEK_Pr{Sz$x`iIlqq_-m
zc`0yYN~i@}@|-l^BX@B(RQw)?F*Bt>Rq`#RpJv9Cw<6_khwDO;z1J^HzG!Ak`p=XO
z_D#B$j8}WF2WjhN<w!t}suatidPPiU&S0bXiVRo7t#q4Q4)d7+Mco&)8W0;VBXqd$
zp@@TMU+(k{#@t0YIb5S=KU5^;r|P?CDs)Sea%#C7ujo|VH%;`OIk^Ro4$Ic5-!z?;
zLlS55;Be}HwjT2gnIT%*3|-es&{0ptMbikrRs}vDHU=KmG2}lHS58K^Mav{=#~Ga_
z)z50v(Nhw(S3oUg=3oWHmUSH#Ic=;vKyg)S%TO7nTglUJ66$BX8p4t1Hhl&d@5(8%
zFDb**7I$5~k`hou_t?$IbXL4WKM-g}e=Yf4YAM9o_=Fixad!meP6Q%Q&z0>b?#gJ)
zCe)*v=I=vjiu1Tod8On7Of^wdsD;seFJ)z+88)3-CVrE3{d1>Z%RxTFe7i@WgCjYC
zE9YaCQ=tV^)wEcC*1eeT;)R=n%;d_-KhE0e+y=x<P(_zPL_yPuZ@V~lPPb))3tl`Q
zIKVU0<HlRb4twPOZc%E2R$wLm>NTBn#w24kD=`yH{%kqA$dWrjqeg40n}WaeFo|jA
z&RgSlH?5|guz1I}LrPr!9+&7c^Za#}Q0CA)Q{@Le8g}MCi(^zT1$vK;C5<qhZyp03
zpGB;hYZI6T{zeb2t6R80l<~ILcN+=80+B(?-geJ)X&3|}cWr69dtxt=4-#9R8CYuK
z5YN81-V<Dx3k;||gHcxLtZ&Z2-pBp<a<|xcIN0#YoD!=zL#g$h*+fvn=1XS|0%VI~
zN_8Uub<f{l>i;-8?{GHX_wVaaUD_|T6|KEh#HPB0h*2?O*9wC0(b}`6rFJD|Y+~;f
zn`(>3Ua?Zt7JF~$li%}ij;tg1b=<kG^F3atYCjId)bu*ZlmGWLUuq$DF0#m}eM?BA
z|CJ=guO3=TL@w!ckU7f%$!SDym4YvX&D6U8U-1U}8G|k&G#}4*la0Yfgp$o7^1UGx
zh2kao(D9tY^bup<mqa_{?#4v@C{WPa#`Tg7WkZ$fR8>OUd7x3N`SqSe!7CuJYcqw+
zyt~=CuHAinkj^}!Dk>R0487!;kIo%SPj#6?9!QX~^{3<&aT4<?v>bIu%6y#L5Ym&&
zi29uNOW2U@j<k|1CU*3Ph$xzJGF72vMhIWba_#TxFgYcusZ-vYgv;BcwK=ELtji9f
zJvC@_^c3-uIP>m)(~=eR;8X+XzC`;$K$`M}AKpqJB}x1Ye5k3DRJEYJEix4B%P9JY
za+<Zs7=*m=YMVW+0I_G=tNm?bS<G_uUp#*{Kn)yrJEd^i-aWZX0cLl9pBEP9c#vkT
zMe0(#+QBJV?}6x-)^OL9=MNWII+5(<!$PAAPh1vqwL8UogRZRTglpupGzd3U;rzdk
z=VXxLahPycbJ=!(Ow~fE6-(UYP6In!E5R=`e>X(`xm#V8My>TBD(hD2%=rRolVIx(
zHjRT_{3ZRJ&eI4@hK|o%RkvS*sJpvXxW8y;HP{^0XY+_llT&IhH_+k#+fcfh9W@<6
zZXdm$o!=x0A>pQ5iVwx5vx^WV4^E5<j2@fR0K&P2I$FIB67^(_s*`_oJ@bIw2(AwW
zt0(8YQW~jW+=xOO0~ZZ{O+)G_MCp>@;yUZki_@)X9Aa$X*xoyLxY3y`rNXutzneN{
zuJ6-#oTK-&hAGQ?=wOE^!6fD``6M-G-%3kST*=~kX%>>~z3K4wkv@WxCQ9UHuC?fE
zKbwoQ9-L#vb&yVK*~!xJ_u^W{M-HTDnf*pALapW%eF812O?lT(xWIbHd8E$#ZydJd
z_M@?1XdlCuYVxlinWz=JoB$jXA`RjnYDTv5o$ST*@71~MX&1^CI@t`|>m8Dt)#C>$
z3e~d^vK=8e-s(-(;qw!yy;*e%?(uToUv=^nxo0l~$-Bn)o*|#I3m#nWwn8r77%v`W
zWA<C_IFDj~|5LtZ9Rdfo5q!edAp@$#KwPULjqKdb9V-sqJb2B>DG`tqqGUlQYD_Px
z=M^}mplYT$G9~Oc{!~9o(pVQqyn5y!()PprdHUqtcJpee6ZI~FsW=g7L&TYX;SB~{
zw@p7SHC-EIxBkv|AmLiBwUBIxz9R*?U><aNhD{MX{5m4B{AZEhJ~|)|xn9R3uHBLJ
zcaW(tl^_x`o7uk=@*}5I?+$x$lefqjJS>G3(ei9$O0XB%+PcAna+e#Osy)9B+(zIi
z8TG;GJ`wT~330S7@z5EWUs~oZRr~}CnN$+jjshlNl$CX-H*8$zgn%qA;YWZa!>%ZK
z(+9v^WwuNsX?AH3vfnbZu@vL>@a5d{Pm?Z_Wj|Qd<)8HSORi6_A_tGGKX_^FtCqPm
z<w40;3r9x$00CC~+vurTQ#+HMSO2IcA!*@Xs+ASD&aJaYuKEgevDTTIA5XkWcIutW
z(4o%8Bs7#3{Qh~GT?2nubm=|D!>TARlL!BKg*=}7$ofn`S;&lIH?nu%5iC<F`K6Pt
z^%uolf~?W5B!1VJA2Zhsckb2Ua&||(<j2&uXWrn%j>V_1&CgvDr3c>n+yQsYKh!A^
z1S>=idVNrMO^EC$FH83|cldV8@V&m5l-Kr8W)r)jTPdm!<hO!WN%2L?cZ;03)f*8d
za7$N@FS3~LGT6c#3QX2h1UI+5H0tfli%x3J8o5l-jE0;9;lyTHUBIHb>>xwk26az-
zTQYBIEBlJFA+3vP$@|?X=q3hH>mWiJiW`fA;T(lAI9BaRtS#YI*A?aXnY=!wHluat
zHlz^!xBBi*{kI;|@7R$V6>(F%R=wY>{D^qE^Ak4fF3BP#@on<ybjuecK7kM^R?H3w
zC~?ki4Mx_+DX43lBV%So1aaw-N%f-HqyJNL(XA(asV&1ys^0Oho7_%)ex~S#WA+BA
zZ`^b|E<Xy!m6{-Ia)FLaYJ6hWL?@1r-D^T@<pyVgXCPev-iHLyl(8e`H&_Oli8Z@K
zMxHc;h(3EADb;K6fNTV8goSu*ds_0<(nd$_tg`7E_%W5-6C62G=SLC)HIs1s#BPb*
zXA~qNTuh%vPNpCvE$L-#erL4q`3`2W-_l3=#34vDBb6PFY_VO=cZ6L%<?+O9Ui%q4
zvk#ZaMe(q7M&n>AnyLGRvN+H99_ML|;Xi!_ytMX9U!R=YOrI@wi8F0?A;($tz1*Lg
zDp&t3#$z!NAQ4+&{xZ@hf7#>3UX^^DSa<YkUqh*`G&B2)Yz{q*Cdf>WVfv#;zm5Xk
z&nQ`gW>Fu0JZ^?PU)uu3N5^RLV~vXyzGl=k`7KAyKFVaFx&xS1V)<vrFE1|A|FnPp
zLQ4WUNotPCnoe&PxAKD{8CzL+0cul?|5;;(fT_C(vrkdHFnJRl9;oR(i7^P&fTJ3M
z$`4_-VR{=Ht2GzI>Q*0n3*DJD+6s2xYmV+J{edpgR44HQiES65@H!6;b{Njm($sZ=
zGJ+&z86KfQk_64S^X7f2b=7CtELioXmrkS2SGxyqw%9}$i1M5gd^-0zTDalL!jDWg
zQ}PqaU$?qLMkZ5iN^1011W4U(G*O5D-QFPZSJbzC$WOr%j}P_1BD#D<*u7-*`*y)z
z#4@?70K!C-{<8}KtbSk+tQ)5rV4fzTl&YY{l1M9zU4POO++vq!1i(z=eYq~tuG!m`
zGl2H(4|g;a5=PG7IVW_jJ%$F38>rGt3GMW^YnnSVKq+|(-k&;c{qOzk<&~wN6gTWG
z^R<NG-v&Dv2pUkGR&i@ub!51{c?<P@-wVV}>eZBu8FbyR?)BMh+X^fG?+;3FYxB6T
z$SxfElX<xs{s=tV-0nYWFr+G%RaAYQx^p%o;HuK%PwTw|VY{PWJ5LtU`^snDzK7}~
zUTXPHI=yyTP5a@1oh|If&Z2{H1L6EQ!xH{DL7r<oKU!-<n3wW+$t&jOqrAm?-Tv}?
ziCSQXm8T8-6KzLFUE-ohWoCNDH~8@juZ~4ARup(M8B%bMp;iPs6(~hdbcf^CBhKQ<
zPM<EpHAnrbLTb06^HbdP@xZKT6*H4)|1g2pFMWkoLUpkAsD3O-(hI4SV5J-p6=D(~
z<;s-u>hXcVn`%;jlEIhmU`#1H>R2ti;Bn6HdV9&ORp9i6o>WTv`yma<GxQz)?H|tL
zb{DxG4j{$iDMsau2QulbjS%UVELgZOk4T%V^XP?v2&2H~aq2IOOkD|WCUaY;gaknq
zo`-#Eu@X_pab0ZD1Ssm<7tUf9ty*t?x?+y20721Usjy$}P0q%k=gqZUx(LeU&;a_#
zUDi0P%s%cj)RXj2CZQ;RCX18UI(Sw4W~i=h+(@(Mf4CIYve;rcVn*n=laF5C_Nf=I
zHz&gZYPc8~-ug?Xr0IB7_}z$B4%EvM=HW#BjWt&Uls+*hQv!5s_}hmZWz2Q^M-?@a
z)j4Hl>B@ev*ah$9#SGnCUl2c5qzFm1PN5MTxa$|O%tY$tFMRu&b>$sqj#3)3NmY?+
zxw>bsERfB30^8E;r|6mJ^2z)RL$E6k8?6U;?yl?Wo>b>P;UD#XyQVu(i{jbbE2zmt
zO8vomQcSagI~id6bxVRHa-eq{{Zrc0Gpv)nLMJ_imSXFFhsqC2ATdAux^~3In=<~q
z_MMG4MYkf2WoeeR&N<Hs;+<zPHCWF^(BmE0&j>__*C*ZB$F*CS;_p2<zefJtO>$9O
z!d~1QABx7x6=a-5L~NQ?!E{*-FUQ&Rzm8lN<#W@smuM(KbwHz-$}?S*cYvwp^vD^;
z(V_vm39O#)-}e^=Ql`&O&e6MEpDwxP+qf!r+rPH@9AV=1{mi$B)bHKltG7>;a|JO0
z&s}AcYFs4*)Vp-=sD9HOj6zmx$+)~hIWC`D4dD_P4d_;S*zooa^!1-U(%o4fQ{tCk
zWA+@*F;Umii8(71epTHIFLEa|H~+VU@`mdtV~RC%Tt0ZjK(Y29MP5_6epTRBx^Twb
z5!B7q;D}y!RYWtxVMt|QxnXLh?wZbPw?Ee!m9E~G{ABB|!O|agi3Dn&Ss6`)XxvkF
zGoO_L59@)Zj}8~!alP{=SoDKL8_tWo4|Y(I@VEqLcGu=;dV@sK>z%~9gLtRxzT${)
z(P7v{qG0h9FjTBJI4n~Rd%?d!-KCB7a=F?MREspSGXwzp0|wkmq$7I70nzItKIe<z
zN71%C&ZX6%-xsBV8cT##ez^Xa2$QX~OEEoq1QubtVZ)1RW+`^Y$<imd^6oG_4SXrH
zbFt{|6al}j9u=vx&~_Gh2V1RM;*xd`Ifb8qN7o=C8E(S+4JmZm$sR?LKGA<%COW?d
z2Vp9ki|oT4erhQg`gN_9&n?1YE9O`1h5HfRE^A&AsC1Z+H~PYd?1LFli<ok&yg6Hq
zaIW0f1efUI${wSzP7L}1B4RK~7_C(FUW~(NlnY*PPlt1|G+rYbR_K%l*wq%X5?r57
z8uf^!=mC~CEHn7>S%7D)d?K@Is#>cIL*S|5t5i#xJq5#h$xh>fcmSce94Aw@Z=j03
zx$^6y)J$1>IS|dv_B#&OoPX$s?kL^lJ|BOj`@#$FP*ew^c$vSjZ?>D|fiM;t*Qf~g
zJPB<brq(jpQs%}shLH|B4UUbp8O54{yTiWjC~Mt1;%9>$yAoEmsE8+%b#084*(i}o
zIzU7DPdk6nwMwen0TR;2oXX)G&l5%t+MNdtk)o#3zO`3T769o;Rcbzt@;T-G5{A-g
z+jMlzqHw_{3TqT!vG9-vXNVBy5C-s0usgd(L(do?_u)mYFwh^k27kQ&gZOI3*Dxd5
zr5QH<VIIT4Gd)gaK-ZIqckRv@PT%wXc4&wh`@dh_7ZT1cD?gc!2t4ybEj-~*S2XzG
zV^pV&=Yi(&*fWMiI*%B!g?5v{b0HivcXiZVB-!h`*oSt#ubGBd^|o3s>hL#=+zL+|
zziMXHZIjC>;QRJuGwmOhz=Ki-H&DVCj@RJMvmZ}#b)K4j@-yrmY;(0&Enz9^Vl(AP
z<3ei-r>Z))mIMa1S-%tQjnE%(J`YM+AMtG6ppnb=mTz@Q3s9{i&e5zFf3NVQ{YUj5
z=`w3hY23;;-rl7U^&XCVD3I2BWV>rD@J>%3i%*;o-bqH*nCX3&jJGMH2e>ja*8m*3
zKIA;N|4C?d;2ZrQ1sP54QM(iHF8h9u8+mPo%(E-Gj-VX6Ns&Fx$XJr*KIF=Dji=q<
z@p)#X|4rw!VaHMR267ZGR;hiWeRTfH>-42cS37O^TH2ENKdPgAm8=W<jG#S%VTro>
zi^p(s_Nm*A`PC;UG!KWi2Sq6y+e~mkTWU}v;2+i7!#XaBdEW$6$K-`8Mm3?JPQ+DP
z(wRJDkDSe|d+K10xEy3HSi|D}OS;rsC;37yDD|9&VI{=B=dTic(=rR#a=9qs_bb()
zE4hDED3z-xryp-0`ucgn1W0ds|51ep7X72rptOITUM$K*DX2-4S~Bz4<l9ft8=R6?
z;gnYS)$UCRElQsNZRM35_QOA_%dm}>icR~y_Fr-n5C2hlQIk|3{iFIy>0hq9q7+pe
zhU`&|nln(IXNLqayFVm9Ip1i0H%VFloc?~?u+fISHGBu)-5$+&sR_A*k^*R_{IOB&
z;j}dmM6xNb3fZs_3!+Ri_IA2pg`{?JzvVnC_VVRX+>FYkPInt`3CVpl=1fIf(JqPd
zs5w@boVDR~YVj?DS)49zIOtc<+j`1}H@05}5o{skiG00joui+hwbaPGM~PR`WDUwY
zAKLP^{cp9lgqnoi?N=l7I5ztFo>6`!E_X2V?u-Vf@j|xtsDi|41ij~#?9~MML90sl
zbJ}Q+=eF;jCjVDo(-Jg6PT!IyLt-iJ?;B&L>wLA25sWGGQ<SFL)&S8;?8K($S|H!4
z4}tF=mCwk`V$}5KbdIF+f)ViInA*W)?+9H)&Y$vd!$6(L$eKOUz7K$hTQa&w*2Zd;
ze`}fY_gu2ZJ7x_M!jZ{m4aKny)+m?$(P&j!4;qe(6+QtRSo$~?Qou5sBX#tjM*Utq
zc8xPy=;hIXP#1;!DA+PwyR094-<UEM|5|*y7Y2SFHR}Dp@LTEF=aN3TPDJ6LGal$o
z<C{q*i{ipFJoJd#xxY4Sk%i7N`|SrriF&S070(ChT7Y<gu?M|v+r+E1M?_=(T=(B?
zX?DcC#mnRV=Xx;;zj5f!lp1Uw+P9+rf&$EW@Yv1vYLKr&|FYRU#81AVzjIS0Cai#v
z247!m8;O;u!B~_yiH>+Ta(G0OZv3MXmZc<28eeYqCSZ>dYuv*EUIb6OY!NT(q(hv8
zNS)xsgXOc1!qP`*T#B{YL@|sTH|NSK?K7Ne+lBiao!GsDIUCxmt=9j0`xl2|WXed9
zWGw-}jVu%LX(wSk32%U>b?`3QY;I`DSlb6K$E9FnGdQ_Jh(GJg(@SOYS#mdOvfju_
znL`?3O9-QOORZgmyoiragtWSm;`KAO1Ws2(2B_yy_HttGc)vGuZ61_seJp{0<^ZHV
z2%J=o$0q7R<FeygoPwAZbzM*lXyZHD%0jIGnED6&%D%CGa_xnLX1`thK^tK%O>90_
zY4<}D^i}Ll@0N*wR3+qvhW6kbU$4adSwee3J!_<@<3oHt2oWGAEo`B3LLoY*PFuA!
z#UwWsV_f!Ij7RPm$k1g+3RSgYh<tn&W6TkLcV6kvevkFrBXYuAqRFh;LUbddwuH2a
z=-tJusO~>y)!zz#Kxf2U=E{{CCVRnIvD#0XmJ8WLe1CXg?Y5@x>_bBjVO*L90j*3X
z_*crxPu^}e+or?W+DvFo3JZT(3lXJUM6-FzB&(8pgk_<e_eUDKzBC!#jmiJJ9*(<c
z5ZM5+BWN@kl5`WmsuYog9|Z-zK}^pSEkDzGYf>!qjm;K;Iwtc|x)PPEG>Gc)CWeyM
zJqx-&Cfd*1+a4^k`O%c3m6z|a*G9<UGMXPhja`Y)tIMePKm?4xRn*udxUnyO@u5hn
zZ@YHm>I`}SYHfFjJp#D#5J-D*#_9+50bSmyy1SGet|FZFZyG&4;OlM4xH<@Br_XOh
zqQa+()}p9&CYVl(K}9lV9TA61GIg%b`UIIi>6dqJHkFg#<v)xwP<Scy8>6hB|9KJ6
z&?*Al5|QHn{Y%yr9~t3lxzM)3YyR3a&A59)=Fn)f7InQU=^xcrvVrkh|N76O+eh3Q
zR`%>C9its}esZk}@lXxC;{|K@%TjlW@bNSOKwxYQt*$CJerEDE@<JZ7v{ujdGXJI6
zx-zj#e^y4qFl+@3tbhGp;*dx~-O56m0%V(OHfZfZnEL%pu#)$}sUIEwqe|nf1}hCi
zwetF4$m8Z!snA_E{Wt)D_6`H0d3ul$b)q1M&Dr{7U;I4lWHC@myEqP81?MH+WKJ>c
zP4*emiv&geK9VLP;L79%f^d#g1fm5^8u9-j<Qe6K7tiT-YzBt^9yO~C+Mp)BC=&i(
z8R5lZ>t3W;#{Xnyb>GK<@&)L$FT8$;kJG6_k(B!sT%%`_zRG(XFZy>HHso9(JGB*h
zAhAgcB&|8~9!@cvM`60|O2~xjex9CqH@1bk)liBqZ}J7srWd_;eg6EufhZlfWRwSR
z=lwP^$~51dQm?SqfQVJ+_^Rgjr)7oE%kQI@4@fEpga`ymxK1!l8$xbY{iAaKY}3@P
zzOZgAlPU0NlsnghqLs^+#B|E?T5T8>YDdb@o^~Y_oT|y2BDhh>-6?@4WtA5-AhhH1
zkbjeHmJ;=zk%m4B0x}>>lK6e$t&f+s#DY{d5&qGV4PC^Q{!IUolTG#*QOjH?smIaq
z8}^w;?|p(>mPRCJy3y&Mxi(rT4!=BRaF0eiJiKIV$1B3=+<?Ip`8-OSy9!M;xxbVc
z@O=yFqx6sJ(a0gB<bSf_0#VAU+jsbQeZooKpzHHI$)<Su_eRgO*L%g<C?ihmhPAOr
zi4nYH-;%kE)B2`cWutuZaoncjPP+&YfdPP+&@x1y*;%99?V~*%)XZ;9RH2S*pk-{*
z+?bdn9hTYz;0-p~A7gjDI@`F>Gk=YZ*KHwZcFmQwyPsMY^32aa0B0jGcAhcubD8Ca
zd9_G_5X&fo-T^ZRVe64yZ+Lv|VmY22Z-Yy)HE*^8+viGtnc~mSrjv*+uF|NM?CR(E
zi@9;R$gWKUE*yW3AdMk|%I}s|Ecu5HBJ6TTe}&1U1jzM%tlLV-GZt{*c<Sp2)AG8o
zOPuC>h>t9hc~tK_q60tSuo++P;y$uf7d_H5&XY$1U^x_=`gp8Zms;TS#)@&2p{sJb
z)n4@{O%517)uLU5Qnwfx)_GWWGm+axaooeA)>dnz)+6j)&Qs1Tz7MHa>JS;1fr_`N
zbFl2HE6x$1MQzuu9nZ_2cCk|NeR(abgqKOJKX9sSZKOG8X`Rvu@dT5`m}2aSoDu&v
zW^9aYk<zlzVlP`1!{v37Uv4%K9Glv-<m|V7m=1E1G0xw7DoA0eG6}i#42M<14Lhi4
z2alnm|5eXj)*uSv82~qzcn@Y;(EB0cJtJ($I-<XiX-6I-PG2zbFzbZefIH{?g?-uR
z8Mi8o!aLa@;|O`>XZfQ)d5L&PNj@0zkTtPrgzHm&Qi8k`<OkpGgL1~!l8coxPXut_
z<A?k?XLdfe;hdhe+0^ru`ozxCY2jyaZ=y3a=5Ealzq5-shxI!fBCHf%JE@N5Pr!@s
z;hXd)^H8~O<BG)xuO*uNWY)5L6CWO@hL+YYWcPagWU6}futO<&I;Wg2rfb!o0MIx_
zfZ!g|w+}#f6d1OKbuYKi{Vfouz?}}a5c(;;_IxeT{7*Ky^qz@veit^SY~Z)qEMbA9
z0SQGQx>?n%ye{#B9Yf+<keGwbqXG_-QVGA=1H+8Y>@uLmg;;vER`6tK`mxKgA3W5l
zI$WXd>H3vOlu5SkuENQa(SB)Er149_u=Y>Etd^JyxJSHWjVg^RWXA_~1Xy0`(}Exs
zSxOTPqXnxR&%6@05~gLUgJm!p9<CsfyHh1#iz%(nVG)C#X7SQ7bJgPQGyI}88fadw
zb@Dh*%Mv>M$~TN4vQ)uR8*Z6Hv>2EO_7u6#-QNAI-U~QddMlB7iViXlFak~B!?ot1
zFgciNDqeT05$JrSWKH1<5Nox}^Na7!Mfvj!d5hi-7jSVMj4H0CRn8)H1}MLYZO74i
zQ*6Gl0LfLH!P2OCG&jPtb!e3{+T(Q|<l97KXI>S!^u9VO-0mOM=(9=ePf*Di(vY#s
zaSxPU9+o^QTeXAkwnP06d6GbVwknKuTIiR<c>IbLRNgn3!nVXSlX&>tu+5x8tFP%_
zIvfnv!Fdeuf31abz~(y?E=VruR<52wd);EM1F9vLj(it>1aR*cGaB)bd7&6IAT}G_
zg%J$Xrl;j=;Gb0$;8Xv7phX+)*~uLN3b0!iXnSlT%~F?DxM9+S)QkF%Tkx1Wu8_2D
zKnZ>Py({a<#*kM(Aja>KVA-dqac@RbB3l>C_GRoWi;`YuX5&ibqMHhAw84oBt{k14
zxae^)w!sH(kG_=n`uXM4A{FP=^I`8dn4RjNZP|$jiEEpo563%oWkzyS$JSGwAL8KW
zBWr~fDTGfIMPskt{o3Tx?EQd}k{5&tsLzj`#)N5=w|5>!`q(c5TjlY?%bDk~!{-un
z>wJ`b<=_3IcBa$Y)#36GWO7%xg2bzLRR;k&r?G1RN~S@d-I5kFALcX#Jsh)j#$69m
zYFAs@JI~y+FDUfYIdNE(=x&Nqe?6albEgQlg}GuONUff?@2QPJHiO?S9rW(S#>N9L
ze`l&Qr_M7D&nHnbyA%D;^gw#GJAr(A2cKvb&ZGRr&7k^CN;Q;z<nuwj?IZZrz&pkA
zgK>=a*h;NW-o|;gfso4ffK&78J-N+kw6k-n-T8`3in~*O_0p|*d^111Y=`!nAakN{
za%f3udBh}g%iI5>_=RotE8!LMyThs5%gALk|Av&$4I>*2&%3GwIyuSQ4_!<j`<gAL
zL^of};ka&>;2XBSID9(pew{1m)}`2S3wQeSFU#T9IxGKV&0-HABSq1-q3yxN>XNvm
zk$oPel!NE?x-mQIug2uws<fH=9g&WH(ytW@OhYKCO|pwl2m)ZD=l%9NbOzUB?1*nZ
z9tR+wDL7q&Zud}c+c8qs52DuPcB1)zJD9gSIqxIk5dGVEH{Ny5UG4<?35k#6GcpJH
z<_k~pM1xm3ON~Z1|A&f7FB9FvQMhjQ2{I{q;fw-I!Z5{x;~z!5P2w8GKd@QFvy1wi
z5}cKlfD^d0@B0SI5t<S4n2Ga#`v-EThYh#rotT`(`kj<z{)1fark6QFbdT&~4?<`}
z+?xTZd0Z=C-)hpi@uQm>?aMKtT1rM(g{C}Ca&}W-rl#IvjaVVTL+fZZy~E|IeXv|S
zF(w*2Vz2JOiX!zyD(0_<5Zp90d%(+#qZ>}al1(vA8Bi+;C#=3gI%nN)U@rWi%iTq}
zog4!XuDc1TZ`?ZWELTZ1>5SDGyPpL0TxI6Pps0fglzqs5tJ?9!d;?}l&Q(StPSE=|
zBE7m$MsR)<?wEcgk{8b7>RN8E+jV~$wG^VM5%uW1A_W^OxK30M;w5XiyU*m&2MWz?
zpvv(VdGG!tqeZi9>VnM1=W2W1OR@-sqhep{q03SbcQYuZuRUKgG7L8rZt<n|s74H*
zzrNYpR*!)u5IdqE9BB)HM(x{bt$~v@2p`q!>d~cYqiutiD~u3;w1)VSJ{}!k{+q(}
zjUx6-(IG^0&;i-%*XCP#D<%mB?P#>+y5pR>XNLh);L)AY#73E3;z_^Ebd3{Thi=?(
zoNzQ>s`=kDS<usM*h}SpJ^9(or97>Y$D+fzAIh=CKZx54Ez~@k(SITx%a7lKu|o~D
zmEo4`Iio>pf>u+i(#H4?GQQ|so6CxFRwoYXKb3X>zQO=n_ESHbc7zp8d1(73-}bmc
zYIr_jZ}V3H3+fvw{P*7HtN!5xW`R#oD`1xR2wGeT$&Gk9%u_V>HX3xX97_W3?(#JU
zI?8XmrbY~S!qL1O6aWy+|FNlTZ6lLZj--m5xnZ||L;f+GZ~W)}Z`wCLb3tuf*LM;y
z3h?#h>ZbZR0sO`rbSazk-Thty<)WE~^+sWh3o1QJM?A9XC%bi@8qKZ@vQo;z#4p+C
zNY$(c6kAlrnNsjj6|WgBa(|v7wL}_&bIAF#D*R3Cq+dEj{4h?J&)J&U>SzyR#HVvC
zuMskFjrK~(F6jRF)PLz;2oM{b)*=U!DJ+Nkx@+a=QKx{Ju97Izpc!U0Q?7RLaj@Hh
zrbcTDJ0U1-<yjFxYgY5;TMha=c4FGER$$^C;8oFM7LTWj4ip)K=wRWgP<NYul{wkz
zl1Aiqbu^yE)s%Z2QUCankvgk?Ll46B@h~bjrnB%WpM~zZLyt<ugjC6$a$(eON0x6d
zZ}O-)n|e6tbuZ;ZN7h{~cS6*~jPPluM6f}il!i2_4Lk*4{jIL{cE7de8K&9pUA<r-
zD37Ij=19@AkEl7)=$d2ngHN4pE}gPeW`C62w(JE9sSB1h+nK`5Z^VXB4SLrz?hsi9
z&%@%~!_NYw<KqpoJpap2^mNL3rTZ<a8R0r7gE9t?gm*m%&(tyOGZk}kd#x$h1m|E~
z2Rndg!|{b3Va+A;5x7_#X4RW90!){tKZgo%07i@X$!Wb_@gI7ys>tR_#r9EYN6ncM
z{c7V8K9;C{nE$Tds%+?5U%JvUUGh2nk>;^cGo>BE1-14rKegt*4V~PwpWog9;LGp6
z>aErroLM@MVt@p)Vt^LdMdBkvBFDaQaOO;SHrp+44oz={)0?Ve;H^F#i*yp9zs8-x
zV)-#$TDYn>BwuHC)JZ`R48hYq%6KbEW)ugEse)rmr0#n5E3_o9)7xBEFaH+Th4h9S
zv<4|i=IHsN=H(xp3D$+YkFQ{t30KWHe|uomfGgz_(1?5z=*^5f3I?}?Da_*6g-m#Q
z8^fk4t~Ruv_wyj=P09$i*>q@Lw26lCFMnyfELTzus!MZzbC|oC{VsDOs%=WyvbUOk
z4mUZp1__>m-Ypma3ptC($XfduCssym+2<-0h4-1h<_ks0m3{oN&OeXIn_P(u;o$L5
zYdZrUM^U!4cI#fPA$N5_2`ysD^GwTg-8d)XHN9!OVOBLinU30&3^lN?mG;q}gUEpF
zuCKx=Vgfs>(ka#oHz&iZV02A=Et^op((rXHfT)E^hG8<^idRqmZS;<r?pDG(N>Oto
zo1}Q~&L_NxVP|J_UA|%sAmL10zA`~=f~$Y!Ko>ex8hw~YOQd*w>IFV)XiwW*n|@l0
z(^wqPDtd-)pUg?%snzb5Pjl~wmu{v5CdJ3o{saOQ1qm%;PY0R)CJ;9t8<+}9=4d&$
zBrjBoeWHbyueREF*Gf7R9w}7v(L(Fn5L(3p(NLEKkJwI+`IaT_VfzON-_c^t#-(+C
zFEdg>e#Qs4^Ke;BQzv`v%<d!6_$JpnxAdSD!)M|x94eRA{6$>`u5CzPa;C)hX;&J(
z1u_$q3p4lGQ}W8e&`8$ENSuR0)?51)`!t<Lu-}=Wr|@d|%qWjy#mq-~J6ACnk6u5i
zaoo0HwWt177!@SGQFqe4FOyGV`6k;oaCIQPi|BO7=}*m`F%76Oa-0&IX0T=~d~8||
zPt=E0Z)@#!T9aCWb*{VW)@q2(0V|_<Q#-=%MtQ233C&~&YA2~rsVV+vgN%UWvTvq}
z9sEQaz`ORt2qj)fw2P$4W6^9UpGPB}v(?$NJ>OoXi%2Y^3~^xPuiuh*jZ$q4#iSoE
zy+RbH503Q<kK8V194@{OFF3Q@cdW2~>G9N*E8odS_&arErF7sa(HB@$UEZ76Zvd0q
zsK#RG%Re>|+c(hCrwTV6nhSj2pWdsg9W`xUB2duU9ofRz>edSB-HuWQyof#vrEIq+
zN8`Aklt4v2do~Qrpd~u5jTe>)%}E)27K7&VBD7Q6X?LwTOl9*d_7;DBb{_HJV%XA0
z#vBV1zX#WuKA!M5U~@lYlPO4@Zs^+EWt>)!Z1{|Jsu^JOjF!zYqyzS-i@9MSRS>*x
zg~s&RzU}B*k&<K?p9@TJR#94G7ucM?%Bfu0CMURft8NxLHEkL=1uCKbOuGGaEzWh>
z8*guy)HwX{tE1`k@eZmh+;-aV4fi`DkXVXAingYtW#lh28hC^~rg{8!hL}~iLu8{i
zv4)1%-(&Xc<i~mROlo}HTh~}kV+2IxcRg)1AO_x_=xEZKf@TYBILr|Nnd&?=Q`nnf
zZ$rsv0wD<h+tecM>^AjiumINaut#PhiUR&IuHvqW$s^j65=EywK*vVPIIo_Dh-imj
z?l)FW{GLlTPnF9in#Dyywg2e8yXY%L>G1xLVzy2%uhD`kNIOnxBL3ud`%7RQ6{<)b
zd5#I^bN&^->t53v_#UG#;GaA5kky|mA#t5SqR+L2oo;@P*g;YjPIgW>XUdJ5JSE#p
zwnfK}N!p7=;eHW&TWG9Pr0>&p_l2snD)#2K4}df$+j#TG%X`X?4rZD+kzpHZ`7R~5
zM^4+m-3O~cYq&~l!8BLo>BZ5Nw88aLAi*+hL80e!_Wx#x6t!i>s1f>T!FLph);bye
z)7RKc!<3#Qp+|3$egXqvCx^PRUFu^t`1iDJje<Qo(-zo;Tqq*Yjl9ZP(d|#8K_9EP
zz{Bf#&usco;P075{rTLm@5ASL?O*H@gQ70Ev}{Jh^onr(h`F6~*yx%4;8D;*W&uTV
z7zuG*rO612y;SEC$QzCJ3V1iDAW`sT^f`>8dOW()Z~NUDI6+>`EIMuP9?;DEYEy!G
z!diD*O^EX$?8<4Q)%-j<@R^SQVO{g+i1!NiXl0O)MqZg)ic|a$5_q4_1kk`OOS_m=
zYANmcW15ZnxVP$__`b^w_m9b$d9A;_p6oGGCe^3XkzGt67)>zfZ^>_mMAPG+yZW-8
z4LI5_X)7+@CAKg>@T@*Qee-c`$xggCw6%x~cCc>ekEwIQZ*02YB2Qa4cQCFU^!f13
zN~hx=og&YyD@~{s|Aw<2PvT1-aqFZ#%6YLDr#Rf;H_HTO9trvrebO)XX|L{$Er{D7
zj$D_SSR{?-ECJVwcs<8PM7@!UXoN!S3@xbF6=;{8A-TZ^x@M^hp+M|U#L?|)-h1vc
z0dAggAh3e>_djqb?!vRQC#r?r%NQRl^%cRxbhhd&SMPbJI7h*H;J={*YtQ2@4MpeY
z%kHq=u`b;-Yg&;mxHQ97a=rc#XGRvi^++!)2<M2XDG=5lj+Ozzo=g#um)46;EW8z#
zk8HA|Nt?|RZSVqbUW!{Gr#h7>vElyAaa5hbiw-{4o?JKT*dXa(Pg$3>VHW(T^u9#9
zqGeh*zV6%lt0T^WKv)c=OnH5pVVPCWR04dB{$o`|f8dA5F3Fa+AVrb&9+M{@wDJ(i
z{<*oJp$I2a`rYE;5itm8?AP@?5i!rXE*6Svf7+ptEF?F6L2<q6e(|((oWU8(@IKd|
z$E{tNHH?*;-1=j{cmQYjcwB2SCDc|6Fsq(S58x>^6s#(rGuz<O-B$O?RN029715UG
zi15Jn2O`|2FWG%tlQ@t)mhth&ZXU5TybH^CW^*~kS<GIF87?|+tM>hD9X-ttvh|)$
z>^5@brwLcG32x%d*rZn;54P+t%puIkTvbOkV2qmh1$4hw<|vx>9{0@Z3Yj4|vMM7-
zCth=O*BJ4WSAtDD=$q><^FBlbJknaTyX<j~A$wgcWv8)4XyK;ljQFydlel#2ld9sO
zgJ?EuV-wiGq-YYcN@E6850;J4*g`q!JB15mYIlF?7ah{Wp|g0J9E9Zj{$l3dEMcqP
z;%_UfP#l?qfOQEJUx_?A5Z($TB60U;Y^F8jekz3A&?Yx*a~vr91?wcfg0eA;Y=7#W
z;4Lb}L^juH3h8JX0}6;_NwC9@BxY3vjYc#7QFohu$^B{VvQLWUx<=_SU0_+X5U~s2
zemtd+5X)#Z%^14Nzh_NIJ-vEA)Id3L#fKX(Yf;d7chUG4FJ94xuE8kh8u^kIj60j_
zm<!DYrLn}axbn~%;AHF6X297Y4O_x8-bwT#pMj1g^$GN)S~}VTuTFZIDCTcMAIFJ*
zJ5jh42NtPimak5;RNrddS$x<iZYls_ve7R2>c^_+nGu}o-nUNYa^%^48!1|&LS{sH
z*3DL3w2dqj4}E<JceUIL`6=4UsJzH9i8eF5qx`mPF4$U9`qO+(J5E*uLt~-k#@(g!
zT>E~JoqBl`P>60!CbJ)3=OZMOp{WMR%fQz+M-SEGS#H1j9c$TK`jhkwQ7XmYT39mG
zbP@2u#OEG4spd_X;U@QE!=ryxa^hmT8Wz)=i{csKD&h*b=CbJlK!~ZZZI?JUK1c9)
zZjKcWvvQ(MPOdM1=KUk_G7>u?@U|%Tfcd!ds)1?RidoZ$e)LbV#H|WDY?3NHAID85
z3zr?@191^KNk*8>nKdZa|4ngkd|aBM&dvYLnPf9{14eumRA)c)YDjW#HS(<-fK`c7
ze2!nB+q$qwMMH}z;J1@tt4HAu*Jd8~B5hG+w?SeaynoCuXQA(xb#LX0I*hP^6=Ix)
z|LDuDR&Ly^_It_e^8CH~fh`F{WLeMtSmFDhZ<D~5yGq8=gQ<jO)h{tW4DzL!peHJc
z0Gy-DZYM=Loz!`gNuyk&-8S_E-3Wxk^Cy$w2CNB8)qQj9!aw!rG}3g<e7bZ?lPz}Q
zjyU8!<aNY!lVM)=hD7}o8i;`zY0h;c(&;yNHuOH!Eq(%7ky(Xt0rL;12+69xjD=F1
zS^uc+B9Hp!Z1?}qLQj`w^MTM@*&8)&NMdu5DOCr&^-5g#=}-lY^6ufi1>L)sqdYh+
z|4uODmua6r-@N&?`8R}v+<H~Hb3N6`ND%5A%46$Np#Xj#&3&w0atnCGkw09+&~d^1
z+sn@AH%zCfvPWT{udv?rdt5N|x7O+We@=2koe)3U{--yM&7!y351i`}Y)+bsdv8Bo
zBh<}!9{1}deb>UqDj6^m!2>o%uOAm%cHT`08;zAWG{4cBQ`>#;0Cm<>b4RAokOJT;
zZk{~AEZ`(N+z2-b|J(YKkE0cce`LG$K=IZT-x9Q^2FU$50EZWs(Z(#8^3BSEOVXy~
zY7<aH4_$WryTtMx^N^_r#&r};eieA)Ae1c(o^EM;YQmIb3^AN={v=@SXo{xz7k)A{
zR9&dtXX$y(ym?_wQety#r>0A|>-@0+Ci<k#hwk0{e%cmBcL68l>15v*-ME+1#@(7r
zDNx8iX2zWrE-w`y^8%R#Cu9Q-V2-8qrB9-?L^#gox*m+?KVRvQ7DT|DT!k!W^rY)e
zePKJ1rvmUd3W!FvJ6~j~?JBJF!w@n_K1G&D$T2JEAC-^=4MR?~(Bv><<JM1aN?^pQ
z?4DSTW2|ewlHRZ|{j%G~>`RytSUSWY)wyGBgq&XA&2GC-y5k0>ZxvrnGhnOBW+~#G
zng$8Ed@_Bd&iz(*YW0q|4^t753!J~0*Z$Uq;ZU41PN)^iBl?e-YV<2k504g%rMbtY
z6}IYcVFGhIpVbuYanZ9i1%Q#weyJ--CDNh2BGlb`55vbCN5XLKv1JO<(L5!m@hFi8
z83k>+=V1cT4;d0DQsUyXlmNP&+IHu;%l-=tHJ@Q5#hnnVEhZWNW|P)l_2{1Sgt3|f
z#copRp&GZcu!Vz+8=$p-yT*H&cM~3zT2o3rHqM87D=m!OPGeBN6ymo0W)a%7NOli>
zkBQOg0(O7(#RqPI8&Z$GVmsV?-&1b7?V1Mlc10C`vfp~%3(_3X(dXE^7gB?~SV75b
zB{;|Bhf>Dw!jpmtPtk8Z6<Kn?Er2gcMNGJ+Rp7{*Os<X>RUiWa)aefrW;pI6Xndvu
zm};FQnuEQ+2FW_<%{~Ltb6>BP)r%_0zKDsN<et5tlw?^N{^yWt+@zRkaVG5&m#)C1
zD{eRp!Ym3r%E>Qc(Kt#!KDSFiUXQ5BPmcbgB;nW*t1rpU>ZlYRsiU4Q`>Hc$rZ-^U
z7_{-4FH`ASw|~MHOOpf*=oPM8<BJIkDk5Y6q4c&_f*Vj0QHMKAzZcD7-L)+%{>M;S
zlc4v#a-Hf;legOsh6Fu9Wl{Lef;K{!H|?`Abr&GJg}kY-TMy^;92m7t0@^E;>x9{O
zeN(w|6fbHO`MsL<q$~Yz9FCHzZA4k9y=1KAvlR4OqC{Y74f|0{>s7@=Gj>m*k?NHA
zPreG;F#{51tdaa>r(RALVoNZJGnIC0XP-aj0pIsN0zW30ViBT$)rn`08&2d#g~_@l
z7RFl7Dnh)ey*gnt|F*A(gx{QKecT03lE-E2{YlrZBW6fUTLB$#?9(?*!a%5wd~I4z
zC&9L9{+M}<zDu>AYGX${1^u)Cs5Die(p9<?&y2_Jc&wq~G0j6lEZPThE2--+X#{@*
z(-+v{4lK^VCv{W&Hv7=UxJ9XRys#-S!GY_SELQ@y*{kEcA3^I)v)B~fMK|KP`fxm9
z`5{x~??8z%U$83vyn2nGP|df;;Zs+1q12cGEyHNw?4g(xH~$r}Gl}mn2y5mgZGf5}
z(Za5tcu(260op}s)he>k%dI&;RsJ1snMCQ_Mz<5UpFgF+P5jU>ZLS>+<!4)B=#4qq
z^AE1K7dHZQvJWTDYasiBDtdcd!KAN8N52=B6IJ{-+cI>Tp1&Il`lX_}HXZUlh>g4&
zTXH&5(YrQm>g1)Ox!!uaER$#aA5}!FTCCbAWPd>UU1y+dF>K%rj!<U|GMoNv6CaD-
z_(xTC<yz4WP5KhjEso9mM>WO&&hP*^8=ERL5cI+Ig#W?~WsHwlo2Zw2uk-q)kJ6LM
zdMVHUt4IFM66NG?&WsmL1Av&5gny%FPkT0aojU&BZ-zUdWd0a3fu_$^Z?VZ@SH%)L
z{N+?`eAZfu=ucDt@Do`bqKo=AqtOx{;a}RmO7It&e;PP5Vh`ANiL69^kO7^uN=}7U
zS~<5rIq1^*A3$y!e#T~1<V_Uby;-)qjG*s;)mgxCpE2}RlPrt(vhS^mm0k-Fxcbq*
z7lY5_<N2S<l|_EL*=2e=P9XdFq=&G%7T5RVg{w-#M9SPRPA1TbHxRzs&RRPDOe}vI
zj!4qBNUjj?4~o%h+`56}4F!;XfZBVNPbi9})8F1!UQOda?teV)9bUj$1ssim@urPz
ze2@d*{OvE{SNjmow}mU8lC*g8Ajf<C%SMAQQom*wEBeOh@$PPp1jy+7x=olu^wUqT
zDji4uQQ3&Y*!gw!#y?aN1TL-rw5?%``!TvjARWJ&jzr;%2yhuoVseumsNr=`IlOzu
z>K+1&jIz^sQzv2)pp08(Dza=^Hez1>HKN7nqv9+M&4mbtYXl2GMJy-6Vge2jHiG?n
zc#~u{Aj6lEfEwvR(O#6}4&Q6-1emV&&T$utIxT2&b#$G+yk3{U3B6brNQ|ooPfung
z@s665v^E&1@S~PP)q_~pm1rI~dEaeT{T2H*{sr=q?9|1o-8aFgK3fC<--}RGvYr9W
zk^Ad-zs5-<%E%CLU2rSCNjLp>>WRe>GZy3>O6hoG(_+wtlz+_<i8|!jhN@>n^U%T-
zLP0$b!6s!R*u)Jg!)^AHYYm6<%XqK~anf<N9O0ao_S)cz4_OU$4n`nni+GB64j<&)
z>k({hEWN3}Y?ilcm+BSb)%ll??}%B4pU8PSVaRAW41HztwTHoLn?=>Fqg+NiQELvE
ziZm85HoxwT&*QP8ZB`j&8odmy?$V9bIR*QA)j0K)E{B@rQ(!?43L~qwFg>-=%eju@
z$W||AS7$@g^-!_!F+R7yUW(%n01Kb633-&BOW7h|vnfU7IAJWi#$kAm8GQynCVb9y
zrRrQ4usdP)uir)ZnLGC)7}~`wG=N0s0oT7H>O1#tW=obsc)GJWK>K+<Hj6l|!j{rU
zbsyOZQB$rN%2h<W_eE78Fl*s^L1ao{vn->}v$DD3mpyjHvwcbtpFtJ{l^h2$o5WV}
zaQ?nj3H6Z>j2pojYHD}a{WnDLq(=*E3*40zll11|oGjLenoj%y2rcX28tU7{3GY3{
zDMZSyP-wWK+@*Tbvbre)j-$Wx7)Yow?Q?Vx%ZTIJ|6d)`GK#)lErVLh$pvE1tY1OB
z=KQ>RBt2g3Rs;R35UXheGxU?eAl%EDwwW5O68;+XCmNb552+=bO*GR*Z!!`M|4c-&
zck|^qw%>Tl_aEKPxh=-qGKq<yr{vSa5~F!x5QdD^UmP<<aZjAiM@(9&tXv;eXDB=9
zx@(qRja`fv5v%<ey?3$5W^BjP%va{Y`6#CMbHY?@u3@*<i$5ZE`?!p2ho!58H4rGL
zY}u?`Cs9kpY1b{D7ee4S=*gUr=S#CJo4_MpRk&)&;;fWRX|!YB*fKq(#74b|vZpVF
zy*Ekgvhl@GFTb|4*hB?mx(8U}=F`oTaxwof1Dtj^2->uCrR2BrZ2Xoi79RJ$ihZfp
ztxYFVBr4%8K0aJQHD+!#4Rv~J%$8mJyN0UJ)qAh%0?%5<GmO1p&lhIcA^r~^NUy#A
zRvf?S8?Exx*7{zjj$$(UK|>Flp^3{*(0;W`hV+~2k!q2AGlQsB+}5h-NUj$6SpUwL
z=mDo|J!=xUjo-8(!Aes@!WG6mFeU9ryj=?seBQX78s`>e=Et5-xqEcHvHm`OYmt3#
zk;e7Ki0d^$6kGhuZ05Qx;}mJmsQg5_YyW{Ycad%4B4y~Mo{&8E`qw4k{*`!_R2sKZ
zGVSxUytCH_=3b@0Qqi-t(yEkAmBy`@?yv!Iah?04t&o>O>b4rwj(pBn)sW{y?kB_V
zWO$>ow5b#ma+i|Z`-dJA2N(YA69;!*jNbk0cybV&L?1uuGb#D;hg*VU+svNh4D3v1
zH1U5&JQRqQK@TpVFZaFAKdPrEQbF|v2Ahf$i!mwmkj|re;UzcIY+r~?msN?6f|$2$
z0;_tJR?*Jkhj$@&6Q%8ZKhnjEaR!hZ`O@wZ!00{MF_KoSR_yHp7RW_>L!Rc@UL;2&
zu6<P`;;OSHsR#s+4LnKKGSaQrIP(y{)>9z^taX|)D&D+<sz)Bu%9XO|q5`e*H-M-q
z)x6T14yexay2M#6bHIk)6Q{w(Ewn5AxTV@->wiR;BRRR7*>>F6M3*n0Pa<Hy*+&|G
zEQy`a{zH=~uWF(4r?=}7FxC)TW=7*|vr9?pz6H8`6jMIws{d^;dpCXZ))7&P>#3or
z=o9KW`I6FyLfpMPrUub5>;j&TjKFt9j#gzBFv40-hS?A|XItz721{w}`K9Ss!HWY<
zjPPAs+c>)`B{-oLVXvw-`c$}KPTa7h(&Zb8d-@&`H)*h0zH6)IH@hj{-!jkU%An_#
zn>_#JlP7^?tOPr`v&hi2t0l}J_)>ct4E>!)lwzeH)7Y^605vRB%mqpEwaCQ%ne1I=
zciA`Pi(M&f(F-;Rjmq=MeS@kJH{WjE)0irQr$lGHlf~%+IBvLc&v+i}MD$=8H8moI
zIoi(ZN>hESbI!#JlPLCa%}(G!)>7Y#%QXJ2w3P}}jboz4=kQ0Pe(C?HaA2&R_-;=y
zg2$Aj#fEoWONmWnG$;M!{9?tzlNPwIZwVQDoM4_t7^iK7W<wGviE$Ces9L*&-TQ{j
zEP{l5#E3oUSCn6Izy~c`x)-FW;!W3NZ@jDt(5Q}uVHeU)8JPOX8t$9D;!pC}>rJqD
zHC7uDDHkq;^pkC{@sT#zW;tpadYiUbcYsfg!X_G9wqL}C-%~i)qd+UhjyO}AoVOlE
zoEG-Fl-394OCA;QReIFIt|*OTBx?=v)!o#^hlLm~tMr^j3h5d2lJ^^@%m<xy{%E=J
z2eAw0hOhT?2Ui{kt-Ny>Db5+}gg3eKykakRBHMx2=<75q{!y_x&@6FF>^Y}YoRv_h
z3)&ePu?tm{$8m*cZBmf+L==A}`ndhj^-B8G{))0uE`0apA60HU%cbol`(LsG^sJpx
zUd@usvi@4XK|*T%%H*?I!%}yNEJ?K=o~cJ}3(5%^rC7>y$=FMsbCoOQ(}UCQ?}y|^
z*?R)NM49N<X%F-AAR;G#!{;?=`gv?thvd>$^$)22%t4h)#1Nm3hU$bBQ8B3RRSbSg
zZ6Wji?{L-(Z9>vUr!BNBF_3R}-oDRgP8$)RX@srR528O+A1=1Y?dF_PTj#MJH4?mp
z%rm0p&D{!;HexqApq07^HkAl}nKGEnYuGCDhA)zR(Ozwl{2<nR&Ucae=<T@X1)5*w
z>!o<F{LP@t$D>DoccZ%wBZBBo;p&4VHUD;td$#CO%)76bBGL)7M8m1PjgC^bi0{3N
zZH%_n-xT+PkNz4gH3kap_QA}CwqSN#6{JZ%r1wKg)5RsLV7F#^V`=-!lS@oL{O{m`
z%AM~6rj$cu(q$h^6DCl`atX@{gu=HByaQX_?m0jF)%JMi^YCm)TF^?a5eVa6IJl7c
zu+=@kt7W0BK>7{ZJAmdNRkVU+1lL3Em-U7J{i90W4;Qc|^9kM@K$VmHmj6)!@=3V}
zVVXT~@BZQKgqgXWq{|<m(?UwbU@z13t@8hfzIsv2<r|=eJEyU_(w8+hLDFFxzei&E
zCAyD}6SFw+l&>-kL`m%hJu*F#bHqGPmN0Z>5`N0{^)$Rryd~nY_U{##Bw5O@Gq~~l
zUAk<u)=rYVh4Dz?aFW4wy|;6o?aq8M2iAn8eg$}0P=<C_C{~u$_+rKJIRGKSM;GCE
zS6imDx0nqPlRoxGzNHyh99A1JQ-TxX{eK*thd-O|`}Vb;(yG#y+NFxvqeiIFZ3UtB
zo)HxiYVRsaTZGCm653iZBDUC6OKdF>D>Z6VsZpb5`Tp`euUGzpJGpY7*Kr=l`#|X5
zm5~<wZWQU3`HrOOS!gxkK2UBErf(wGg_xbYOR_D&TAID6I8z>?bK~G80(w?6KEsm%
zLmV@Fddo$8f7%9V9;0d2X4hD~-9g9f`>o~yqpWu$yq8A*<!F8o{FBQhIi4~dvbts3
z9^_)Gj)|&dY!n=i9~Af8H(iuO6AuSNHSe+dG5ouYC;U^<+SXsw7*~*P`Lexx4`w<}
zgiVSUV))0BFOmp~;+Jp!{>)}@*xqscbt2+0vt;y7Rd~@@POb%7vVRtSREr^On#}e3
z$yc%!+^)uAnF$o`r4ogGf#-)@enU(BjI9Zd8mi)vQa=Z%0d9~@McG!^-@)-4=xDA^
z((zKVTX^QXk&ON>@S_)Z;OtPV<wEfqIwMVG%F<S{a&6KDehp4XWLoOU{Jxl<7c~#!
zk`8N-t$yUY=h6OT>E;v9Ru)RvXR8qv1>SEXc=q3t^LH)>#(L$xP#%HvS{TKLSU-bj
zJU38mH=PgiZph*JQr%Yic}MG_@8R4_*F1-1-H^VqkWVEg_iM#xlI}yAR^Qcae8BjZ
zo6X6dyNfQ1HJ5f?J<}*kE--W%z3^GzSW3X}``OXm>EX}#uG(04qaq^ouEzROe3IXV
z0n5DKE~bTC^_~`H|9C7M4P&J?#Id|iTNH$OP{#)`Ui=-W>Wqx*TT}akJ2pE*df<j1
zDPnG=VvDc0u=V3ll2Yy7(C#_sttuwD2!?eIuvUDY?i~6+vP~CaSUrYnz4wuTFnis-
z#j=NeGM|K16clp#lgy^svo92u)>+oh9nHVjFmv*Vh%hM-^{?}KU`&mog{kJRji<uS
zvkA;KL7WC1&;u;1xtdDjsGMea>O~4CidrjQx<FRW`8ig=uh%dabe0WBv2)X2N2#Az
znZ$JX?No(icQh!g1|w9@*Uj-Y_x}k*s<)xp8%Nis=<#$(wP~w(B1o-Z#OhqL9+Gk3
z6<?ZwR*oQJZr2;NT4Y=PmVdo4Yuvq4$17Yb{&?(+a`~M^K?am=e0O^P`w+<g1PPMB
z%UZ|2`X5vB%Lh@?x}(h8JUjZuW>zvk_X7)^mjCldmHcW%zH+nT^2$9J++{G%O+K{=
z`so%;ob5!JNjiKoeq(CUaBhyT`>@xd#a2nwB<nkD4=?y5sdHp=xa!$?9Z#|P;tse)
zj`fv3$&vTv*<+MSTCjtEZihAPa^nx|ZvNf^NX+ptP?|_EL6u;o`vYrCx=*sl6}CMg
zmR`g)WO*2LQF60ijWcJObdq>kyBu9ImK6M0w9aV*MX~MHb>gb<C=l?~ii||yTCrs$
zZq{yy;)2kY&vv{iZ~nq`rFbvKW27yM_T#*h7nBz!T<Wx|r`+U<FY<dtg+T$B!f$gd
z?v|@z04py$RVm*;Arz8_V6nf7NHBX^kg+?_pyG^moT8LcR@8XwD<5%0cZGQfZB{aY
z>nx8-0?S6Q>Q*Pbp|$c$yysUd-1Ak>Y8l^(;iE3SD{?)r`!0Xez2@_D7<<~IkbQqN
zVhK|)1yk0;mHl+qX6(7dzk3b7m7D#~loxGa?*D16z_Ky5SI{FvrgLr&H2^8u3TWQm
zXOxcgNN+!0H8*!MVT8i?%|BU0CxFnz4%N*Z2xvjPN}iLi#BQtItw?Sk+#{1T&jV05
zSbQr}<AZVxFh=8UOeO6f^tbD$-4qB4zNh0<Ug2+L?a=S}X5j5}tNV9M`>?M~^)lXi
zkIigtph8O7`txPLMv;${f><Sjnw-aU@9t*(LsRV3l%GO1x5(bJ>@xWuK{TzWQ~HAT
z7k;zQ0-{4__B+c)C8);Svc(&r^o?Zj%11_YNJG|a>SEI5;Al{HpOvU+Fv~CQ#bFpx
z;@jWb{Ex+MV{AexH@Wnh+<#wWQ8bZ@ed1pc^^8}rb9if0R6O1Ndl$lcP)M$}`0BAC
zO3%2YVc1YR{k>hElBjOZL(cTmD$s!F>U>?&<pP=SyN<4fqn|9sKq9vh*C!j~#LdyR
z_A+L#qAhIqxoY=%zK;)Vb#w7)W;1sy{Ew-E)AzI!A*&z<v2YJ;3CBG_OaaZ`AcQX{
zBaE)xr!R@e>438~zVhM>b7%Rj>N@M^JzD+u=*lhbqpAEN!;-dEYNqYp8y1&-m=*q&
z*t#RKnfsM+6Y?rfy4wxQcl9R_=<dJYZ&B>`m_8jYM!I8Lt4kD~+fJf9g8ri5Nd{e|
z)@w~E&Mb#3N4hmB(EQxn<TEYLvFD|)<STTquc9=h9vtK>ERA2&vgyWUQaQqTdo&~#
zchrOS;!e#+uZK)-gcq)1>deLm1tXr$wr(+QLOuYgoe&FtKdJNvS0VJx`VT9yNoG?Q
zpheCf3YTfsD4#1iE^;A3`bNE1)hW_J=1DE<QsT;D@(0?U&<59o$4_?tdK_Cd?<Rc#
z4nd!qp0;PtA*f%YtV4hjj5snMg^SgYSul&TQ3@YYrNKE3Uoqtw<W}uhexfkK&cVy9
z7u@(GUi;Fs^)cBt;Q1q}IBWtM_Jf|%U)Ld0xOM5VmDAn*6AoBm?bO{Jh+vtbV@PIn
zt-M3u1GIAKTK3-bZ`BXv^m@co7{^n=ldPHehCIE;x(ovPg<4eoD*=d*R;$fjqV?Ts
z9uXafh5bHtxtf76S&#r<(adSo(#CSojR}an2_}~}Ts(KGkQjMx&g!;2c^6d_N6uig
zaYS)Pod$r%jnF=6dMO4yA*~w4{l6SQn#wdoqt)r1c+iak+(VR@ILC{lvW-+>is<FK
zr+D8+wf8L4FKQFTJ-#uH4n9zq$u={vL)aWM@_a%}U<_}4bik}#(UZtmbb1QC5cha-
z3DBU-{?D~w8X%m)=DjnYfxoHipa4k3s&&hTuosrF$8;;zQhZg{M+>_ZEbnin3^`O6
z%fg!*SJgG<cvA|Lf;M}M>neqEcaq6I0-o=Bf$$I?!5x=a$fdVH!Lw|+8aoDr$lWku
z?tP%$dTwjp*xI70$L?#vPelfKv3>fna{`$TK<;F{B)TkPUq)6a`XG_(e4z<+>xZK6
z<k=&2TTVH5L+9`T))RfX87+)gDyf0ddd$+ogSNK%PZC)vP$xqxw8s23oMcp+w3f<o
zQC>m##o=PjKXC{Ts{-~0;m8T@jOwYPJSt!=#1!Zdtqc~ICMhIJmEKMNk8C~pAePE6
zXD$-vKcKghZZ8Poa@`u1Ce>6-Ju0=;+b^C2w5CMeP<R)rb#SF@Cf?FmmtCZl!M!p5
z^$$O;?|RW^i^4YVn|s?$zHZf}f_t(;e#)Go@eNfQev|ZJX6}Bo+2FT3<PTaxMZMMu
z_5lyphQKs~yf!W4(#E%KgA#1yG4T(;6H)oq@EJxz%I#G|^*5BB{Q9Q#Vz}guUqcd(
z#rKQN)`FqZ1_X0P0tmc(86-%PNw@&6q)VpYyh%9b%OY)Pwh$|LNwPyqZmsV>f&NSJ
z$~sPr98Qwpj;*0}FZaxLSR^F9lKH064gy`AA>rCvCiQ(A`E#=T#q!1LVxessE*VhG
zNu{9ONA((oBXu;o$~D#gfo1>Z;BPzKcH?5_vnCI;me#7Eg`ZLiS<nowxt+{29pYn5
z%2Sr)Zm5M!F!Zq&;dk3%=d!J~m<{fR`rSSUJ#2x)_x+efO+8hl_;74xG=L<QU6^YJ
z)O%%(hK9G#Dq-#N^Ff)#)$MoHX!#!_Oie1jZ@>Ot(#%I$B&^A{wYt*`rf2rmjkuzn
z1A--#(%dbf3b#yXX_l#FQly0o^tb(67#F!BR0{Yy+O3+bc!>t6DKitm8lHsoZ%pW6
zET;5GP>G6Fij~)dK#=sRlXYv1qg`X+AEiq==~DXxieBC`94N`4O@cML)+$UY#m>gP
z{m3&x?1?3O4zY1&2k;&8uX7A$77TR^QEibm$(sE~Z=2=yP0&(ed4jkZPFX<Z6`iD+
zrRiNc?qec48=dYx-z$<U4E%c<W4c=Km4-6d{<{4jF=0y~-)>cHe@;t(6+e6REn|G{
z>Hz&(95FFH5gVZWb6JpN*!B5>(%k1NLS0NR?M2-xVdRj{YR6;#$0Vd16|=tm_k|>B
zJUBR_2D|GUY<pjw`1`oMWqGZyVdwEdU;D7@ghuAhqYn7FCGM?ExofhWiM<oy+V2pN
zZn+#a9F`ez>l0tw*=PF%_UZ77bX9>CK3m%B$lj9iEZZuFNxIxQWbRJ4hq902IbhSZ
z2aXD#W3d~Hw;#QjvQu&XoABQ^g@eqcGD`nenA|U^@q>YRTW`XjS^jq|D}Y#It>tp0
zU}UdMY{lUnbt(svwIfQ9o&O2s!i6<xF!GJ_l}8S@?iPQ0`h1tJ{43_aF!;*>tOuDV
ze86>bP4;4qwc(r~WJ;u!oo_{A;^Dw*dsVmg=;ixX*VNuTx1VXCyl~iw*uF8bd$iPB
zh%yH}_M5F<h85hjqU*gO0x(bTKMqa<!lFF1OL?V*3{Zt)t`l3?Aj*1?fNl6$Oj92w
zT~a)0XOWvX!>W5))0^#m-!*w9RIzETd8xV6_0hkaZ6h=vy7JOTG_z`p0;5Se)41`q
zAfHr=l7>{icmzZN@6mu*Y?xEaZN=4%>!jpS-S`4y{MxUEd%9BzmOhSpsx6u=>fv{{
zt!L1mVIChq0T{H!lc7*Lv$VFLVU=eH>OI|0?=P~9!0I1*#++CkTN@M+PN6xBJYUn<
z8)oFE-UK?_XFyBPJ?<a(Wy?7C@&)T#?6mJ;%){M-j=g%HDOm4cMC9Nz-Vd5&U3-<N
zgh^M>-Hk{8asuial|B)|-Ds^pyy(#dFh{CKwg*}y(^aX~Nj~lh1B-GFdHwc`{m=vH
zHT#h=%SesPq#eh0Rn^<nlF^xM7ndlNkU9=3Ok}ma#=LF+Vr29-qDO}W$s@8VCpMFh
zIuu~_&Pxw`3aels?3kagvmd{RH+jePEz9(dr+t%IN$cqPimVm7tYuE4NC7EhIAWF(
zk@jah3ff01P=>{`3p^{1Bsw2<Hyi>7NV!bDzB}kIO)e%=Z?;%0iPH?a!BsOwgPxeB
zpE-zT6#?Hp!>VZcTUd?L?XR)Pe!86_g#gimzYITllaU;4k>v<Yf(TV~6WVGE+w>E3
z`(TW;-Sw<{ASOT1v8Us9Z2Jf@TA$WkGP{^7j?D%eK`pr7wzak~NU7FT)x}X}v{1r3
zCzq`_lfIUbBH>2>i0DwQjESwmptZRk*#Il5?T1HtWe5p?tWA`|j7xoyGUb?u46q4P
zU3g2UT5k40&pR|9NIXFckp#+Xc~WOjUF*opE|rT5%l8iWm)|ubt5yo98llvhg?`O`
zP=969_*MApYRBG(|1qV0`0H~<e!+hDb&(+gZJj>2ns%u8sq7ZF#y{f-<7Qm&zxHg!
zulKZ8wX?I2)Av5`r}F=sD5YXJicL$Da;%k+zG5K!m!Nm$P;_u>K~zh~ekWnijC3Jr
ztxf>3aRLP-rdvRL1QkKS4IGn*e{m_qbJvru=h&;oECO4w{6V!fpFigbWf0-Fn%wX(
zZLyrJn!j3q7_uMS{ET8w;^ICp8%aA<Dli;RA2le>M6@1tK6v@LSi*EUi`d|Oi+ojP
zM0KgkUG#<5aYkWo)o^DSFduHPeic3*Vj^STY4flpd~d$;@dg}92%=mB3VmyBpLq9G
zAV1i#;ci<+hw)5d4NPyONE2!Jz&>h4{k5mn_?7;79gfJ5&!l<z(AZpKcedI#M0ok!
zr`4xx=aANnJM8eCBojOJ4Wj6>3>Qv1yxZL}eRwR$Yn3Kxdeb4S18rSF(X{RMqty#P
zM-7Q0I($^$m?%f2;HGn=#CI+IKbSyn#Mhb}Wq04ZN=|hf^Q@_9im3=}9$;25@SIWl
z77<bl?fk(VWTa;E^qkCL@#I!ANPj-)cmA7nj+FPIwuc+>-uNf`M5R#-BnWj`zoUMB
z+9VxAqT1f6O5C}WK!YYu=fmX2nL!UzPTG)vZ29`s{Vv>l_z^GpW!TIbC%I`o{3<A=
zFtwd6<anty+pZtLYTGIOulQg{d{HPqt%1pp`cUlmQp`{-E0u&tJECd>AKlhHZmjQ{
zO0^^3QV0qvE_p3lECHq{KGRtY84`qO1Yfm?`Y^!$i)%=!q;hDK<lykLRJ>pc^WB*>
zb&CKnX#4k*fEtlvDBv<SA9FImyVE&8-+MVwl4T#(JhV>TTP_XPIBoD;1EtSaQcm2_
zMvD5Dm<$7<=*MzWE1el!<dl*VJGZmh*s|HrTg;gOr^wOQL%EtS((l`HtSSuKbQ6^%
z@Ie7Ic}*liE4RI=XtN*agA{F3F>1{FKHWYk1KOEXu=0ZlDZSBU%NWV{{ddTJC-%O{
zdv;cRCu4f5+CUmtfhCE1X9^(=l75Jdu2sT6#DOO#zZ%!;ZryI~-~I!2`HpO^LCRSV
zhb*cKZ=c}(9d^PrCpZY%w<BrRZY8tE{fY&cP<F*VM=tI%0rX?>s970&AJEcCwYy#T
zB!=PGG%hl8YcO`#g*8`9mc8?H?@Tz>n446wvQ=fTj*((eV@=ubu_TQI^<VC(+VLdG
zx3bYho|Rd3?C-`~!|JM1E^@Z*FQ(9#Wxw!EQrSpD+rC%<!=2=&&rPNGwOmmP&)BH|
z=qe_9SX~L+DrnreKASbs0q?giMZ@O#j&>t+7)~P2;a6=5NzCCd&JIccQg>jQ_1BRb
zHr4pT7G<>Q{E(q8CUMz6%Xn0HH1BVS+EBMe??ieEo5E&UOB1d^2k#QEL%06h-@5$1
ze;0Zy8>Dqmxtx3``$RN~wnd(ti0xh?%_P%07Xe6Ig#-f4eGyX*JE-|8_=H=1)SGy3
zyGV%BwrbAFcfVV8CtGg4!*Iku{PJhI9xghb&&#lkunW3}H8X=F9}h4_gw#nb>$uCz
zU4kTM|Hw{quB=R=h)%(CVdc05U{3CNxRx(`xxXxB)MP}i6529jx{U7|QTpar?7zaX
znq7CsUQF}%=y_-CHQM)Vg)q}VzX-4O{e*i?oob!qp_!E}m2!t@Ap^+*Qe|Im#QySh
zdcK^oP7TE=C}99i&u$r))q(Bm=mEth^eq?T(U{5HqFBf^H}095rUAo|$^#WiUHOSN
z&-~pdH_)<&1#Qgho)BJZnPxuS;>b-uWKkm6ncvL2gnOC9;8uN}MGB86(w*-`>NZH#
zeC(2QI_goTE3#&f)(P&u2$;P-vdE^Z+maGL>E9pvO}ye;uK>`=uYReDbooowf_<VT
z7FR@2E{;1@Zmj(0N;T=0b9GI(@V@I*$pQ|OE~*stoBNGR>P`;*_HwK2{g-yoGYM21
zUvgc7B7TTmj>W)aN5%d*{V8^xu$JQr!{3O$vp&7w9LcxldLt$5&muwEGwHlE?59@v
ziehKcW{`m)EA#^u`1)qDV|EPIP8SOtHWII|mbLzSc{^<$*MVB}fD>D4$!42x4^Ll?
zt}&18M=kur4a+qcb+?gr6O_QT+iAsAh42c$o0481(0YLC%a#1l*3`CMugRG?%od*L
z+1QqqHZa)`skO8N^d4~&i@qKCz)?8JC(R%z0twpGAe<H(NX~zJg?!T4Jq|tOtI)_t
z8u+Mg=t7I*qn=h7^j}ZAAL%*KrznvU+N9$%yQ8j~|HUM4e~}H8V(pW+suLoe#=NjO
z<nIHmexSIs0vIX#<I1;gO#`(gx8WCvjE20p-Pbpl!cyq(W`uripcvnFl^NkwtR8-s
zYRI?|aHKc5T=n&lkJPoLWw5P$o#6KI-1%c$Xi(F9X3Ni=W=b1d3;H2c6yURke_brC
zssF%cE&3|K<~K;OZ}~8->x_$V6%<hm0pDOh3jeBeKcoF;lqo~PetRE3V)aW>{WSCJ
z>n)pYza++c*!g|>)N8Gim9XPGiRldQZ5adAcHB|=Im)JU;e&Z#x^y2Z)FBXlxSP|a
z9~koGX~^6SqJ)5J>nuy`ZbtHs@RW8{)!?iFFQV#(&u*Ly$3(io;QLRxRtXGAncnCy
zB__QnSSU{1h}9NYH^vp}IdUWMku!^O@(FSDFqb%B<?n=jV_t=~vdX*dwxZzu)(++Z
zPk70c9skNz6fh?+P4U8&<c;hjSHsG?<*wk+Ii8&>6J)m^oIpmNo-8uC9P~+1@HWkH
zKI_g{hjE=2$D#ehl1X@wUxmPx*H?4k5kTw;3ti%K|90lme>U;r<t03SWO!4Ltaa4X
zm0gjMZoWvSdELD^I|_)-64g$;wD*_!E1ABsjB!BNtu-Uoy_Mny##&vt7gak4Zoq;?
zjHAuC3<7<@AfJ<*v$<Y2Dmox&?qcX1UA<KWBu#uGwfe%nnyor@_}S$JQ%*@j=W11G
z3y~fr&Omp(SW^XGr;$3i8C}1QQ1G|n@EWQ`9)8W_<TbpehVq{1;S+Zp;o}^Wol=Wo
zgnjD7p1Z?}Dqu>@#d66onTK6gvmY!roc`32l}(btFaZ{My?u%{tTEe1wf%B)JyI=6
z@LtD?hMTs$Ks}2w2J@y*uQdDtS*dyW(F{??w5XkAY|2Z4$ui{hCdTNYW_Gq>P-?#$
zZ)QSO5p<9w3=~sK9gBX1_igT*ITo%yedp#dWn_&&!9a!5p5N*TSKK@q?SccqSErwf
zeIytVB`zEzJvw`sU10|Wz2iwl2dfA2EJjaNNVg`S#rS%Hqsc=SAybC;Syy}PyK;>J
zY88|A>lEZWU<5t$z5cxpxYh4oe;+9TKSi2}`_yFv7c5^Mhqi&|TO>>2j9M(nJWHf?
zsx{g@rQY24hL&BL+;1r!E0x@;<}f<kAs`G8yvfJ8N-0!E-P;#k2H`^i1`*NkjHYcI
zwi4P@-iwxUWb^VYa+bHf51^VLDb^caqew^6UcG7LkY1g(qqTP|;oMR3$@_rc_IEzC
z+`F<O_4&HE>Sp*HAHyyS*8hl!2#Dn)5%$lf5+o65ME|;zxK$XFY)2ZwM3F}cD`r%B
zZ^_hPj?nVVltTF}XHsclFeNh8MyDuaY#IxH1A0iMAl~|>dO-M{87)n{=0eg4Lo1=P
z?7A*bi5Z()P6U3a2&yX|-%Dg%GPDXJwA^mW<y#baO=#%<WZE!T$gg8~mkFy@**xmf
z7d&nel!8ZI8%du>3NU0GKD~+|b+bB|u4UQW&C~WKnO6RAP*>ZxS6&Se@c7|+wcLl1
z|1quF{EtaAX6@^#;6K6b7>1&9%AlB7es~>qYti+e3-~ZBSSG^u-!NYO$2~1??PvVQ
zG6=N<%FgW9pSwBDL>gWqz)tq_?C<xlN~fh7f~HCVJb#sE_6M7($~*W!9^Y6mS8IGW
z%1F@s5(y~z@^tb)8=OO*_*x>MfAN-$*rH0>q;n~zW{N6X=By~Q#G$sz=WA~CpaP`W
zgsYcFpK~`TXlAEpl=p+Q-@@0@Zs|)3Sbwh7<FYA}>ghzc-*rEThdog16ALS1@t&AY
zP!#!;Qm#%aJ*_bh2K}<w5EAK9)U{#%=}<0l^ReNWZGAK&(+hHb5>{h=ssxE7XM+~v
z(J4)yNIp5>waGK#7&ulStoHHK+`UocSwn_kClbo$?>+kU)`0B(JWcWGKqzlVia99x
z!0(*dRpa{RQrW+q{p0Z)nV)n;d&OE#`0B&pW09T^EvAaj$xh4Xp8sl^U%_df^ixOk
zp*<K}6a-;+xVRc0ogG4}fl^^qEu%I)C`CC#50$DKz6>`o^@UZaNCOKDE0Q>J%$;-N
zjlM^%NP0@oJ|x4F8ju}PqfTPx2>S?Nzo@c;-mD7H_kBvG&!==x6g?to{K&zu+sP_n
z)gemosQ-UVW(^-xQIF)8j|^@*2f&ID*!voS@2qnjq#N!=ciufoSIQ`f>W1B4A>TE9
z>ux<&7KrAx5I&gej4K)`)wD*aQYz|i`q`=$d3Q?IY4#~MzDBl0y0_iuoBRocKeh%j
zrLGeF2%vGn|1o{j0OV$m3AkNDsXInO{mUnTbf}3YU;1j_^y&sW^|NvV$^*1#XjCl*
z=QN;6{Y%Z5xo4<#>W}Wv51E)}{MRvd$}6V~mF)9e=YzwQGvYtVv+9@v&6qd;V;bXV
z*>>4)gGH%~{WieJ{y5Wrn*K4RG;?G{d$xG3AJ+45gVB5>wz}qc8<6>Qb^4~f-^px*
zAHfqW_Yp95Pu#Ymvu_LU-?%iEU-+xZP)LiBzP)^FZaC1_Vp9B67v5PGAmWl1rSfVR
z$q4YSR@2P?$8_d*vu>4FVuLTPJGI+a`?T5=eQ;+sk=1_u_Kism3V4>;S$YW|Z9uz>
z$-r#m7VtXx*oNwqkK9MlX7!C>kcJxat}4mPfcq#}8<>oMd?e6>T~ZU$aqEuGr@QPs
zy?bdYLQ1OiC>9Q!it`$@PU$GfOpm>;L|Vjl;4n4Na$MZUQ*w;gxaomorTA!!zgL$+
z`s@ecDQpH5;FT+WT`OaRg1`rPsl@fT{4K}Vql|cUabTs;maxUGR7&PM3|=E8lHY$w
zMOKbv{={RYA*u^Y{588SWZ$5%jW0=1t$7N#)Qn8MV?sm0WZeXu6&i}wmb>j)YOHDs
z=Taj*y|W4Ux)hj=B@tC|Hc2&QS=`WlEP9hZwT=7L@4L<_n4(BI!hEk90{WJJ<FP7l
zUS1TWoYVZG`pQW*%4K&bs!rCj-$M_8*r&+0C&;>U!Z5F2Y$<?L|HvO+w~INHTulj%
z{XE;fmltr{=uS$E5c*GW%4T>Ie8_F&d47-3^GvWK;3DF3X3;iz3eYD=0N`L##K;(m
zw=4wb1r^EY<v0?bCQG;4TC}>~++75ExuEinf}O%*2yWzTd^ziplUd%@!7eVtn&a&f
zgca6;Wrq|1$&L>VA$pTWe~dKPc}6iESwrfP42Dj=Qo?B(FP2`YZp(kA<jwQx!%w4L
zlnncb^fW=wnRRMI-fe_l@KR@U2vkcqsv$j@k9>3*K<E%VQY0{&JfRFQ#E9Zopt|0m
zcGA$n@lr)8K2_Kdc(u&npw;RrtxO-RtmD=(j_Z7;W{9q_gchou)`nYVl}qYLl~u^J
zdSs>}f2NjTMT#<`w=QIf6Xm9#`fB@;kDTV|z9jx6)ih>$Ee_^|vbKElad!4!mfqHq
zc_=)eS6(c51wJ!L_!dD<El8TXig^Gqyv%enbLEyiqlEY}q>wAyWg`ijM^z#(E8hvt
zn`P6CAORp9@xWr8KQEJqtF^8&Soc%`SY6VaDv{oO!xaHK(5{Y&qG_NNTJAe*xV|c+
zW2Rm<GdMX1xh^B!p|(HdCsOYzR;Ha71Uwk9*L+Um4asbg-{}y#%KcdCo8A17Rhb(J
zcfGVaqfmQMyllvWAOrWSu8nk6g4G)T-ReC4i7J%0%XVIc+Ge?l4`@Qq3)ELOHQqlW
z15Mr$&ajISgOzB|{eB4F^h{XT@xr<}{3G3K=AdIaW27+Ys#)Kpl20m__9Js=90HMS
zv6EISA3bEMx^6VJojTPl`wqJ1TUJx?R&M^wy=MUAR=bR<UsC?>$J(cA4PyRCWa}|G
zMC<-xy&ftb8%q%Ji}JKN1ai~)AJ&8S!3RT<@do)A-nMloj{?JPeLmgme*;SvzO*a$
z)eHBSmNjtvm|I1K;JI(Qk`z$+FS17M8IV<LvBK1qsZL4BxSYjHO+$t4Cih~}4o%`-
z4HFj$8!nD}Uselgt+%+=7>e~7`dgQRDMQhcd|i21X&=!3%H(Z>VDZdX<LFEJON^;+
zj3IPCu=%}6jpC&+UhPY3!OzBg13gZfM#L6O$1=+{eW!TCwQsJAPw-vG?qY*fFW%L<
z)^aCAuO+N+NlNa11<&Nuz}c$e@N3R~`+YU+ru?~SBJBZHO4EOiWSOrd+^L_g{za6y
zInej9rlgk<QgxkFxjd`m*Y4yI0;Q|K;|_>D7jB-os<HcLE?qowt6|$k-|nEYYM+y9
zam(n!*33-^1vxRlS5)yT@lAop4c>gIOOJYQ5d8<HCSF0g+>5t~-oC7^<vME+vYXu^
z|N58vrMV}6KmYbj{8KBgJeOjBfv0`au8gfVhHL{ZaDoJ*I6`(4MsARN_Xp`Z{~?dW
zT*%OxDi^+`ti7e*vMKqyFAG+J0oU_i<f-j{9dbzeJhhtSv>vpUf%W27-tY{p!?q(~
zjwTVy%qeuC^=*9DEPtj%De&KVu~EcA|9*v{#0&=8!v9_B<)w(&ww8r|N-fqeM|;5j
z?OVc!8vZc=yWyvU?SyoQgiS<4TL)Ye2VT#|f9W(*5?vKHEY$h4$PFt(UzTrG2G1QR
z9-aOPA>k_0rMs?$`7~|eo#xzN_fiZ)1=@Z$Wv5uoj#T=7LK-ULPz$Zg{pvco<@blN
zM)`AYt=xeWE|yZObfz_mvPVJP;MnN$hF;FJvti25$){GnFXoKp0h_-78@-!Ak$m})
z7gL!5gFzfk=APi@=gZmk2Sbr?EP)P?G=FufR$|tWBd_>e4d3|V^01OPjLQ|$zM@eI
zB(tg~UNq4O+MD_I<mAURj18+PqlCenAHp{%uWCX-H~SvifGNm|N}#0gq}uzW?6c*!
zFrx33L7NAYzwm}j48_sn%|-B=iI%-nr6f=mIk%<_tjb8)-#eN0VgGMw7L+kUy}LC;
zxD}vQb6OyaGI3V0dk{#`Q$u^D(*mf_Ed(zP75ox|md@3C!ae<!cTt&<^kNpz<?LFs
z)Vmq+CTA<9LS_jhRR$4B@<!6DqoM=z>#zc!n`NCd?u<1%m>jIE=4@7U>jL}IB@ch+
zwtcr|P(G`~$)f?Zxd#>AO4giYVji^4!@_qz<d~7h>*h?Fe%vLYtVZ|gpZ=zbE2_$@
z(2;q3sx~KeVikvgy$ZIsnmv+gYlshkEoD6YVBP~QAIp{7gUU=jWl&Vcj2I~uDiJ=>
z;4cX2PP-lT{%FduZm>zhZfuP?uP~K1NA|qhfVT2Ye`bEsTr`>R0iuR0FJPP5#~68X
zs<lT!!#`2>kxXc-Qf94NC*6|V9vF*Sow4*B`NC~Fx36}3g^5i1(+uJpOJ~XirWEL@
z5<-za>X07r<WDY6r<RE-z2VvV>{*44H_aH5xpYmpdG02?voga=@5YAc^!LxZ1`bKw
zDNahrjfN=AKJ16UNBlB|@@s9|Co!Tk^OaT+??csE(FW)ZmS!jJB0uq3Jx42xDLL1i
z{+X_g6>h{CyLyro#=Y57z3oS<!EofazI1=2j?Dj<+QGWA&vZ9t|1*XL)IBDWfqiB}
z(?Ug{$GfkrnHM)y?VcRs?XPYn4@iQcDH~0;iJjSuf{VWTQ9$Z?BUk&$f{8T=Co7Us
z393KpmLT0eX*u8Y_STC@V7RpEw;G<JfRxam)n*NFEHHM}ApjG76EV#0ZxyDzX(vI^
zjBQ!pLba$CB&nAstlE(OMS9FrtD8s6GY%F*#n*A=qO4k%N4JJ2%RCXS5?57?=^O3f
zd8Y?`9(DbugZV^`ngqf)pUE9CXWQcB=`M#dh|Kf;%^E`s!=-+Wzw_}+YeD9qdQ$aB
zL&~tvMV(qFoaQof`@I&Z^=g){JXH#%H&b$aTB~q>4KA>X0;hwO)&F}9q~VJ$KC&1E
zIixb44)lHs`L|H&hUTeNFkMdZla-`FY-+AD>+N=0yVZ-IFi`8rHwAIBFs&0id!-MM
z#7Z{LP{lJtxLx94<==$#d^6bek8JM(H{Wb8oHiB^-Z>42zOB!a)~w0rEm^ya__49v
zDNB^GicYcZec{#MaN>!=&J#PW4IRAwM#BbwXyzjT@pqc^jl)t8Z6y{qIKL9S3zD6h
z{83BO_R1UOnYR_pJuJOukNOo+ePxcm;uGddoTBZS2=W#lo-0uR-CLHO<m!st*m_k~
zrbhw+>{P}03JbW6o}9QHj%bB`v;0LBkooN!P~2Z==7+4dN><WX3>DI@Icw%BmgY&s
zZ^cD9siTz$#Hzgvrl(FvyZHFCpQU#v!lhDlQq$}_Px#d6!*Scuf)9IH`sZri9~G&;
zmoy6Z(wtGmQzP7^k8D(A|DC6p=7>&|6fF+B74bVL&lK}012cLvadh$3q~=1`(DX>8
zXX)N0cd?@n4LQ<QSHppFS+tMTy^RzibRacCX78KM_eF{!_v8-Z<@aD%ILx0UVSRGl
z9bs~!z1S*RNDpx(tBnvhm#8|MQXLK=x8_oYBAqX13QUSp;${`w5kOEaC_gzwyw3Ml
zy9dtHBeO7r78+jkc+L9#ltXz52)g0&bw!s4*M`%4lvBFs)~)HCpalsMxZU()vcGwq
zWanK$r~sV&0ErrBr66sB+ro!2`K3sQKI8i9wuMprxmxmGG9;IdDB<h%`6x5AJoges
z+AN1b!V<clZv=^C;8}jaM+f>)THmRZBXY@Tdt#fT19&Sq&OurIf@<H5<*gJ%3ojnt
za;cf;<m6{qlPgN24E|aRIE*L@Q@3_h-5@m3&jNf;dkM9Wpa}C$aE)QP$E+Nw-kgar
z<?pdp4v}z<<FTU8sbgxa@Q~}PaXFj(II=t;F~M2PMqd_Zq-sc2;v`+I#Q_BjlRc!a
zPw(#!>Zv9w1;bgb*DE8VAQ?L#`@*E;F^Bv{Hg$g=gWx>O<6a}wffAu+NSty<=2yu2
zqEi)EMX6LX$s<5L>5%8h1A+HP3AI3~4U_7wf}#p20{?hu%JEc-nWgqQM~e-ClPas}
zH803s>#((v2QDZfq656^OYdyJ!&gP^6D}S>c!(_KFjxAm_tv2mlX~|rCP!Zqu)GjD
zr`C{eW;}2a+4D7ua|35RV0h_Qs7CAPjel=e<?sI3nXg%YZ-;-6`r5%SV?Zkkx=)~e
zSEEOC%ls-8t?RePMC+?NWV>q!766M<GXs6LaKwJKY|xFS{|NDL6i5GmW@T1l$$f{i
zh3>}9DIP^xTb|>}$z#g@p>uFP7nE`ZD^mmft0PriGb3|I`$IAwR@Y^HqK^6wJ<)w{
zHiuWYaB^7L(9Po2>?74u!O}*1aU)sP0%KI)lk;-RZt_DLtwo4=??11Si>Ygj4(uUw
zfr{_&<s;R0!?*twO$>85OcnAO1wQUdAWiRs72rtWMy{x8Ii!Knnp9wUsfQk&CgJzY
zmME{06lcQ%LRqfoO_i!1ewq--1Cq|oaSJ0il;7;ER&9p+o2pWhO4SA+xm7@Hfn^A^
zRQvyzQEI^3Kd52!PL}py`UH1&wYPJ9xWaT2w*c=PqsVH=3e)o%>{sKSi<=JeEp{mQ
zMpdZm-3Z$3c%rGlx}r%EzQxdYpC0?=|Bvao_G>~$SVTusf)GbKB%<iFcH&lXicRCc
z6&hnkoh{fuRfUDe9BR8<-)4N2X%lNI(``g8T`70Tl@|qTr!mA#MQ2~*<tRw+ZXHjP
z`uvr^Y*RJMNd?`9^tKHY{7J)ITQ&9xsv@e^%+|g}(&}!I6i};e2M#V#D<86Bt~e}?
zQ`h<#TBG)*-AD&au#;Jz$QDYNy|+aA&+~{YT9EA{a9*$Z_{X25c&o4hfX*7JW_iRe
zqoH#iIeg;L%S*Y^!Sa0(&c%apSzc^QIFy(Wz14n{aV!e5qkzk9AP)v>>X2bOZSm?i
zm;Sx;ro+`9dCMY0`b#FZC_HvoSK<b4%f9RX#U2?t$M)xT*fTu`b_4jV^pu%e2fQ|~
z!2Y!AP^Ky*pyjq;=QRywV3ytGjRc;)nSeKD8BLwd*G_8~rROm@Lfgq)o$%F%<jui<
zUllTj*Y0s&pD0O|m7n|Iw;b<?HGl#9JmlUQfPo!FW!f(e8;qt?W>5lhYo?F`f14VU
zZ-w=bg`_CGEf=+(&zF7?Gf~dl^HGdUGHW^*va>LJ2TBu<w(6jrHDx)~Ss;R(yB^ch
z{m9bGWOl-)NZ*?{=iKaDFQ$wJ!Gv+=KCdRuOo`JSkh$032gwhca`j)$gr#m6X)hL{
zF7-JWciHvUPj@Irw=M_g4_fP&@|a{&<%xfH6Dty6Q_ao6Vj>yuQYyt~p1GIGaefw6
z0ZLv0SB2t(IP4ZU@q7v6#euUOQOYw{EQqSm_6#*E-l>gt7$EJO#KP}q`b4OZP(FTr
zt-RkZIB!v+6kQoEzED=eFRk910QAW(Ajsb_nl)|rK~g&V)Pt@+>(B$c+<%@DbbJOE
zrotJz{Pgq*#FDN$U7n@RN4(+tK5Cf)UK*>xLEm|>>nHo5rv?d&Fl|0qnWYKgypE!|
z6XKv-7Dl>a!<)8q!Ox^r{6*`@LWNp>xo9msLlbIVYdxy}fvir~w*tt(a4HMt^;O4^
zjO_v?lcP(_kJi5D=SA0mTK8-t_=hg@Qu6*yN~;TterUNog83fXy-S{~H(W9=JgB{f
zLX}O1%@3(E3cHDe*>4}F(lnP(oPMeV-$s^NjFdorV_QZad<BBqMjYDhbt5Da%gzs-
z%w9)007s+&CDjGhz0f{Q*&U^~Vn*_1Tf@rrM@oQDcaYgbNRJ;~px&SoxubfI=_VQA
z<XN96gRKHFRf_`3mQ!n_6>L?56*hzw<1Y;MNs%-3Kx02GnjT=n_V;&jpF2x*_0*pB
zJsH)u5Hzj0EN$;K5K^un_PqE7$D!1=b-!tTpP|D>;q>7Nq`xpI$xyMkr<AYnwb|%{
zF2D95^JQ~S3+kVsoPxDdXWYKaU2+vvwomT0r<Q;8cZ<Hd(F0N2^w}nL%Eppbac!{~
zzs}nbE1Rf1Gn2u3Lu#i|EYXs%HC=-t-hegKaROH1V%<6JGYEe04iMR#SOObZ7Wh)w
zb2=pd=w+Sy@H3$$OGazOY!*IzcMceu+P5gDYB-ZlDrp%AacdVlveWTkIhg6MhL4gh
z3)V!<f+qF?LPgf5fcZ-)AI^PkH<CfB6{ggrb4x|?OH!sV#Q&t(B=*|&m-=K0suQ}s
za-#-stY>QP@i)7OZ#i7|`v7-R7}A%I%36L~=a<=Ts&TPafOoZN>*X-`>38?@3PfSX
zA(@#{`0Ov%M)duGfbw8(+l3grfv%fxg4ZKw&4M}=Or`?fjr!W-|HmX(5W;!Zd2|Zg
zlLg9WaI$g}9(HYlK9?yZYDBQY-aPJV(LQU`y@Ibs`Uy3xV|{IE*^fLu40S9Zjmr!i
zYKOZy#|Y57uN?Q{SI%+UIR#;o-rv8&2%w^GN0Y3PhaorM)pCOt&6&;a!Y1}SMHg^=
zGcb#Q#YJ@12i03bonB23nj<{y>viRj=APD{wyU$Tl16$mxjN}@OD&NbE(2`jD)~AZ
zn}{k7K1<DMEJDqX^S@i=%)nx4gr96P)kzVMrx<%XsoUM|m^#P8|1r5XqPUmpH})wo
zdH%DcFwPL187*XfoUfjejtZ92d&PdTG#VUvX^d<f@!U`xCe{8o>L)rlgoE1S{+`JD
z>^YJz61Ri^Q2Q)vX9b-CG1Ez2vNt|W0}G67Is#dm?#(NsZ$09)Zw^3&x1w6*E@Kf_
zPu?_pGly=?0eLFkvr?qP`3#`^A9*l0Ut~Q4T@*Ml7#Rbvy?QwPn$fb;S!~Lkmm@Y-
z`q<#iJJqh)ZF?kc;eJW?RjaVRpP_n9(jgrmBvOJ<vA4+^XSVJ~^4T^LNtuQ^dt!V$
zcA<D*1@-yS#o*c}p<rWg{Ld=OiPIs;?{(L+ChJ>-)#-Cm-<`T|Zl8+ksifwW{ujjP
za6oRT)Ph~Nql|+L7j9%I^1s!;8v<>Pxp>cY-6=#VZTJcT^O_LZPw8J|S)yGwmFAL4
zT(Z-9DSV{1MB?$C0{KtQoY{7pnQ!S{e>l-YmCR)$R3RA7^#|Q8o|o~}{=V7uG@30U
zyeKEgP~NbM{MVdmi!M=uS7im(N2>>f+o{rGqcu3y=|i4H+%O(z4vcB8Xc7W#mH&k(
z>2dzD7Cn1--Mz&@z}_Y>{^)1r$D|?y2zRp7*3uQ#?{<Z_&letz-QM)anerrRDlD8Y
zVLWvButRT|+9>?z`={kC>?+CmqLH~Z$*udKihh1|dHDqXOl)oV+hV@1oq;*k+E-t~
zuf0Va$9o-7G;3`Jr0gO}-bsUxzOXOlPppf;rwyWFv_}ulLv|Ky)#T}EQXP}yX7)C_
zBKXW~<n6V=n)EiYMki}OHh^7cW~9*JoH)^Ix-V27nj8B+rdd;_ohnW$2_xFyMs9u2
z`NSsfhhU-HpO9|Z7UdbQHydUdq{D3n2&sgwHZqd9Y$Y^QV}iXyCws=`m(Cx%XNrNK
zJUS2Iq?w~mx%Cj1*SoA+jKPt?4y<1NXnS~eNM5fbSj;(e9}tf{u<X|6H%2FIa;pt#
zN?)Ex1!0O~_{FK!P4He)jU3$%UQpl?AL>WznWF=_z><j$(uhm@ixK6b?|`i-;MnDr
z%MD%@<(M60yw%?!n5(RsnD|2mdY6wT`{!<*poocxIirFWJ`;theyrnHzij@E=m+R2
zawdqsY-{Wr=#&n6b$xPszrTmw?u|ea0-?TzgPRA>|2ji>0d`}02lC4Rg^%<j&!#kg
z(e?l|BUJ9}Wb-fc9kWYkwmaJ$H-6?$mP0c+&5azww12*!*9>PF(8p{(@D~YxD|7_+
zn4H#>6+H1hFrZoBD%Pzyu(gPNslj61pc;sw)klP{_v|pva@LUNux-ElAO90$Cq?kR
zn&3K_Wz^c6z1~&rtl2x#;_F>|75R;o{Ng795WX6Zt!Ju%sp)ksascYAWc!hBz~j}A
ze@G6{P>Qs+7|2}noXcq_csE=dK4w;MUXT2{ozbkKN1G86wJUR+J^cd{b41Lm{`Nv@
z)Bg@E(X0)b30Eq1K@~%VuIEMURwy?3k#=di=C6-1!AMB8!N}WdzGElRvKNi#LevV(
z<~OyLuy9N7a(Bt1O>0HH(N-*<o-#EwY8E{2RnmzN(;H}?iyDJSKkyo=G|^sK6?<*7
z_<}3{enF`WNon!#SK}8M_F?yLjmLD|GWz8htr&SYaiHxK)736kcIWkp|F*D8dxYDs
z9_oqd;Hk^;3hmTcG3igoPB@QYjqgbl`S|{mkahK+HT4WH?#8>Bi_!g^iG#&+7uHMU
zH&Wjvv<Kd}P4uks`{+s^wx1L@@u07xB&`~<>r_35r(7PhR~-6?aJanF^I`iV#ryJ;
zaowVa&nHkT(@c493QgexZ!Gij(M_ft7B=P`Htdo*+G)RUtsfZ{rL1Op;YxcpHz%Vi
zf(+61JXzSeag{3jM=V}M_$$1$wbo!d<pHw}>Sqe6K%#w8T?a1c;YVPOw`up@v6@6=
zw@Ai2A<!4!jpf#Sd+{i0*$F9dx^pOxNBoxP0+?{yF%3@f|Lllz!p3dXCfe}@)ai97
zT<|?qf6RuR<a50BDGgkTO|gIrYP*&EH7J`@*$p+!{uO4F`cj0m5Ovt<H2XvaTOU^>
zy_!<eh<9cre3cy);+$MifE~iI>l}A-<B8wV{8&Jo7-5eml|DO_xq;;(|B_y<pOruN
zX&gcQ)?=r20U`@ojdqTKe=+w?M=oC@IuK)i;9=(G-zb$5=>gH|4i`I8`Zu#1c<L@d
zI3>+1<dx12on}<zizT@~>`Uv}A4lXL9Ya}0KUzhBlBI+G+B(H@MN!TEm?!TD6x#Ot
z5;_+ua~fXxmOVJR(;py&6_{i2;83aZtfPR-LJ6?QqaimHcJj_!h`gJJoz;nFDX?oa
z&7g6Xe5aYj*umf1<r$A)RXf{A%pNE0Psh`9?7gk`J+I#A`nO5f?*4N$*eYh;-@@nt
zAI+#3KET=a0?pf#Ug}$BHE=E*v<kKP9Gwn<0>Fy1y-1TuqrhBeC3CNCEpVntiUq+4
zCr{@vcHguU7h{dr(q{KN4OkptD(YXCmWvlXIBP1Q5p`kZdsf$5oPp}Mb`d*7SV4Xm
zzl;k0pX!4sms2%I7vm9-;WOK0@3sa~M+O_3kTm8bde5f3WVSb98@<$Vz5vG)aPHEv
zMbP#}lI_w!>Mbs2c0(KSrSA5gq?5!c^B${X^UYmp-5>mm<e{-Qc}1yze++H8b&uq=
z%BPhb%#!<xgOJ50&8kKj6Li0ecWL){M0c*VJIG%To%$b>*0KOGnjfLUU8Y;IGcIe%
zNEx;w3rbWmTXN#nXhkWpQYfeSm|q7xun28vnkJlVuG6O-{vhhJ$sbnrk0DR262CKO
z7dILwKbsVf>6<m~FP=b*3M_G~KFoL3D&d@97KK=kDSl#YaB9t9(I&w%;qN^<i#8Ng
zE>J`T>fNUL+BXF(?c0Reip~uPm7n@2836!x`nEnywa=30fX5k4%FuxlC(vKsAb&fF
zmTxu}rk?-3l8|I0*nnHcd5<Uts5a?6<e(o=z?ylDRjf|^vbs(S|FY-*pjkM_%DRZ{
zQxYYk;kAxg3TL?qbXj|4TCE;n#xJ-D9bBjkyWw<WAl0`fsVILr%;*XhV`{!ee=((6
z(JE*QoGEv?KQX6Fle_shHtT+YyQHtbAz5bn8%Mz|PvBM(xtvziihcUbzCwB!S|Rg(
zaArYbZw+!=(KqsB5mQb~s`C-fag(+RT&OO$cireX<vPcd-(Y?6L$+o8*Z-K-WV}kw
zt<Ci<0&@AiXA;Vb8vh>yDsWQL)50oUE%a<s!B7HQl*^8*d#Sxqb>g~<S27B@<00_&
zA$zR)O`2AozXKdc)06>{{|QgMmlmm@1T-0{SftUVrt_R9$pSrLAIurQ{#h$ig@kg6
ze*IGE2X_Rf3}Y5AH3)sIx7X;g*}idx-AYsRWp-K=Bjd*$6#OKbfniGW4xG9zNCqp|
zMj_REp+?B$^|qdt(p&@oP%*%3)O(}$<#F6|CkNKPzk@))&3mu6Fv95`pHL^ye*h#g
zN?}UZu_j_tNuyRL^QU8GSj{KI2#k|-(VK+@a+0+tId$|qQJsDPZwmyC>g7ZW99NsQ
zHqlD5reT6qft06L>Aw@5_0-;L^C?6?-7&?xoh`{QHbQM)M)pHgCPLD$G|qbarG*dl
zO8WVqI8vbZ2Uij_-6YG=VCrA1$5Jk?E8V2!-E|20z{Rz*85tKtrK|J`<PpbgSd<yy
z0!}0LC3z3dQst>z3xfm9X0{P$uEhoCl_LCs+IGjwn6az?QpeZbPiuhZ`qUw(yY#dz
zeH-gs-m8uo+-y^_IAz4I0WdxK0#pD^UER9IailsupalM@lb%sgh);S2_leOU48JYh
z{dilU@X_RQ%gIk%gMkL1pj`Yhdl3J$T$&;|0Aq<=lKGPzL748Z*9UKR@E6uD(Yk+S
zT#_qVRTB=O$bPQ^nw}0RF?kr08PY^o;o2|MuP(-sq?oJmBTNv`r_IR)ue+c8-20m|
zk^0@O2UZ3YUu)5BzU8{Hby~}lk%}m5zWBIQB9$xpkM)ooV9DJ($(}V5Vd7nyFoGXd
zo?1WDcI%V;7&6!S=oc9~X~SK;C2_e*UL$g8^k(ax7e;xj?O@2f(6GwV-B6_e{W-b{
z_y-r*iJL3@>Qj!89+@h&5#=WhzE`G)Sz5Su&}IzQa@##rEkG|peABPqHN#5a#nVKh
zK}M+XO{^YW(g*%nZwb1dH7=)2{;bqCGd6p&utZaM+YlyYP}49|J1p<a?$0nEt*d_z
z`-}~M1XVF#nq+H?F?M#s4LK+%HSyRahbe=?E}|{1^BtnX4tk_*6*PIrYq%ol%mqgD
zccg(*zsM}2@$=-yH7cQ%Q3}On%VyA06P2w>X?<I4Whg;M<jQorehlS;JX?(~iZlEs
zfv+^<JevL<0ORKG;_LlKa>)@0jaxJsqOvHzwf0@0wg8LJ`BR^s&n!>9I{cB)SB8GH
z$hh@rJbRoqONx&zm%a^K{|RDF&$BNdv-*dW&t(H^CK6K}Wpc6mZc`<jf(*fOinMM}
zUN7^&M5+7f^rxQ8SLmQ0MvpBsoQv|jTe9-YWbW82c7(DOdv=_MSvyMlDgwESAf#7M
zvLket?zDWC+YnNn9duHTQBwfE;`W|2@8$iwm4xy{A?#)H$VJn?za;k&{*R*b3}n0O
z!*Hiomr`445qlLuYgC67v$mKmqGFcXv#ru1Rw71FMa<YEYDKkzShZpWu`7s8?dQ$=
z$=59B|2ya0_jREL+Ecpo9$;OpEhykD8S#s=^*AR|AafeJz0hNf1g!cL2Qbk_g6gDr
z<o6UNg^?XpTtFYK%v=R|gkZuD@yi}tP&ubUvn)hgzv4@11w8l8_N9M*3j!%R>`oK6
z4_J;Nq#9Q^(_5Lv4I~&{Hf~gUr`Xus|HT3*i}NK%&!>Y;3pt$&^G%{+Y`!%eQ1Q5@
zF}zH1BU3!8y4_v%f}Q@vd++<FoXTi)hua;G&<bbLIyqBu+?*}iW2_5VIlhAsf$|xi
zz@8YaVaZRx_|D|E)lw`rPF+Cr1wwcJIM_^nH0>1j!n>&q+aqEGMDiAks!O$EA6v;w
zfA*Vm=m{*Tfy(4bI?``~QElbitUhyqmL}8rBibCRMkjia8US<$2f0_Zl#&~ebJu@z
zWJu?Q?iyIgS#V#~5a~IP`9&DiPzS6oIOhF<CF=o7wOs7i6(XF93%6cfT#YlASgua#
z3{(d=uWB29R^73befPQWYudhwoxy<lDnUJDcH38#ab<6k^4q$(>Ds?P)=yJv<iq@F
z8G(6EeYOV$ai=Ze4*1c-@>Z~~-ms*-TUa^;#(y6WX0?iIT@9ldY1Tlk@vT*Ot6ShD
z4T8mh?d?Q6SS9Ptbi%_{j#n$=(W1LiZI!zxPQmwtA&#s>C-Bwz)zTy>wshS%|G#u6
z420@YwH~X{3)-8pZn{5gVxD_?jJrOgO@QKYgMWrE%8_w;w8VgD)10nzhD^o9H?)qL
z<J)T=XKqh^J9HBhY?iRODY!-!Ux#_dx>jCoj?QqCsLo?><Kl^+obF1^!fPa)N`9v3
zIk^qp-dQgq(TX!!kMg*8p1Jl?k}6Py#ahmW-+)`GSc~V+n${QGk}|o(V;7`8-Mk#~
zhxck(Z^d^fx2{q-i2BjZ-)bBR8H{am)e2ZPM^`Vs=E%qjl1!zUTRJc0TMbLoYaGLU
z{irf@YIx9}elv!bdQt&)9w<&7QtL4LNjATBDM&+Ii^y9&9oID1CgcpcKfJE!O*H3i
z2-w)(>#KcJG%bOWNh%WD#lKZ)%z?@>#g}QNnELg(J3@P=uF!uFEb%qxawMOBEWoL;
z8{gZ>&d3n$l|-8F^7j0wgk_X1u}wzqe$uzlh0YD;3)k2b`M1>f8G~~TI`pfPyFQWp
zX#^Jm!j6-J5c?GJKHM-#sQ5@dm|1Bd+VTw>pmY+VlnN;+gTY;|X+y!ak)W>=nD=a@
z5u=KnE@?CHdG7N=9eUwGjKxaXt5gMy54LRfY^zu0R~~h-_G=tiq!z!-PJRxhz-Z5H
z@5^2V+UxVkZrFCgro3WfurHtRhF$m;L2&DU-lwxDD1B2R_RgU(s2pGHc5O-H5#Lm+
zTvj!xv<t#xSM}&9YB>I+(;@5VY{yssH`fc9id46<oL~-Rx9-tbzg|AfG|PWEE_Dao
zR#=e8Wc-deX&zoK0a!P&M<y6r+#7bYtlukj@Yl+%o*YnmsY5qiqc82PAkZyRxX9k}
zgy~3~NmaJ*jo=y`3y28I{hnFA7+3%A1Y&I`=wi1Y)?W2Vgg2Lr1=lvp>_+(js?t1k
zg5Dua(tvCDdu|@ARe^c$feS8cLi*YU$k>9m+=Ck2iPQW04;P0_IH5r=n<jPx2b(`z
zTCq#B^>8Igaqmj5U0D1vJLr(_&cHUg9koI%Qya=UHixnv*imC)ulKY;CvXmENh0x;
zdHrsi_oRTi8=cMLMvobHYMs-O5MTn+_b5haX=bD8NhavT_A(5Oyxw8RjuGYb`wQGo
zaRL%$HAM9lKZqZviL;s68TsA$p2(Fkst6twNG1%v%+V9B`+H5i^oF*Dy>8;iezHrS
zI!)@1Kj=^zEkaC`ANlUc_#))hlY^Va!dqcXi<D1hj5Of&Wl5#^p%SOcQSaWQk(%45
zD%-0`avL2zv0CbNT;x>2A1L_2BwMjyzPZrbn5XGQeHbkzbc|wB5X@&&&2E?n)R|bP
zb5-nmp%GhMDv6aikzsHhxz9S?e2u-(Owa4*Pgusx$>=z!4z69v8OhH?4tqM$Xlh?H
zxF<?f?YX*h-&h@MbN$e!x}<x6#&ZHB#qv8*s91}@o1ut8Vm_zJ+D$_iPe@1$BZqcG
zRoqV45H886t}tjBj{Bi-{$e)~zTwk%xLh6QC*lnxgtYLQ9kfZdd2}oSGQJ+{B1(Qr
zYy$=uA%cgp+WU++mqZ;$?%yA&`@g1>eG%H8!d*0i`J9eG<Y)S#`0s2V@oy#t<fm^r
zc9;m3n^199-OLMErucLh2X^?(=`tV5+K(V+z+alx==nR)$Rn*n7+eBp&u73v7r?T5
zVwO(4?}VFZ@2~;Y9iwn91NdFsBF`Yiy!#P$nmt;Cxz04~zUqG$JkQ|)4ZL)QrnSzR
z5p<ivqUWI1=R<I%!|7gq<(dgBNo%)%uN?+&7z%#-R6=oGq*f3v&~D+mkeSsf-kloX
z<r0>khtpwyvDJcqM)_8F7*O#<4)0K9w1@yM@y2-a)BZP?#|f4NZ*P-?KQA2pG>dAf
zu3fOtzMubrZ&iRbR*jm5u9VK=b0|W*wtEj;1r<ohwIn}+Jk4n?FxB}J!r8cE02)N4
zZc{#ce0l9PkFBSfYf4_;{=T=cS`!{3Q)^df060dw@YfnVz?zL3CJ(g=$Ry9JeYL9d
zYpZLx!&M{s`STY;N@2V#v_;}ZjHiGG@A)KmoGqD5H{8TQI#K_Oa}c@U7g{4lEFz|Z
zn*Ht)ncM&wiXZz#RDeDq=wV13?73sdLZ#<<sF!Ps;-lr>(;s1G#pLWI@|8{C4GGcE
ziz0m8<@%ef_QWNQXpJNXffANoIOV`+zAqn998YXuyRzmylqw=;UpMB6XFWU{zW=!7
zN(RkJFb~+6enx$Z1x^bxx)XK)_C01rCm+oNerSt02pPDg7%&(yQTm_785fb<7RPLL
z2}R9!BVB4xHAl-Szf){)mprfJH`icsDYdlBt@Y4Y&v9D-uKQHGGxBekCHl1qPK$qn
zrqiV><?nt1i84=47)5iKzWPT+>8UV>PH6DElb6VHY0b!v9dY6Br?nf4|Fn$stS*~#
zzjUFJ1>qJfx*mPJ4KMe)C969{CY~!ISbtBQH|R)M*t901CGwYN4u^8w@#x5cwV?ID
z-Zd*xH)v`Oo$_G6&kWvEV6|YxGZ5UNkKy7maoT+4NyG3yQTGuOjCSeo-{W{Z$@P*q
zSd<P!O*e&>A!|!JvbttS)GV$p!BMsAjcyfT{h}!y+3E^Sk2e>$)3s-=&0T>*g<hAA
zrVDgDq+gABPwD_o6r8nQMfn`-Y&%KyM%q}30+Qs}{0%>&nq(%Mm^1OQR?uzu7S!k$
zP*a^iEcAR*d(2t!5$55?L;K}Ffy{bWj{gFf#3vsw=#}uqxTFi3FFw`d$mR5#JGRer
z`canphdB2xY#Q;rrk*}tbe<+S%De$LJ5N(yf}^1zvvLp_m^UcJZ>IJd(_p=f6hfFG
zM?Z~wKF-`a@Je<Yib1s5_xO}WdWML_{uBrl=J&PUY|VtbigN}7B~*A{)_RIfXO#Yh
zfKt{ZB3={t^Z*Y<#AcdkG<~~|F=fo~>t70D3^)p{m>VFdGl1u(vrV~4UCTlZw1!^X
z7G(6cpfuBv<cVkhgxGyUOA|S}`)_70i6oEITcS^XOccUn9jr}rP^La2mR0;!?zB0i
zhIJB{=fL7npk)<c>-kf8<!3uqNFaEjg#j?$-G-=Jx5}u6sw>m8Gx%kY!m%9A0Y4V)
zHGtQGdHnmtO756VyhRfMg|3gF&el*P(HQON1*uyrrzqv5WEzpJeK!{5C_-gU6tHp|
zQc5eBlyir~a>!boAHKnGqMfH>CIz7`xh^&FJ^ZIaLlppu1sWx`%Uj)~a?t0sKB<0D
z#1K$nXko)|vU)6^B?F=RTo*h8s@E7=%FE;6$yLxem(d}hOOg^<ZkB8J(Lk!~d(xC1
z=*2%2MEKNNe79Bx+vX=TZHdKVVYeo8L6f)+U=KI{YUlxeW}=*9F!z=vr~bH{Q*l|!
zFel%jm4rRgZ8E}ArZYcOzci0gbZSaPBq(7ro~L!>SQW$*R$CmdII*;LE^yiNOH6Sy
zI-Fl+;MhW7gK(5kMk|b29fSg0mo^XVj+9kwE6mFzY(UkTIDbQsi9v4=*>Lqtx8b-N
z`~KBr_0f}SPZaFgtY^0bZ6DFGdg9C+Su6AHQ`O222n70yA4?9oR?R&oPU|yA5OJoP
zWJ)@=8@Ab@Dj02R-Q(!G0r9!L_~41%3pOnC^U&&m36AXmd3gp{b=RNan|Y6~y@%bq
zXXOjF`5^V!m^sl7M{j{;7!$u3qqK<Plr|t;`cBHcJ{fVTQQ$qukL^wC+nYf9wUcG5
zTm_Pt%_Z63r;pr`mojGbOKBCb%W9dsaOXJxmbtL&{i=XBBpExpxt{0oeGqr@l`GXY
zKe~$&p8%YvAa7p&rTaNE0lst@2dTJlQ<J?VrOlQ0+Hn)KS~FYJ0l$CxdK{KjqrN77
zhwWwXQ)|hOiz&8;7K5ccGS#K`T*@bu;i7Cs9Z#?9X!)vWF|O)-lQ(#Bw3rVboHSJ%
z5aZYJJ5pNmu1W!#e8^t*Xx<F`_U(~%ISOdOEw4mDLS0*N&~gv}4;XTyl!k&tl<FpW
zyytt-s5~vA&@dBf=zW+>c@#awLNg#q&T^6={3c2#Htg2U;#mWSFL?j@%Gcf5uy2pF
zwGo4~PBfIW;JSt(UC9Z}3OdJ`(gW9zy5gg7LFkWvC`M8qevK6=?<5lugc9cIn<qk9
z&h!54#EEFV8p1CrJqUl2YV8rxq)|ldxCQy8w2bha(uNnjyz~8v!^4t!n50Y~kmegp
z8KRIv%4d`ocex#AX$g~6PJ0(*oK<~h-gr@6gfwg<l*OSn-0hF#<2NNWYQRe#-W-K3
ztlyMG;Cfl=%s1x2s;>HJKel7tBkzS9{U9j$^)~77b(>s?WcnS<5H{|W?+WPz#yz0z
zTf0g1g2f|I;8+uGd0GBk3H9l4UEElbSFoibXceTW-EJGQoD7{L2$zob8^K<?_!T-d
z#ey31Y&Xe^XdjSMkbNdC<PvY%GsVEJF+9T8S;(fBMOq*On=5@<1(Wr^-;3osok;uR
zRK7zqCcq)D_}}X(aQZhhTOnj6wNqa>=ftPG8x#UZp`4!awHCe#yu;DoG5Ql3^Pq;{
z3?XCs4gkrY6xzC=fcNMv_9LVe)%My@7G}1<wMpgD-(o&=@Q-|LUr(_$(m%S?hjs^@
z3h>;n((Abd$JzXMVG(fel97faKCB#Su7xNz*SMPsz?R_Tdvq=e_1wIAvw#ud7}ESY
zIk$`z3t{gxtoNO1fuYj?)7W3H0=R}5r6SJX>5twJsrNH^DBY9?aaHiq>2z9Ed%+kt
zvkqe^<@}9|5Hth2*I3UZ7jtWJ^KMJr@Hp8_xE}^md?TeB{fsHwO$4Ab?RP)#^ngrj
zWVO8+hIGfl*Jz7tNGYP9oevOO?y~JGDqg26U?e#fTH~L=ey&HSE$j01-8^zItMvLl
zzkoZNc`=Cs6KNb6dj`nY#_xte*lUX%8pT(+RkD;UD-_~Aexa-w0Zgo`eg8V)>wPLW
z<)-}B|CJgo)qrrH{pc;c-F+8W50VN2tDS`a$c@+#Ov7Gx6I}+o)`ccwE7$D&TPg9f
zc>X_LGM|oEzTcFaj5=A6g6n5I_f9I&pE`TAc+CCTLE=^|?W@k5bD?~$H@W($YkD(u
z`$Sz#<ReqERNU?zJ*Asxgs+`M4q*$4jpbpR8~>agSv@`F=67@wW|?ijG30O2I&^>b
zui?)b$7s>1Q=!@H2cxI5b7-GqG6zFsW+V3#)!&leq1>m-Ock#7)5>{E(HHqvku#|Y
zSoh)aIv5hTYDO__iw!YNkDVvUuJHfpK4^Zr^s!ZS9Bv6Q0)St8#%fvBeSA2XF}l$c
zb`)*@Npc`xFQfRbdtXKU^*-S#W9AqS8WW84Xz_@f$+Q^;QKv|K*tGSw(vwv-b`s{u
z&S<MA(-9XXGxDsLqzi=DtmjjttJ^`b5*Xw0-DJ3SdWk+I;vyvi0H3ITk{(>yr_}vc
zq~q0|5#in%xZ5_vB9tRu%RO1ES6=GeyU7Hk+`y$fq0QJZZJ9v_5)~XCr<E~3CQ`#>
z*~|u!uuJHhiR4f#TFaEP5s{-IovY9?Ncw-N)W}p+mJ{qs_G_LW(yn!qRJoPMExn_z
zw&Z1WfUG6>ClnR^3F;L>s%3M9-6`x=RW321+&Kaf6jvra4{p8@V2M$mprX8u#y5aX
z0N#;wR%f~eLWI6PtX}6*dmTIpce=Qv^Gi6{_qxlyKaT<ME)b|b2~=DX72EpY82_^3
zfa_q4@Cm-Em_a^vvSp<gobl0vvl)mQjk*h>YV^4I%N-#iBla0b2ff?jL)JAW@}1^J
zuP&C*rX&&&PD2oV{NnX6QJx9;E}%mMYv%=he2lK+n{@7;-v+bh_>TWEDe}GIHx%$*
zwDrAZHxr|w;}F$uOpZgYPM4cWJ0f3t*j`gWZ!W<)F~t0(xo)zCxPPgY{$$_{M4&R=
z620cb(YKK-7{54nwL679`QdKd2l5pV(;gGEo~qrm>O+~P${2b~D(qclRK}n?XfSVb
zO&HT7$GkH9x$ZK=tJMuz?a3@#fL6oN9d+Eu*olUMb0_pHAQ@<r44{*eceH_AoT6xH
zE}F9|)7;~1r%+GUD6VW#Rk0dK<|xH8nO1-A^%-DmiWDQfIe!it-PnKUG}J6J+_l&K
zp<TyjIHcR(OqE~Su`D#J8tj92_OC^_zu)T<YWLU^puxSiW*~1;7ca(ih?_3X?`mWC
zc54dt6cB9V-KYu2bm>8lkzFumQB=OH8bx$2^Y;c3nN17csF4kwW~2jo*pGl(cq7Fx
zUe+a*Pr!=DzNI3qeQ=@-e5skKYp@POkd$c*67{_0qL{j~*%Xr>0jBN3Gp2W2!jbST
z39+`61G{0C(uDit-QmA94q9NtX4F|Y73NS|uH1U$NY)*bj7fn5J5wRskxJDCJLWDr
zA%`6m0(fHD4;X|ECFLgCAI1B2d5+reUJ13~`|9!>so8HfR8;~|W0g*sUahCqy#|)f
zuR%4nzmew0?$f1Fd_W`M&eK`iA&2Q_>5m*Te)oqlUCr^?h9Uyt;-+pRP>q<$wDP=v
zogM_qiEo+C&i5J@vl%1wiLp{CE{xRH1A_4{;nOsjgW>vCxm*VGOn<B)W(LRwF<TLx
zXi=ArYqfocX&ZE)yFTvIz5K5~W@AE3c$^yMdCg2eqiX0*x>f%e%Bl_fPb&z@J?wb<
zNR}hzW}sxM_l3aq^orG==)rvMV3fo9fpqy3dnEf;lX8lmzO+@IUae1&*3#2*n(4N8
zTFLbMu+%MaqbphzHzO0518!3%_~2bTTyMX^NNE{=pCAXnBNj;A1oS{EYu+}YK&Dyt
zO=5Z)ipl!ZnHagn+_KbXO!?*7&4?VB%^()`Ormtuss`;`L^L%$pWgC;@}*6g<S&p}
znz_IeK`xfTqjnz+3n3azabxk;?xx&gm)X0Pum2;AGy!W(po!67Iqk%v(Hq44eJ=Gg
zZ**Kyz<Q`!T-FbgE3KbT!OMM!J~cq!aP9Xv<yt_g>o*$oChsx$7An3RPAw_s2`@VS
z^KxY&tQk)0PA#pT^YMIx(KyGyv!AT(oueN05RUBrKeY<2T0c!U+9e;XZ2KxARtecg
zZuuL33#o%g6~@HulB1w_GJ%0>md0tnvP!;~=NVzojeY0#VrMX=?|2E4xIRh~buhRE
zabKMayZ9v(#<fDoYT8cUNLxqvvuVKZD9`Qac5eq>jhvmXR(R}Vm)Noi;0<t&a1*~B
z`1-}kBAxJV1ZR!N<P^`VS5hJ611p*0aU`)jzqWE}lwaeo9{p-(Be7mMlfgyv+wP}V
zYbnh+PSm$PR<6=`wV7!C;q8Lproi*C!|RrE$Rrfa^89r)^ikW@Q61jzM4PSP+o&<|
zy9%kZMPq+u10;V_;yY_h=X~&Fjn=Vi2th|bLdXSMA_DSk8*4W6S*rhvTL!<IKVcRo
z_;!{yZ`&o_u2_q3gh+0}u3o~#KJ@#2)x_g;$(}~=%>(RQOP+Qy+;=l)%f2K}%xi)w
zf1c+pNV3(1L&1e|A2b$(X-+Ms3A?k`q`tTkT9SC6$(L0@sKz?@$0n;$TtO229a3h>
z(|uRLXQFS~H1$D0Ed2wi4FwXfu@+NQ3{h5;UGr((5EAyO`&|1im(^;sZuT^6nUy<x
zfHo~|&e`n^Z*4}M@`bM$#8CcV9#^z;^W=@NQ*~3L->1bOYJKpEs+k?~q>*HtC9mIE
z4H!`u!HC#;zh8Aa(&L*78^v${Cw1D?J{kT9v-JE!>vGxM_@-NskI*IJ;^aDV>rWtD
z@p0;Qvv)6~8ZCO1j~;Z;w>HdXB7UoPCG>n~Zo=2^cgr7(*7@ldbt{5Y&)<1b!KF==
zS$Jmz=85)*JXXrBH#b#-WNj?dPN_~)GV#xE11Zp`=ijP^7?WG%DCmitKJ7fH(vfHY
zA?{_}!_7Jtn@ac+0^K}VZp6W@Lv~+nUPL|>xjz|Flk@eGwgRYb_kbGFc#!P9#m|^&
zWLW;^+kY1dBo}e-tTVZS6*R3K3JrrwkKYLBmzJFs-{Z*{PSfXa(le7MPl_UZJn}9g
zt}}?#Up>I9H@1G#c{-m0kW{SK3H|~zr2+IDK*NM|_e$#@^|WJLsM*W-#d}%tS&bf~
zo#4Mf#2=@j3i#V0#N9m8vgu`#w%JE))<j{07jzTy%4w_$IADL6!laXo#-f_*(vZzL
zdKyDA+S2>)SUdag_FU_>$bCM%YkzaHU%r&M697Otl%kKm4u+OjSvCs7^PTLa6=Ur)
zl)@&=oDq7BKc(eAyb$uI$yA)u(CZ=U*OTL%lxpd{I9^edj?M$nGfK47AjgZihtm&D
z5Bn%)9mVLMt5kFfWi|8!LYj{<t;>C`X)^6Zw9Y2ULC-KwS@M=a+Rh5l#$Qv|UMGXA
z;tm?0^z0Ol9j+{5ndd97%pYg)h*}HYkU0M}mc7A@a+#N0+Wyo5&PPewTSJJi4w@ce
ze#u6y#*jgNN|w#!kbt^RPc(A&rY%cXs}-Or1trxH4z!%E)s;%9xN)#v{NG_teerj-
zE*#Ry{_-@GU8)u>?v%dQvnzMabz^xURkyex%PnaDkZcU&qmKXBA-qVOJa`0ceJ9>1
zmS1~8B-H`y+9Vg)e=|8kHv>rhqw3a>Pj#}Y3q@H|qII|_eFa$P)1QtgbbI;PwkeIC
z*(8tRkO|>=_s>3Nl<~!;@C{XxcxhKEA)n2EsOoXqmMyz@9G6j{ca^CfzkihOm1{wb
z<cJeTY9mC*y@H>8RkCj*W~uGO?>@XOx6t*8D^3ppRg_f)`QN70{KLyKS!ZRh*zM6)
zPYKntD>U$Q{e0`5%!3t=FCD&5747cr{CDAQs>xAM(9TE=CBNHnUdl!GkJR&S7xyft
zdc^6DdYnmA5i^;rM09&1Cf7$c<Z?BHm!*4yhF%Khzby_XQd6h71_}uStW(EdrvF}J
z%c-qQ+-JC1Jl<^PBbM;yxs<V#oK4Ne>$7E#E~ZsnZJ7#mz54l-Q5B_#&XDH`nMa}e
zZ&KTIYju`29IQ;;BYrhr?G&ars^YMk=l}!P&{tvOB`-UOk!H{PgtT(`5M=X)da{^1
zys7I^m7_$xFX*i1_HLkvATqBX|2ZjE$k0eBt0of>fo4#=n7GP!BDK>KtMfB|bP(yV
z#^{lVQ^jd(OI>Hd4~b%@szpq^L#17tgqV-z;1<*nGM0fYP(F`fVvC1=yvwLlRvn1#
z?G`h;N!bUkwp7RXe?<Hfy`&S;U(a*sRctiq6k{gss8(5)HRXjh9V`|Qp6>io7b<&u
z0eJ(8zo5^Q`~m*21$Wc{1u(?NBbpq-5PNN=Mpw!%bE~MPzck}|f6ymion(rss#Yss
z5=!^$6{}6@d%Ugni!_|F(-N&ZEI6djTr%;>K~B?qz)woJl{ZV}iv72=t~e~Xha{gN
zCpgoZ{X;tl&8StO(0X2JKHIHZ_FFepDq{k#P@^}@t`YpZm?ajEd#5irY3y&o<Os>9
zSTZ-8kI6|msQ-6irC6H|PB%o>+c=qHNojlUf&%?_;YX|w%ItUFl7JOKbZ9+Am5Q3r
z9olUTt#M^UcQni5w?p}4?qn~Tq0V+Qre6to4EDzGRkUJ_Y?y+as+n|zN&ScD$u>#F
zw`;0Hs(U93;iC_(Zf8H(B0m^BbPf@tBp8YIn}+_7|0|oP@*pMCHuE@3F}?^CckuTg
zzJMqj^s@C8SU|1Guem=h-z7Rmv<iAKMSD6@@UP57XeLCLs|QayKy{x?s@tQF=<3|u
zJI7?30j?siU&loI5Xaahz{Wc4*WIU(JhJZC)i{+Q;Td`n{=M_GLS2C<lpMM?TC^mX
zU33EQO?y<EznH^M*1(LQjCys&-hJkPOcFrpIZoTdS)cQ1c~+Y)i4V(ma!ka?C#JU8
zx`K@5!ui~iZ<kfPziDti7l}1CM5>y*!}6UaM%z7ggvaYn>}!&t3m4!CtY-`yV&hBI
zweW;|eQMf)x&HWo$OzrzrIn%Y)3CUsx$}U!jOru#4@j%l@jf93))R5q2k`%aD?tiz
zYR;0*@?PeFrn6FQ&sjB%S`L@`j9p^BA><m%8#d<3V(N0Zl6~r^(fRONO;bY5wypGP
zBKBCsXAi9zFmuE-;|C}$wyNX)htD^Q!3aO(vYo~?*owqS_Bg1O*dCTG29E=x)%|xx
z`ylH>F)NhoH@a67wtGkA*+QA=xz&09;7s=Lq5*1yTB*SmA(Q%BTcYcj9}`^T@4!A#
zflql{MUC2IH|>A``7ZTwrOf4$Z&#I1lT+STelUM3ZN)=-ct#L!K%ew<JJl3?jxQDw
zRpJ?+X3~3uV%YlA>T(|F)r>k)?=(SQ0=JUHiA-&VA2iL{s03<!u&C$tXXdUpnKfva
zc9^pOrzY1zvZ9~aRJVW<&*4T9vLKESqU6RkkI~p$$(2sjwkE&78O&U7$pg@U)YntU
zT$XCpTgDH|6gEbs<Kv71lWM6Y)<>HuW*0-)C56eJ+kJpO)7i$EU-eBmF~PDS7T=^8
zkJV{hup<IkR-#24Cp6!t3TDvO0I;asBzB^uX>o9nx516^!X~&}uEvFRy-Ym+ud8;m
zC5Y1u>r)+Nh?)F-r6~D%i~?03$_;OWViCk(OoXik+D^A14Qo3FmW$Dta@B}?A;)jp
zWG41RJ};v>KFd?ow9y+1<pmL5*TP=!zm<{J+XsekD!v>_EzXbNsTc`T1>Lc2hF5H!
z@Z*W6&k_pf^6Ji^;V=JxYWtFQYHLtVn}6S~20ul59R(|g>Z6b0kdakwqn*FLkvf{o
zEe1wmr42!G^*ic^cwD^SeU7Y)Vopak@gK?$r3ZefeyHRZ`WO}^*_D?^v^#HjU@Snd
z12j=Rn09Prz`5=%y?hb!z&OkB(V6_`&bFTBKf?l)f3M%pwvmeKhh@&B62S~7FPq83
zEEFkzn!mI514-e*@K@sHt;7Xm9HdQVu$B6nrHn+nO2OX^pL|)H`Peo(*DfNqmpm*r
ztW&L}y1rm{7WZ^K3{@6)dF!o5AvGdO9?0-3Ae$PocmDFiDc9s%3Sw`jRQN$;_PJN@
zdB#Ipf_C-3`C96=fkCO4m?>53P6`%e$h5Mr(Y4fJd)e_6_x)V+j^y6+T*#EoS!F@g
zM_AFsnXF`Y5!X$!&<_1&@e=&Kl-L?2|F#cct7W@3v`depQ?=9coi{mZO`G61tgzh<
zRzb|t&$xR3hk*lD$a_9)ADEiw+)+6Pgn%30cVi_oQt5zqHlJkC#f2TITt%JQKY2ap
z!|c|xqdOu0>?{C3arv(Vp8`!CDd;wCdquKFp5MP;>c+29BpJr4Mrzp}<Q%>!_q_#k
zZLue*YN(4rZ$AR&r@JTxO4`|%o=h0}??O09RfTmCp_#`pS@}Mh86l{|y-CgRAlQ^J
z<(UA!sH^k}WM0Bj(lm6q$5YR$5g@@P1xe6b*VBj#Vg3dMAFRR-)rab9ZYvcS(%V+j
z!p1n6o|1K%g~ah$w4e;>xt~gvyNaVV9O{<qh}P%TC;pZFHP8U!^p<0!tVfalZNm)?
zSci^)iyG1~EPP1mSL@{|(I|pi$p&%;bEQG$K2cp}fxY^5=E-tR=dF>7O}Es7Uxr8~
zsRX%C=Ds!ho>VtG{SjJp(yb3rX`kFu)ZULW<V+9>vDD8!&{0^NEaY(g{OLsK@mXKt
z=?i#}g@V3xa+=T32HG^+j&y?>62<xN7N4?ZhN<J?Lh1+j&TpgVxkj<i*|W}e4Ku=s
z2&i=|j9D>AB!eP_s2`RUOu1yVp5`-uQ*zX1`5UyX+8dvaxP#7mL%Hm}g_?Wdw_byn
zB_Cd!RgL}c0!EdShMrZW$%rnF9d6=PnGU~(Nc{>?c%ksL-b_z}QMtrEzl}MS13;ro
z9jE_y0i?RBNyBM{J)mSCr=KSu{`ncsw7SO-P1$=Yq`j}GKe-@Wl!}D}CBlDHVyB<}
z>C*J7RZ;q)fO4I7`Y{g^gzIJ*-J6h(tN&zhlDsCSCVF=CNPh!gE*49t2X@oUbnw;f
zK2VDO`bVQ|Wrvi0fOZj(ffZ17<)cWOD7gFE>KK<n#YQ+v7-)0mj~7f~6fzCslUn9?
z*~T4t>p1vT`$$C4e`uNaf}^)AjIfS^LBJ6#>0DM@TfOu<@7)RXM$2LENM`kWJKmxA
z$Ye)&TtX{Jo}@FoLYZT(4)Q2gYT6_Vu&{+*t3w$9?-XHhq`Lhx@7kTjHZ#XKjt&#5
zHXD9P^3@O^%TZ&FsYAPZ<^W%tC1YA}ncJT+*d1%$c`M(YO$G2_`B14LdBo-ua!psj
z)cAq^pZZ~iD+`}MAo)0+49=i8bsImh$R7y3<^vgb%vIF|nc~u$8ivE{7egyLKpoR5
zkZOBtu;k#G^-3k~#gndq&gXaSgpR-{ol0;IH*YPZiA-W$yET-mEUT%_7i*3iOX&V7
z`gKK~bw*DJ$LLm8_hH!E-B0B?LT?W7>*$_ezG1moW@>7#XH2dA=r54=X1~j^P+Wo`
z%)Tl{eQx)x5==Kg?Z<Hc+%~joAx0|71u6S*nsy=|(p}W(Ml7}1BT5=_ZE&5v&$nb-
zY;c|zNC+kYYij6Pu!i!kF~3uRlEOZ4&%)+-^&V*vf;gvmA&1$F)pC>L;dJEFXBwge
z;|93L3TmZ7qRcA*h%}T>(c^GIIHep?W3BaosGrV3$+GSzZ=e#FKnvB4HCRhXN&RJq
z>n7f!z}0xK;t_`?Uz}&c{v#s;@sh;5J5=06Oudr}!d?-pw-8bwVO*H21Jr=;b9Kn2
zh^7w*OWMre)C}u~G=KL3&7vSaVYaRvB2Bmnc~Xz4Fn(3X{UA?>m2&TQ8=D^S&fsQT
z_`5ovnBl@uX_w;J_pD2a?uO+)R)0LifU~uOjU(xKZ|qVI9;hxvN;W+rMySYExl)?q
zS=E);P13Z?N%g~zkgp*g!}J=8uWJ4J8oa}1=lgB+lfMT+nEoLdHZ$`%f8AFG>eB8|
z-L4roG<hPPj8t4gHs?knFad?Zi+$RX#fZ=(Ik-A&Hc4hxbVy*~XS=qEGJA9DKlYhL
zLq;8Lfq|wU7<2q`^?d~YOAeQNl}a?(NfDsp*6Uv{e3rYP_cqz#jFy;UOI7Zfb>yvX
z3QXK8Gj>YORv&wn<dhSB_Lq4WywVF#_A|@!65u*kIdAwp?6ad7_~_TZhGa396dhDm
zOx{&TQ4|1hb*t#xOzSb2UB*ogFveFK=&_cOg6H~t1`9hmgas87^MX~&nK6sj8NWEV
zLFi?+cAFx+gZd5kfGu73jMw_5eSLsp3W_*PFnvr(sW7-t+fQxY0TI&km2wd8Rg7L{
zdmPekMrtY7$9g$2<Ng&KF?A46*DFDb|6Qo(%o#HGWG^Z>V&evT!F_@C9YphR>yhFN
zTa>Fsje9KzzI;E}F&E|ajMP-#Z{v{}bS9ceIBmxWBFAg1*S9+J(Y5=QLD`hF@Wl?U
z8t*$9Lz}T3afJUayr0;ziw)yFbW=Q&`gqvi`s^@`vU=A!3I8N*$w3{u_7+7Qyqpns
z*e&6hA|m@VztCu_!7!C-I#~bWg*)!V9u<GvN<XgP0AdCXtwNe|#sQ_jj!QIok=Za#
zB{0oh=^VdORO?U6A@-TZAstA{>3pr(iYr26bKSvgqm%pVq;+1Q3kd)>5fhEIkILsR
z!7w9+=;ipFDWn@*rL5}4<NJ*JHY7jCB)}xQ+QVuLb>Y6u=xsBNYsRK-RMSYM!5{y+
zO7aYYULbHjZaulV;`F<o?znU$jL%)1!N8>AcRg$X2ZS;@aEeIQlVy|4{rEXpf-@vN
zRwheM=|{c|?wzTI{5W64I2Y>Xg*`Do_)Q<%WOCuz%z`F2jZ-(|R=sBuGoBnPsWCN=
za4PTBThxm?z>_8(541nVOww+{c2r=F&&W+JY=|CCKO1#^bN`lIc!&C%G<rAnQn)+P
zto`QojLeA|TF$pFG+86z+M9Ra%HO{w6&Hy?v74}OPjALJEf9TqM60kMrzCWpx+d;%
zc!ax|#YR`MUANVlbV(b3P<@%$hAD(oM-W3(y-ur7RIoSN!%OdAlB$Y(uJhkVEh)J=
zM09|B`WhvtKNNZd3j#%~2z5+zn^@9928*7OFsFT|UdvQ{vb6sR3->}Da;AZBV%Jqa
z8)hxXL^gfpgv{4X{W+TK`6V->nRWcLPZ7AA!}LuYcCgM;T0wt&D8OklNMtAjjS1~d
zVTr@WG`I+Gr<>n*xL+cSJ}}8ca}@cBUDZRyVzeAq&v_rXjecWv62&J-MEpA2H7T_x
z5k6oxnM(*(W*sQ8!O7D}`z#2mm_?E++5a+oqkM|6Pty36;926|ns@4g-xEIHu5zsb
ziM0h;)MmQ3u$K;^5q?(s+y2Mzqoii1?0YqLZ+~(^<mH+U4l$<pdC(v<&c!mA7hU6+
zW+NTC`IHW(&DN)n!tP{xA#|^@j!!!wbOaafziyGf<fk@24I5E;qBo)&^sQFqY7Y@A
zCBEz1dY94x2%^K*C&%F?sC(y0;<WPqSa^Hvqhs|Gn!t^x(5iSe1y?$u?75)gN{Y<e
z!PSohWgDxG>_gl$sraS|!s{NAY_%gTJUky2bvN5^@A-QX7Vf+)70p22fBmsTzV`7+
zhx#xBQ^LQ#b9iaeC^t{?<B!LX4wVZPam@~q5hmL80fNBW92S?#&ng=q*3eBSDOfN5
zgFIXIahCe|dIu6YrZiW54R_g()p6qC!FIYkBXVX(Q18S(^|fp7iXPR%=7Z$z0U}fP
zS(TU?ITPQU(iZ@4%jTu&faTeidS)v*i+3Zze10~)!lmgi7^Ls_Z(V7T%s3S^sRhRE
z(54i-u}AX}S5z5>X<LSJgW)Q>E3FKd4>MKuSni%G1?#vywxTP2@Ff+>kxIeEs;M%J
z9GtzI9nVwYDNG>Wxh*5Kw0|uCqRT<gk99%`%(85M_)Z&_ojFRo+s7hj#9?)MbH1Oh
zmU01ZVe2J7ecGwc1E+SOcICg0=_D$rlW}GjJyo{U*l6!YivE%uDROq`S_r24(Runk
zAVQ)^{9f3j*Pb{XK<{=`mz2koo*m)WjlIUAc^`Z~6oV|~ybk_E`TuBP6Gk!_sxQ$N
zSGV@u$)(dgN!xYa6io8hRbt)YnYZ@ZEYVNRqDqLp5z)EZLAg?fSqW8<{FdZc9Z1D5
z3H3M->I@tt^Y>b$_krMrW2d3Qtv8}!Lk>3^Kto)fr{rB61dSUuLN@c8K!RN4n)XVq
zy@%uiYuUmQx%RLb@6(&55_V6nVt~aT<!O87ZB$&L*5e(2jby()bU#~X_Sv%Jac;m*
zrUEhftbc(dj$j*!TV`&!s$qASkvg*j>!{s1tVumcfbWF_YZ!?m!FQm;tg@P=z&j;}
zN3R@{-~e2@5B#Wyi`lD;Pn-65syw{ceYidiJW~Bj_+yfzQDH{Kg)cC}SAG@NFf4jH
zWQ0r5>O$7<CKOn^%hW#ZWlVH9Fj@iso^`z~cu^tOV3k&qT$*D1bNKPy)y|OvTG7is
z#U}jeiy^24!}pJ?lBM_de#e$TKA*Kbpw+|+AFPo(RVm@e3tI-q5$9<M8g|j=7q=4+
z|3n@Q5QiZQb4gSFXYZ1ksnW?L-LpUcT@X+`l0BzICYIQaF|>>Gea~Yu?ay!3JFDkx
zTXqMk^jobLv?#v|2${%~Rf1i?bmf<VmIu)MxCdQeYp^S)|NBD6j4i+?L$zm1)2(if
zaD0p*SPS0x9wMk_dkc^O$}#eB&VUXjWR97^)}}hV=@~D%p~fd$u@H4LO@t1lodPd&
zFOz>apZXze9l<_AXQnj$xOQ0YM(VBH1v6aMbIS3VcdV%{{%w-DV$!gfjTFrR<)tq_
zL};%2Z2u}c^bv=f*#`a2KY44b@GCO@pRY3E8k(uF6lr+wr4fY-^`b|Qa*tJMXJq<e
zQfCr}{<P1Ap<Q)M^|EUG>E)cmFPfOcgcn>b244U&VhvV>@7?l4zyd694-C#}_*2Y(
z7Y=FkQQ8Mo^dRCi{OrQ1*c^^+Ya?~8&|v#c;ja*`t!bVLbIYF(cloZ%ocQ%Wjmdm^
zmIs~>|K`(uQy5uK$wKe>REW2%$f?`SD=o0k=(`gNpFMw`A56|J*V{4cd)HZ;WrV$+
z&qfM=A}mOiRWVYxMw@VBB$TfTuIm{}_1J|3l=oRNNi<rLzWp70;3^wH5L>xUAbW<7
ziMS#9wS(K`ZH5u7P1Q{WzqEA*9e(2!f7Y(1YorC)r>swO{%gaxzPGr_oOM74pL=09
z_ODMKErQ(==qSdtG&`~Suq<(2kO6<%<gc^O7K&h9R9GERb1pb2b=eZNop$WfgiN_Q
z1pU5U`y?egdEm&oZnDPOlfUe``LbjD%%fuWYCV~fsEt8VAH<{VQmzi?yb7WoYoK_)
zwzQ4!Yg1=yP+zf8o6Q{3$E2KOb9lU%KTLO=OUSTI-nnx~#7Gp#4&RmguoF3|9DIfR
z0rpE{vPq3$rxL?lGRc}jWW;T%Vp+N}@JyOIg@Mqfa+!YoQ(Y~2?H2{7+p!R-dakR2
z(pPKzT2pRgX?BxboNZ#VjgZm-i5@iVr!V$gzH4w*Wba?coh*5k;B-!arZ*zEB|F5{
z*x4hk$5_BV_o!qW;qv&|St;6-ez3zK1}tvF)B!3~H!obvlBB`|tBR_M6}8W-`$nU7
zoiW1CkSUpFOnM<#N!Ys_wlgJU*ZW-Gj~sj-k6)t?6pGN#bWVTz#c2cUh4HzYk9O8h
zREVyG?3@Ugzkpp&ecqs<jHvyBkuNkJiB`krlysEK&qS(HRv>J}^w#!KKWWp<-}r7t
zy>HOpi{0vtHtkeudcNJ2>LPLe8qFl~ljS{-;La;;0HjDLSbgg3oohj**do5q=A{+w
zmQUOp8F-^F2LmgEPBH!aWopMu!EW^pJ3N7*r@|#z-iFXB>EOf>uy$V9@}FTA*j11*
z-9@@}B+FkQGS3pSR%G4inoBGH!+45c_n=XhKvdklRvn7cW3lz(Nv&){_C89E)tQXX
zbbXS6#GSMbUan27)bwXIg8(|gX&w6$AUTF$HBSA@pEu0q0xD^Gb6@@ogM9b!_vE+S
z@P_g5a#!caR<&C`$!|uYx#!1tLT2cHSUFLmzP1S}65JTv%C@&8<RU*Dh02W;dAdKP
z`F4(EQ_5ptjwstF#Kkrs6%qIy^-k2RsCA5mb0YtKfk}>PH`lIz&jNdQ^=d5DuhRkD
zZyX{hg5HW&6~?b875!@slLprVTrJ+F&g1TX#)TUcR3;fIKS}pCNJ`I_is&m+82J`D
z1)xmcfEp`4zl*f3c;bXFm*SFP)sSaZ_OP$0UP^%Llo=wsW;U>R<rOS7caCdWIboPy
z^sHM4z#@}3UwXW}I4t+sm$#)nV?)_VSr8oh0g58$kC%=z(V_>h`4mfAq=NA#exkZR
zqB-4po~DbIB1SLeJ8G*}{c8)xDv5A+&nVx4&Kce4+mlY-mr2mmjHL#q>WPU<Bx`>x
zsTd9!qH;madXtETFY)XJq)I(vT3n=>$Swj{<<eas3;ViG?bj2zVs^v#PEm(MGN~%Q
zWA_<_|82?7=3MIHP&eBBasgbPw!Zg6fmjiAj2yu(wW%}pzUb@xkI{_s+DZ*6q)w;F
zuC)g7*B@KQ0L8|)Y-;p*N0y=sX}}W`u^af>2G4ty_6QbBb)>^^3K4I!Z2Vi!^9AY#
z_d4~&K5LH2uQ<amzt4SPuVeEPQdH!eRjk1za|xAGkk%e9I`+6RH%x<HLb2I&7hIaJ
zHK;Kn?lxbExm{h7D`EG!W7yA?eo_c->3^p~Oo@dG7ET_Yyu@XQ1Ljg#B|n4XP@1TX
z#bW1SZ3?p|C(+m3Iz8+L_6@Q+iCPogV&AF%9k?2@I5`f;ZgU!%JUi>P)#Q3zvg&VE
z?1q0m;xJ)_OM`Qj6B9W5;%yC0jj#xqw04r`^;czptgM!*)6Wi`{P&FQTgc@u*&0~~
z&R)$r^{pYIeS?nB0)5iQ>1`BPyIX!5Nx?`~`6Ob7rEg0GrJO8IH#6y>4GZi$)VDQ<
zv2u&%^_S^Q;tiL$cc4wcp&ed+__xRB0j)P3Y<MHxfd2@B)qP5$X}uzjZ1<Rl*XWW1
z0fX-n+pAAJkz8H_Lx!d9S2|<B?74aLtjX87yx{)~TT`qZu6a?d0{b@ed@SiL)z``U
zy+iVeLjWNjatM#vZ9L!J2ZlRqQ+;AJUel=17H#JBA;)IJI_B;oI3)BRS!`v>zW*~s
z%3!kts$OqMDC`hvc)hzHrjR#~_)B_K&eGFildOVE9kM@rx%=M*+KI{CAXWVzp{k9x
zLH~~q+kIW#|BES>Xy&>`Qi%~2%`G|&A33ut+kgLewVS-FDfkB~GP@f&yLZvI|84L?
ztkpG1vUN?4*8bkpyP~01cHA&3g8^$tf}Ww_SDuL`3h#itJ93Zp0ru)AUq6|Eo(FJq
zoYRX+Hr%1zGwO=uzuvm>%hpRiJ4yonf5mh(KI#YdHb;J#xtQ2o-u-EX(2SIo;yFrL
zcb0^)M~-nhR{xah(EEte+XcAlPlP0>Rd0VZ6hqv+^|hw~3)#SP`R&xUOQL?i))8Cm
zfoS|wXmhL9U+B1Va?dVA-sEjK_I8qdyxNWCS>oNNGL<)XXnJ_n4}LwC{5I)#r7}wn
zuFSl+({U@p!{~e>Lkrd+wUzD`EI`>idhHo?7pOB#*<dXF9TO-fwD`L&JLEB+3n57Q
zzY7F>d&D`}^H2JM<AVcg*!#V<_J~C-Szs4WCQZXaw~|^&LkZ*G73e(dc;0MDD8OET
zZ_Tb3uPe;m7ma)sT6N=ok0TpmpnxrrBY5!{PKIBSK@C$`-~XZ)cjS4|fuY~Avb#&F
zNJB;^rir%RAL`YvZxG*n=atY%>%P+#TBSeSmA{M7N(L*Ho*hZi{DuM?=XiE>Gtv@W
zH50GtSpZQs^sdX2{RS6Wg8!q{RVM1P{=2Z=+n|t(z8$lzfjC;sJ`f19VDeaRjAWze
ztK!~Z0v(5Rz=B=~mSP64itWA<STP)`bo#ZCCnaagFaqRE-08{VEe}fT36i2@G##`r
zul>8q@4c+&7M}6<iFKo5Vboq!j`4LOr@>}20u8V970UXW=hgsarv{oJ(|;gxpM6rd
zWW>-MAt}n23y7!}*OTIj4u6j!{>gnpZiAF0Sd?K(RzzG&^->@k>-2;!kh$GPG_XhJ
zs#pDMCK&tXstz?k!6VJMTq}c>NdF+!^h^o}x3Q}3Egcj-&6WS$&}|XG!WkZc25T`{
zrff|A!8z$XnSe;zT*>F?Nl**Uw>30ptn;6cxn4h<##R-c5zgL8gC!s|9wmFcQh>S&
zMY=Qfc(slPy@NARD7!E5PDlYyrzyC*&n*WI>GIp=RB$Tvb)Hy)pmQQ^dPX&M87%z;
zmW86{*HidxYsYPD{)BjZtAy0U=Q$}%B5%$r-RlXk(D4BodLg|TAu=U~mUZ0%WWJMe
zhfGe#%{*H8CSK;J_SQ!W<6qw4E>GlzVsi2J8Z1@ytyqg{gh$!n=eptM(El!^I6m<?
zk(S|!9okHBEc0_IGvx$Cy$t%cvmmi9X%sd9>xrWP>MetpmjX-S2A;MzN3<OH+G||i
z-=GW(eizYv8sfB5@?t1PniB>xvDKmCmLE6fNxQ7Zu^Iu(lX7fb8S0GL#Akz&B7<A8
z%BVKMQ|ER=e*c-iH92;C*`4g#kt$YGnyP}G5Zp><9{Y1JABL7*Ks7IpeblDi{W`-w
z8Tn5vC<-m&ed1Q>48I2H)MS=#^{Jl}_2#N&`XvXF4m#<tagDFBH!hgg@q|`;c>xf$
zagS7_tI1rDSgt;ldUscxa(d{GH)Wi!lKGNmRw;d<<!F%tHYH8_k~I<F?Gmh!36)<j
zY$C16tQ#ZGm4VeHXY_d&zGR&l7;*L19BVxa%!fFLC6pj>QtxlrC4$Dmk}C(E2fqtO
zpF2FG+h-b!@FcNu`pFM|*B+|1u_Z~l_49xV23wE+2E|gXol+`6<hGGj+ihR%q7i#^
zdLfI&qv19jZNp}@A~VtQD1uc%&uwYE$1jcYqu7wo)Sle<ap?Obh_5rd%>Q?Z5E4eu
zgp15yM7i}gD2rbMWI%V3@-Ao18m2y4_d;Pk{#x&{=mia#LzH%7!3PM_G8u?~=)w<o
zc)_o-MfjZ$)Nz7RfSjw_kN6hRBp#_$!2wGoyRY2i6Cvzdk=F{}2*`M31siG+l_%E6
z!j+8P=cC}J!uy@7G%^acvVfQoKu?{`jLX_2VFw3#wYUEasa{)nL!zlFSIHbL&r0oa
z8EDc<BCZvBNcCGe7Ds!3%qf}qF|(_$Y$MNJ8ZBX?Ra>y$^gy@I?7s`y$D#%YbdQq3
zS^y;v569t@yKC3yh$|fuifFB-$Nf(XF7xsm7ns@}^;>m-ixj%Cy5r6_8Zozswj61q
zJ{=;a#UC4k?TfopEYSGOp|8jK!B31Vuf~8-;t?PYZsf+lQZ#}fqz6g>wHR9R&(2g@
zbP>EwWUjp@UJ^pPci7Iv0T2$%356hQvLM7`BHi0#^poCz=U(fuJlyljP#;(EGf;Ao
z)LOEyC2&kAMbY=AMG)5DsIA`gMIHzjs}rMxo$?GhPfx4ewU8b5B=iQwp3e^0gu9X3
z^_uPD+ux{afA`x=*8?qf7Y)^X%ZL>vMR>f=7s&C?Lu)a<VacDK&%`ShbLf+sa=#l=
zhNx;)zr@RwpGHko-~IL%D@N589A&}2nKF(dGX*;~gVB>Auabsoq_cN@mW_rCO*=1-
zCihKu77@_#2K%H?kIa0gd(j3C{fr=IxW!XXbM)SF$J-K`Y*xDTOThIjHzgP9AM|i~
zUTm=VvPH;IYIa8+v@J!i#+QqbJ$H6IvTI2lw~p8SqF2i&LM!OJxKW}Vy6x^$eWA&X
z8ricgDkUdlY>{RlS$bXn8O#Ks+yqZD6XQ0udT0(ql;%TbZnF?6U%lT4Vv{@}8aD;I
z@BE>A%i>frs`qGL#(y$~px&s?J&4wEvP^I|CCOJsNi^ktZX7mL@``<F>i0Ivkhxd<
zA3PRh4E$aNxuZ8lo{swBDd1D5JFqIUTqF8)O}MtK%F0G5YU*F#n!L=f>O3x=uBzLa
zWda|=uTMVDEqSVNpR11(W72+o(5)h-PpYBv=V-@}r*A;>2u9p=e3H}A>-X7x#vJ=j
zuPecxW?so=B7B%Zvz{Tz?E8Q9cU&+p82NLGM*m0ASwA%WzFk~DDvF3omw*DJLs~)<
zr8XLYQKLa(0}1I+5Rn+MvC$}u?i>vQ5*wj3W5AH^mWJ=M=RepFyWjVHo$H*}IWhfO
zvGhQth!%ZP=a{amRyaadxlg+drgCfX@wL>Sg0EBE?-;16=C`!W3D~B~9t<GZ66?v<
z1y#YH!t=v$*$!{_c-xI?z1(tAe<M*+!>n5Z!`uOr-3nY;YNG&k&-~-An>|djBLdv#
zfn|u8W6wpa@Q)@oY`!cma*{9}i1k`?HMwF5iUH~wElsC{NyzRe)5B~GnCJe;u4Y!1
z_~6EqU$gp<>Vt}>W}&}K8KmwXCX2q}YRSRe;8Lw49OIh%N~Rt+1~kZ=U#y^=J`80=
z|5Jm<`E|K-4=3Dd7DGP43djpS*wSp8J|;4dx#Yrn6p9)9F=bVn9qz@_I+*x;pfTtL
z^)r#KIEBTQ2efFA)GkE2AgDq?Y8>`W^*ntA*;3tQ7l0gY{SGa`)rIxZesp$nsR+Ur
z&L}eRVlHLRO5(9E33>GT6yoDx%h`RcY3*u^H;q7Vd$<cA!N~g$hpP~-DQA&+{iZ5=
zi;ecui1uF;v(YA{=4|(>x;iJtC=IAe20zQD31AF(^AGkhtpwLJ^u}u~Nc>P93E+d>
zT_KN_M**RlWE0ul!SmgJp#|p*-Ob4n<PAs1<mpg_#i-fSL%R0Kq2Ncl!;uBKA!3{D
z;~@o`v)W@p+(Wa6nQWMqbaDHKt8$MO9HCC^d$}6r(duQCnx9G1tUBEaN{INztgpK&
zjK<*ks3xN1XWp#>FQ($Rr_{}qWOU<k)LQ?L*lW;52q8U(S2wm+xdh6VU2Rx0@Xakn
zlJ}&~iK)^dM@!7@Ih`fj-A~0-_%-y~wny`~yn;$U2RWMj7~e+K-V8L>$%>E+OwG6v
za`sqBXUfz>aLJfO!u&4smfE=^jP6mnR%{)Knuawa8s5(06xETK!NRf<D;&0&*neQC
zl^Zu@!sT{cwjV&}kqU+TIQS?$`2F-p(|!cP8h^2kwAU_)C%X=ueC!@fu~DcQH2h@S
zdvbzb6aL+7idrg6Kfm&5mWuKXtV__ZCiYS35+#1}`x1`^Chnod_uNsu$~g>YTC6JJ
zx>`gX&M=(tvq9?-x}b)O>qh;j0UrJx6*rxfECNlFA6K5KUz_Nqy%dpXzOY9r2+67E
z(<oWBX7&=5?w?q15p{WH?EB5|SOqx;{qjfD52C7Vs29vjVDVQg7?eY=oS+IK(O$vB
zlKi)q?BORP5VxPg2}~}t9E3ao)<M3D66cfRRo^zVuv8m~h<n^Ajx-3?G-5v<Xz;7{
z1o6M8r5*Mlh4?&J!{CXMoY-7$=Iye2_xhs&CCJ)HObcz)XoGw6GY*0vawxX;O16YQ
z?B8wXKFn^apDxAhb&tck+ta=kvCKoCMNQ0;G&d}8_!DK3@61%iZnD_AhLIY<pt0wr
zQpTc9p=*4(gCR$xkO*zBg@zg-lwr>?4T3m^T4pmFB7;B41n(6v8GR1NmZ%&psf-qk
zw2+Dp5kallQUzfvv`9JcwVeu1OF~RR5ANMo*mGOce4+I=rF`!!>5Ej)3NQZb3~9Jp
zBuQMPl-2#dn(_SlWK!@hvz<tK;Wia(9Y;uNeME0%@nwpwp{e41-fioo1OS9pS-|Oa
zi5vL^Cp(HOvrYv`=){GIcsj6x&c%}CaSUS+NF1R6ZyKlVvB)arpg(4izBf)zU+U|s
zdCsY?NE_Iq%oQ8k#6<4v$n$)9I~w57x&N=6@NIU4PrqM0r$^rL-n+~FV4|?5g9cwQ
z*kkEB`Rec^@M-N=iHVIS>V)Y-q3Jb)I;Y!e&LmjjU_md<-6jXxBRtsU)>6k@YVNq9
z7G0TEu;czrshQ>-T>gm3^9}nxfH@^$V4S7MRJeU6)<~+pm!+X?(vEkT$#zJE5Fy)|
z;z|L*{lPm6idoOd2|bJm{xLBSr3NzPpsoqP<!<YnvA6_${<j;huk2xV*HBwxdFIWb
zevGCNMWe`%xD;i9y8+Dw_w&N#nixlJO^cHHr83Yxm-k+3q#dgj?OZGdd)xT@svE!j
z*le0yE<9^I$c%`&srjPiV|7dR(;n&n-AL?3{_lpr`Gsjl8In;SH8*@y;Yv~mnXf6g
z<|bc@<NobhijNbyuL-R>3QG%XSn8!*RoNDMrk5k&**Z5>;q_C&aojOwV)m5SCZY#u
zW*#DGF-F(N3dT^6NMUMiVH_t!hA%|>?Ph87jc<?%U{hZ%7M>45RE99ir1yd39lKL=
zS+jliWAY=~Z{_6}z{!<M07y)ic3LXbNOd>02j@^jfnGAGt7itCuStJ#v3r{Q%q}`k
zFv{kgAUjZXg`8zXxST(X`hpx~{n3oGCxK$3c=dlU)pH{*S@JL(ZR-U@KHxMD)*|nQ
zZTZITqNF8@H1R*odZJyc!+})!T-4+SIu+ujHb?N5wc$S1ubOR{F44-&b1Xu=5dY`#
zKBzOdqqswxqLDg7%$Lpe9<p0}oR-zhz0EO=&D~xF`3fW!CnPWE&TCm{THPodC87Ym
zaJ2ASXTv5u$bV2L<;?B<!dPUB6|A8;Ea@5uhO%iV!62V$JpsO|ddGO}?n$H~%3L5(
z0OqqD^rG9K_UmKM2{<jlNgIpG+&_UZLT#+l?gOXJZO8JAt-LUrW`z@Dq&T8YBu0}(
zYH>}uQb*xM0Ic*!P?po^daOyJ^bJP~i2aBg3iJDgQ)HpL@Aj%(Q*2EoT>Zff%}@PR
z|GUv%&an;^BrVy_$gJ<=m+ixNx6EDH>6l%7fO>!}EAozEeDOrM4w`Qei=t`$+788+
zQR#UcZ%vm1S}*(u_dOgD4Woiw-Qfd}@lwFH8YH-{P^046@2a>xU7=)qELSn(m6tvu
z&dT~~mlpy7iYI{CH=%=;fNGEk!@-EKgKD~L4OZJvD$P6Ui~AVRst8f{xw(I9^Zv$Q
zLpE4>#t%1QInv~c4x^2RV4P6>+u;p3kGBjljxUvpeW7K@Ye|>g`K(HdyT*kRS>^=Y
zpghg#h`ChEsLgo1*4NoOSu3bn?aXJHaa5y9$#_+(d5ZtZ(}LEdpV#6I^b{BGK?eQ9
zC18&~0uM8A424IL(9W1<*1d;+S)zE;$W&yQzbgIr4m}gAf{#5&!&>KVEb_&_bG~<Z
zYjH)73b1E!qB`jN`kuoJ4b%Pl^9phiACV$_VGB;{eZ~GIB>D@LgP%#-8<Uw;YX~Qw
zguY@{AH0AnHL>!)qPU@p(?P{RBw*(rJRRa;s^F5+Qml$7@D5Zs>9gFXQgPBI9iDU(
zr+oIFFHOfy=`1*x(o3+T-kgZnPCWiwLOuA>fQ-f)6&3i4GL&#BKXkUo9?*W&NZKji
z10BiC(P$^e+Dkc>AB&6*^>7OjKkV$IjzF)brhP7F7PaT6FBzB<P@UiqNM)yS{{xI3
zTC~H`!)&Ol%EzteGF14TmggSal|_GO+|;bu7FA<vy&t$ulG;9Ug)<a~)l-w<A96jF
z+WxeL>lvVP2Tmuv9CP9qL(!MjK67_=@S_YG-KZ9a6m-CI+4ZfEl)C6QqiS&c(@!md
zsU@&^w^UD=m|0iivwnh}pbV4*O>@6JZrxv1tmT?{ur<jz)uZ}AFziH3fG-yqI_C86
zep0!>Gyd)#xT@NkcLc8WOEHl>ZTAjLh%3qRj0gRu;fU*TBeis(?SyG=X>sEtelYu<
zj;<ZkANa#EV$)%|2W+XR_5KF>w>z6{nQ&U{2Wh~|C+V_ZV;<z>uIFyI+p#$>89(n7
z*WTELS80!8fOB!!W*WU~zkN`;;1=_3wq@D!Gu|wk+H+fg8d!bbcEU-a!%9?ZQGoZN
zA8G_}8qY7fcf0STe6|WZ@%D~z>9XanFBK%snTY|L_htu-6`6?8yWbs)7sM)%A;6*G
zIzmoGpAtrE$eM({c6zMkut(k~@E>0AGlZZ|;K^u)#h7-=eBn_|n`O@+>I@}<1zF#3
zJkvP4HZ_RB=xf_ya__7L?gk(1-qn%o(2GU?xbhunVHYsgJac9TmuO*cNq;u!FdS0O
zL-rB@K<g1haE_iHDW*TBh26k6GlqXGPww$IuT}Ygkfc_yzWS@e^LWj*zvo$GHiDFY
z!2fQ1TQdn_{#885Jr;HGeuNw}Q_AxB-Fp>!sl0K%eIBa49iK(ICd;SI{-nTH3ckp|
z`99%N>1x1yhXx#Gtc`d*XU7%pc9)|MwH?kIpv1YTWR(;6>+fLi*g{d_1|Lf@V{hBX
zrL<aSZnKn@KYu*B+8ozJo|+Rn{%oPjpA$?4=WePs^)FK+Y8|1aOl^;zzHj3R55oPp
zHnj>fq1yfgiBz2Latz6^CfPfbpS?^JT749@mTr;$lXX{^G8t<he*5)M+;djG_7|;p
z*r38rc;YA8<*aT=McAjXWc0W1tLS?#e#Tz-bmdT!u(zp9Lf+t-`l_iSP3WG$zTQ@9
zaH6gvXTHHTpKl4b^Du=-Ffy&Kqdr;B(lqR*3Q-9j46}vGS%nJ|DcFYfBo->0Ss(e*
zQ15BMZF-&*er^+c*s<2RpZL{*m?45T6`JpQixfh-R6cWaxO{9mH7`7KLcD#6d}#il
zyc13qqd}Ff57KOz7H<KKe=FAA2B$sr`5oZ`8eR(@)KJrUaqk4O(R!#!E%|-Dp2?h2
zmIg^Xw9`X9TW#>cn5J6(`+`*S$#qKqG$ORJTV`4%r|wdc=2kd;x?w~fAS)(W$Fm52
z@7&e-y@D&}g|HIN-7Uw6NvC^dEWJ~bp_gbv)?Kx1W6??#103Y)Cye(jufs@<{>=n0
z9oiFXx_af7xA`4r?AQ+Esa+vJ1W@r5JjtT0DVBpA`tPZWovk8gKarBpJE3#-k%8CK
zU*l7tRhIF21DMI1_RH^NRPuIG0bV3;{ufxd%DaCon{&WuKgo^ifVnlT)m%CFR_{UY
zLm7z)1-=3~{$KyhU>)#W9YI%lRiVbGM6cO_JhxGvMUsQ+IRDIg&B*+0_q2dnUahVk
zIe4b!1GpNnbA5v<4P63KUz<!!_g>6@ceHfQ5i1j#yy3gsd{v|KV~xGlTYtn}JZYmV
zYi<XW6_*%Yz(W&j3WZUD{CoE}-RrJbT5V;e%XZ&~67S{ah3SUhOZ=i@)TWry(*8LE
z_NT^y<NTxEU1kr>kUKW|!gepzHF~=l-)rD2k3>K(QCx8GSfBkc=X(1hoEVqUhU)o~
z#xFCWGu#P{Y_~khN}wbk#oV-?G`Fsn6FcgI-qqQGN*#=EZ=FNs1m+SHTOJw6Z^z|2
z)TKKxO*%b`FoYS2=&ux$?J57hm?JRSO}XT*hMv*iOh}YGFUItLH~v1?#>mECs?vb#
zPS3}`T#0j^y(U0rY_BFLn<sRV^Tp}1)FsWC1Wi;(0>9d2E`tw!5dGDG)n<a5F1v9)
z(c4^gz$wOKA{qMn$LuW^%{N#;ne&wpAW-v0^*Za<06QKXvB)98wNWJTjmm^<|8H4S
z8R8FM8T=QMk<nx<8x&FxWz2CBcsQGZHrM(sJcBi)fjf)L{qM$*c)4h_pAT!KaajRu
zw=pgE;0PR}Hts=Sd9)tMSmv!ja4h}@x=2QNtTpF2YW#iK)^0HZiIZeJHcpFTUG>&|
z0co}wN>E9P27+V1Gdk`=Au^sqU165MW}G}}61~&%UIl4Nz*ad~Y19Xqc01aYX8hZ>
z-y+LW#}`XERrU)@{n<foA(m$)MsKxk;)+1Jd8}7nFEl=+*(0X}H`90F#utng8lxEh
zOne11(e<0JlS%Jb&etWKGxSa&2m6%kQK`etR(vnK3J8QY;1%(K89o=bRiFL00C|^%
znAPLXw2275`Qq1V%gya9Wtu#fG@P2vp%%zG`~C3jr%nl!Mn>V9fp@>-#Gj4=#*bz?
z^3ivuyC}<|?}qJEd5mar=x1xWnUzGZRQq~^4LT_|(l;7@&9o6z{cPak|7a=dZhhQN
zjVmpe`01QLXl2~(k(uZQ;mxfq(V58^4D%(7IG%z*+iwud7ocdcOSFqoz9QKVtDl>-
zBq7MwJgaoZo3oEduIEpmdkfI+*@s)=H0kA+dXAxG>U>p*|J}%a;>w;(&*eQ5!H_I!
zYtB<)!}VwU_4QYs3mz0o%U<oBOMS&QGN?KR?X7YiXh9)%!y^r05ci+V!3Xs>y^VFg
z=d1KZ_9IBMzN3+40MD^p=@5gy!;M?6-s^#nnpUzrQ$V8CM!7C#!be!h#fs$z8gLCA
zmvd6$PS(p7N7^EPnYj<R&pECgfxZ@-_Yw2wOwfXNEp}%&d=JBcQVFt-84j#5zgZ)X
z`;+-JvpfByjB;yNSuI>^43}FTJ>%bcXTnp|T_c^rEVG~zZnKNf77I6Xmi(ccU#bha
z6$Ea(f8;1M(-2`~3a&mGg!EIi%{(hog$QE^I;#i$(-Z8hc?H$imTNe$YwI;Zj%Vbq
zkYTND4q71BSEhYES|?fp8$Hq~K;QU;6x5HZP2rhrh)Zh=!_Sd(2Ob5EKT4KzXveli
zs$s7_;5X$3=G3y63^woViHJs9moQ{bXP)e*11B^^foC!&1+a_dsMfT4<8#G!wm(HL
zO{xN=v9=INd4Kh+x12^fo(HMP5#gey8nV`D{>05Bvnw2!$J$yft*r5Q&<0MK4~V<W
z+eZj?lz`5ynxkEMnFTo$;XYGO>_>d0cKW%7LowklU|JK{m-3BqpL2)q*=!&_UE1Yn
z2W;0)k}7Lv+Ox6!0Jc5pG3m%-tRVzgX3#gw&Tgm9brkSv$r#PW3CWaXYv#UVm#||o
zV~an=y#jonDy)p_9FFi2_rnB7+5qC3o-{)kZoM-|a&j3!=0USKZaUksk583VwIvn@
z@SEOnH{VVRxPMAh?`vllyeqMQR?D1Pr$4i+s#{6MH66&_oO&eIX7#KGO(ESb8)(||
z3hBKmT*ein%4Wa5R#wVopyzfhO9~qG5jZd$wqaoV_YN3-974IwD!=ffB{)OLX{3vF
zl2emNK4NQ?2fF@Bz2~l1;-~HB1q%gx?GZ=W0^Zr>UK!sK6{u2IyM$ZSd&;sN#hi+^
z|NY+$hVfIenDvBBZ{DsVreJ|gBX2jPahzIDCV1A1nh&85Bm@W>F1Bfx<?QlC6$^i9
z*f#lVg8}PA8|Hluypk|cYpSArbK6Y4oik|M#AwaIV3ZDA2@5-LWm8q*I#(taJWsgV
zKQ{#}R|VObJ~86TS6e11JYo^fb5M)R0{+;tZC}S|c8Rka+>O~@<scj8!C{MUTU>4q
zS-3vyWej(|UHoZl7iBv7l5s9=`r>TR-2E9&7W!k{31T#v!ND$$Q5og2tq>();Pr@*
z4dH(j^Ol=LHciKdW1jcy-)47C#SOQs76|%<=$?O^%L_<<v1V20VctO*6H&v$wK99l
zVbm-E6>8@ZH5}KecM$nKU*O0aC=!?XvLv&i`<>KCfcztkd~lG02Re||lRON>mL)n@
zSy<0K#F^#tX>t9GRV^V#ib85~OjXz8^SS)$n^$ErOQcXxJ9-yx(F@r$U<;dm-Fv0_
zUq=D2Q1Qm*1Pi5WugUdbZvU)DWLsQe=r4}s(TkApvtlnpxAy+EaTo2=y&prhws2~l
z@Z>b)hA3<(VaCrJuM5hA-tz@#Oz{83IGRKpcjesO3@?w4vKqaqA-XeoVLh8*Zc15p
zGBBktI%TRSX2)}Gcgn0=H0nlMz%%U=)%5b^t2p+i#DW;foPV%U`k`No3O3>|Yuq$*
z2z7p)m(eO^vhe7;{o$3B`n#zuXnp}Q(zg;<5yYJ1-ugF3V%v|-CgfE1b!P_4f83~?
zJ0vmOofK0O*;R_Zshl4#ND?H3Z0o!fQ;3HK1D$D-7^^hQ)LQwcXjjnP8+|V%L?Y(2
z&!{O*Dch!?QK4k5v+Wc{=l!6*^ZK5IX<vBAhmB7PEABs2`@-8Cus0fp>3qt~hFSNZ
zPRYp>zozVQXn1Thmphb0gJ?zMy1Y~H@WFE!NzT{yAUtl$$6WfVM%GWobsO~*no%AR
z*~Y?c^(@)Y0V7KGn|@QUevQX=(KFgY4s>sGTD0@HcJ_%8Z$E1dwP<=eS0PNz*7oS;
zdz#o!;lRrM-K)dDO2%azD37g#W``HEMqrM!Myag-w2Op72T&r0N3{Wo<7$)3105sS
z&!zcMN%t&Lg{;Zl@)FIRR=-?lh1rTqEHpYA>Ei=?B$mo~GS!j~@EamYH$t2(oy{zu
zzux7B7ygpEpLki1fB(WCA=A>K_eLVeqI{7o#?^htrONva8*q=I52oC{mpjaO)hbw|
zYT2g!*kFnjp3o{;Jb?9oL)W+OMwemaeC;<T$3|Qzu;o_eH1$s;bMuUN*@0?$%-=M#
z{{HKYMR?mZ*E@7a-D$N<P$x>Ho3S`d&ad*SEEeXJ!S@%)&e-XKL^lQ?BNfYyQf<_=
zRlWnxOlZ+mYLS2N49F@_HrgMZJBdgt<LxWg#_E77f?U<!U8jrEHUe_IrZ}h%rHIn<
z7cD8ZCPNWm*D~_)R8;)$BSvMtcvrVb!O}64%XysjnY=l7Ykd_~bejP1Ntey%zqR3(
zHQ5-WZ4-@cqm*gw``qLflA4Sc;^v*`DEt-0+`oHPgE8C_8O=kqS!61qfno|^sK|4-
zvP$xfo|~GV=zd0IFR>?<KCBkYF%R6Dqd!qKN~e@q|EX-NR4h6(Iz$P=ualjjPSI*;
z?8yL5?iEru+6uwV=5P}NI3*LOu~*&Rr5|aJvJFLv9*E<>6v5$?erHf$A3tm9yXA0`
zXrT#gU+`@6-=Gi1<pX>=+Pz=9Y{_Xt9Z|FA+pV$rZ4GHU%e$7(fjPvVd+X4eqd_Cr
zDv-g}>OE1cmY$3Pucm0C8ZqCv2pDZepZpJx&=~}%u~_E6&R^aMTs1`L$^VhPe7us^
z=!-zfICsxCx&j!csICsB@ms$+v=ny5G;uRMlUAqqdt{u|^w4F0=3+7Kx2H$Ml+U8K
zc6LJu!hz<LQf*^QWn9LX7R~?Ncu)>(mfEix!!PGZDW06KpiAjRjJ`;#epFE%P^^Z_
zf)n$_TZxK}#FpovX$)U+-}S|nxOvWU^j34Tt|pNP<@zdx2BOr++>ZSq$IKMdpSUE9
ze!J+^UUUIcSCCMDiHIx-o2?&&l<*)-SRiw|uE?JIN7V0XoTIp=cF*lgA;VhUQ_okE
zsaL{wqDGVolXm^<<}*$H{E(XXN9f2!{TjC%WLtQxz)+I$T?22Lt5vId*>rdMz~fAs
ziML1afHr0oYhaX58}cBY8dDcK#Js6|#;c&93TI;L-N|P-cn5s$(zuHPAoZVr%{0&0
zx6;a%O{W%SOdMz4aA|AIKjGEy%-2eBR}U=r=@dJWsv>ClHGH-p1vT2HOXT_6YnqwB
z4BapHJ8R*J^3}&*q(KzqN#?y|a(15m`rjn9;NhxJ0SuZBp}dEL_k~<oFO760^BD_6
zz?4z_D+fm11mR8lVIgW#=xQ3iiwRAGc#!f(@l`+dG}EY{CSz@{=Lv~FcR7=GNZDS^
zEJ<-`V?<?oL{&UGVYJ_I-IN!tiMWD~-EL}<IbSizwxVEp!^7k<qi8!GKU0>}D#fUj
zT(`4Ohg5`Au9w<7vtKL`?bBlVHf*>SdKysIP$PdPgAu7k`jP&|`r{tVutMvT^Qz*}
zLvFQ`j9`re{9Oh^jj`8M$#>j>4<p&9hwDP_eXHayvGd)$^2#-Gp!T^CFgPY5X7CGX
zzK*VLQ3%|(j{=;Ro+f=UDiL6#S$FkVgFHAAjMgu55DDG@EOzraH2rMl@8A=eZZBs{
zmo=C&7}IRy?^dTUY^~@R&F9mBlZj14cR6$ogspV+F{EFb4JdjwGybA<Ewk8uJ*g>k
zAy2IB$-0%gVmB1~c;lAE$wJ0>eF1q5=Mhg`61qgpM`#wLkd#i<sQ?^6&+CvoN%k}&
zi7`PtmiiVDD&Vo<c@lS{k90?5lTH?e$ko7~G4DS<pcjizfJCXqG_{z=T%R8c4M~T7
z`mItL!;@^z$5)fKFx)VPj&>Uqe|=w4-vTBP>(N)Jqn{(qk029N`%PKh&o0;OMki{o
z%oi>l+lE7C&_|0R!SZv3R)?`{OUc|~Gl}!3-y<X6IBz7m{4RH>pzx~`y6$Bc*=Cxq
z1WIi+q(JR~Vwu%`KU=sm1uEWRyJ2wzfV(Z!g(K*|cKk0&Hk{^_QU~$&i6cg%Gg~No
z+ka$3t6JdwO^d8khrkF?)7|Ut8BB}etz^zbU0jrhy?5CN*^cYdF&k?t{$7l8QXInS
zXKjfKF9xd_ZgcNp;`#a+N~aJ1+b#s#CDzJ;qQS4L-bEZ3d&*(nUX5vM>9mfw>(
zeiIa8Ih7Z^X8zF~!)lUK|Hc#rNPmI~m^OY!dIESe_kiY;P=kptUW}jNWm30K>(Sh`
z1U#)qr4F#6BeTltDV0nkxCydis<Xb#_RRkA$jA~0jt1K!hb@0pYcoO}#{p_ZM~2pS
z6r|5r+=ep>K67~j1+F}Gvb0|;{OS9GDNejfALJ<;cGf>?|L-z%-~H&sGL-G~Rkvog
zT(1hA<~+{GEem8gF6DR4x6stiw9l>^bBfa}8r#(VZeXyCob81>p*Q$mL^&|@*4gf=
zn}7W3NL!oG?B+9=kEUVgYH_C8CJP<uPZU$X<Wb$f8|drh>m7EeYNV3xS%#EA`qhMb
z<$Q{=k9>C`LA<R%e{XU(?bLCE@2c;Y!LVR6heQm3ekd`989#KPKSZu-b}Ec^<{2hz
z!;6R8gzUrmlG}u41b8eFw==n9(=I;ScQ6@x>s@~Z)_gRTr+la$95)^f+`?OeZpUSc
z-wj;6tFC1UB03l)_Aqt|b>vaBI?fRaPt=e#bV>Ch`C;wQ^_UtLKJD4?%+1CK16<z6
zuxP4x20OeWFWF289^7<JXh#KHxhx`AG)>>UYLEvZ%be_)RBSvkWBKIW@Q8TO2SaWK
zQ{(I(9?ng&CdVjP8Mw`zRSHug0CX##c^4MX=(B(n)v_7ps0~wq9$L<M)#t5gj=ROn
zjt9TbJ`(y){+7Fu=JzkvdnF6K?GAEajw6XXtD{H3_s%t!F2U#V=cI~%@E(pSKk{-_
zF~)7^<NF>Bxt8?DM67+3>t;l?#KeOr!=K^E`}Plq*o2f*TmIBLw#y06CnGpM=wwos
zSB3&?-0uH@a=zTVh+=!kxG3luC*`!x8M~qS@4P1GpT!56kMZ3{%eAd?6=T{<4|AjF
zWskhHi|ixaJ`{6-MJt7VKBWHh!|P?ZBN<Hf<P=AssE|vB+=qmv>N}-dL2nEV&Z`Lx
ziGG_j&5Qvgb|Ao2BUFKZ4bA%$uBx?g<>~$ZacWlzX-uRq@f2@IgabNz5*t^kk#}+x
z92_tny+I-oNS@WZ2S9>COT(J-hiB8(CZb)E_IEfw*nMI2?|2F@FX<o)jo@fILbLHB
zp03E7Ypl2?N98uH(y*&Tnb-K)18ydgaDrSSXfV(#;ZDm#hEHUhBse2X=Eupv?2?lh
z?=O)HhpLB{Hfn%y*&fC}R^dL2C-^N^KyYOsz3cO49=I<$C8FN+Vdw1TQ|Z>dr_E1l
z&8d!C(sSfMuTo=$jP%%)2>W!}hMAcNW1Vkm9uecPUboX0g<vXeA<Y9HyYehdM?nTB
zfAhiPFy|N5+&QWOyh%YUI@-C;i$W^D*!3wT%$9Vqh9fw0gC2aopi2BR^1?wWXQ05X
zNB{HKDAyli(32&kjW&S4CgWcBzunsu?ExpaR}a?3Tu%kyLOHx$?(SHb;#~?0H0|(f
z4SV2d^<7!gdK}Bww63S4@`*)xa=ZV#(cIDYLT#+VsZL%mffBnspO%H&AX`HzP{jYZ
z%faWN7D)(G8@|Qc@)-Rui3&!eBtXQ@n=!DeA^ugERjO}DiW<VL6NJhKlH())-Yr@W
z%6S5(=3D@>Vx|&(>&4r|d;7(q2%n8Xx~p33;IwG9Ys@2;4lTQv@1T~f#Pu)sb9f81
zyp-+FAGh2tl2IeQTd;TDLtZRuW-nEIi;o8tVzhG%@^l^Q_<wN-Vz2!?TZZFPS!D)3
z;myq5VKv&)U&K*LJR09{QFwzVEQPXyy4gW?>@41J>bmPtMV`Vj3;VT(cpaX%?n;s%
zoimn2H(WWOaLjU?)Z=KTQGtC_%>6>qT-}$%>S`u63(R~9^l4Q4#F&0PV_oEsI^u-N
z_)q*%0rv?`^(7*Dl%2JV>M3vb>V@rkPwpT{WZe5daT`vJ(Vq3lC85t{Jk@sZZ(Vt2
zmvqGC5udl)y_e^2&hg&P=AvPgH<BqowS=Meu?&SSKvB`=<Et7?GSA_d!Q_uA_}agF
zHBeLtf_lFKnyC|t-|OG@u}gl!WsQsBH4e+%cb72_5cslmJ5$qq<pWA(H0h(G&B0p6
z_?_tB=eiH0X_sO22wM`jZG+jUp#D>3U!*%zx=}u-Cnne3ZE2&v^<r7fhXzzU#T!5G
zirCzRA2#vrQ2b0dFkS)ommy9daL_jTl6YC8IddT4mn+yqFc)}9eBcUtT9w58Sb*Ay
z#J9}SCi9b9cGMx{J#UGW;C57gnjoK7`as=V?Pb>6CBp+orL#N(e<=_$JY`!73rC91
zQ_igaJU<+)l;d}uiU2fkDgp*d1<h}7zUBXtB|N*2XsWVg{PN~$^IbWM^5JBphNW*l
z-ZR>>FTdoOLG4>RGD1mS2(0JwvGp_Lpm1Nrh)#fFBot~JBO@M{Q14IAK*YYUk#(+p
z9$OYg7em^N!Ha^fDFL3f<nF*Uk4|Yzy-uD9)rh&PYE&Ll;^P6%9j=sKAM#w*8|Kz9
zD5H7qNU|=%{2-JeOX6Jy$Q_A(06TB0kB|YU@?;*owAByxP3@E3+nex8<h%PJ?G}vM
z-fNSL7{E<z8m?tpl9ecR%rEOZ)iG}Up{uZ3lyCXbB>yyH5s~mwgl7C11pA`Hz&p0y
zep*^&k;HajgtEq^jg|TUUyMTy-3%kfX}*!cvf)F7RQuMlrrXH|-s9UF4`G7^(W(%M
zja+AKt_-h?$sU~Fe>{2B+uOY)sd4c+)%C9fo%->vi}${K41a?^x;CdS-uGA==R+H2
zO34;3p4%TLk>CDY-FA;(LxcAR(CU;z^1xPtbF!<uq@bAzPg$T2-RFlD&K5A=A?=3E
z*XivD7TRQtr^4LB$;NQdi!C*tEw-R~H^)Y9nCH)<eg~LzeN0Gd0nmWpiTFAAw3=(k
zR})h6e2A?Bg&TJ=J8}sFitfheyGG+`GpB&D-{_{vlV>?fPf(liDOTGyGqDBHEs_>%
zeq6>p0}wC_Ph0F>DU!C%!OYzsp%U0=ex&P*rS6vc)6SoxVjz|qRp+^4=4%uNGrltc
zQZfHgEZZY0v&Z0XXV0B4&Ls{s-A!Mb<8T`Fu4#)^J#)k^?f$+yDGAV-h|P10eVvpl
zT383HCkdDjHxz&5mAW>4EsGLxM)9tRX2PXiBz&?LLkLY3b&v;s^}4q<ZclBs5Wc?3
zwOdBf3=2{lyO8b%r=2(lB|bI=I@hter)x5pvwEO1Ux^uL%%bB*do)d`7OEo)IEr&s
zzC|<CZ(h~_3r?W#tj0Rl)XOC@KD8-i$!v!g<UY)BIy+qdjS-QIKC71)MQhdd20o4Z
znqa%bULzx#_LfB>2DTf>eh}^H-PuS5je6<}9dv)NmMNB)uJq5>+Pb*Qg-}paiW(iX
zos$rd+D^{bH1OXIuBxX8&M^+Y^<3Zc2$}#&SN7UIej_0hls;!>F4j2pS;gE=j6*zT
zsWL8ZwCbDch^o)jAquCY3V^Lr$P!(b_Z+Lget$xpB(g^<aY=V#O&%`$M-=_L-3YvY
zVwpFlnusLc28_W{twQxUlbtYsbTB<gUXQJ;5?F7iJ!)-XPCM9IgE{+SmQyY*lGQuh
znOV$gw~;62i0{$lMYJ+0KYdSfJ4-8ZGB*1Gw?wlgx5tjlW4lDGb2JQro>An>s~D!A
z-x5_k{dVx|Fd17$8=OghuiIU}jq+tQPFRoj2{e=(Fv`*vt`B4~S|hrVsiH68kZA!1
zwJ0Vsl1K1~aFpj3tnTc5AsK0Ci0QgF8c1eH=<>9$PvIpiK+dhCAf)acAL9`*qrSsc
z>Ab}FzBT))4Pp$D=lpfUaJ&X`$~S0PU9GyX2qc<56;*h?s4FF9iy+ql<cq{TY0Z19
zM;k8-w@uAsn2Paz3O%<^6oWHZJo?VwT9w`Zr_!N6clGur|FDX{DC2@3iC%Z<hqrn4
z*4)bxK%`y1-aQZ-CFIa35!gaV`4%QRvydrhn_g?k-GfeVtsKGCmRzuN)8>^V!Uo@A
zoN^xAOmD%4q$>Wy7`fl^UXQiROKOzbnHc1;Na;V#+&bwnz!Zmrz6yU8_mv}amV0Yq
zZzia$z45KPcK;6_$%QSinfwA}`zhFnuya-V6yXZ1eWv3Gsv`FdOTT|R&5D0*O|$Xu
z4k_h3>E&Gfh*k60b?<8XSm@-{tAcjj_12NY$?M<W$2Wc#Ze*Z-=OCgxPIt{G{wd`I
zU8zHtcEjGkq}z>LDx9sJLR`3~Lc=!O9Q%|`x5A<Q_Mo{_*`*K6ulOI-{p*Y_WqLSh
zD&Gvl8sDgIPOYRivHJX@xilsG>uqV-<UVeZ-9-spN}4FTs;%3Gi8FfR*DOz9%5`oc
zyWM`mH9vQn<&VFa+~e!N5BgkXa=%;SgO=?%raYfEt!6MK-T@0b*tFBz%)KT^$Of@J
zwUR>snoGSuk%CISU4DlDCs{8?Bas~YHQYBt>h{$hC(_*?S~&FZOV)T;U&u`~8HlP(
zgbw{;KWKW|j(ylSU0HR9MD?FTdr%5;R=oL{`MKl0@4PCmTT7P-)&$yliq>O^Dh;Na
z!=+sZH}c<Gv$d88CuS)c>-Ire6kjMJ0G@$#R+CirOs#YD$}rfuec$A6q9i2qP-OV)
zd%Fy}wJ0zvokzGoBU7D?eTr!URO}f4)3vdUQQ6WWT_oMj{Xa}f6Na$_BnCYll5-o{
zWi_OY71K+)S${_85-_M*+PppEjsMGK5DajSY$cPCyG0l)*N>v)TLGMZ<1G2<VWT$8
zoo|_&T<Xh7i)q^JFhZW@%pki8`aem!bWW^kjB%8x`8~Wu+!4UQ20(hNO&_T}%JG8f
zY<?6v0CrBo#p}qs6k$(1KuaT2Jc#nXXxKBj7zaT%+#|ZcZsU!H;a9Ua<uxKQYu#iI
z50Fb=;1+iFa^<x2*<t|Qs5pS-w{3H14`WW&a~<GSX--J7ik$C4B@!d#bHu2hW57*@
zgr3f%Pf&Wq^c`XH<dLzNTR`Y(2d<YmPXY+vi{US;=FhJ#9ggICfaLvT!gjMDX;3OL
zf9!Da?BX-6Tdwv=ebqKfrWkI96o4C;Fqsb{%MD{oMW2b5lx2^y!xbSm+NIUr!^NIy
zY3WcMf@W8T)JA$A?O5|KU86@7f;xpLj|y(am(qbPB=>TVF`3Pjt0x>x>3E`P9_$4)
z#||`mI5Vgf;n!q|SZ$$>a*HA!Re3g0{Ix19sZ3C#5uh4`6Swy;)bB9~#M#J0V)M8N
zYLk)TSa_iQvD5S0`Ez2@JWJcG2u-b&;_W6cx+=(_Npa#(3Aa8brN~oKu10_SbD^;G
zv%1s4Zq*iquJiwH%xjR7lS1uHc2+MK%rPp%yk9#})8ARpqs9qB`RCH#lMwF4(0l=&
z#{Vq5jWgLL?tdNRQ#z|?KHk&!6=He!0oka|5M2bLa)y3C;qq}M=dIAvBO(_mTT03H
zJJ(ahE!iE%tVCMD{Oh}tEjdjl5A*wgkhAM@&4&|s!A1AGui@NgffQ^eB9$93)=#rw
z=DhD8MuJhyxM+pAxv4tiiO=OP?D5i0M&sPkx$91JIwrSc8ec9BHs-N{-)zeyYRs0Q
z)RQks_hyIXA@&FlU%1rHN^$1%Ux6ZANq;o<1DE^5dVsyPzsC80`C`jYCF*QU$<?S3
zn!ly*bPC6u|00=@)0%E%?0^>g5%q{)_40k$ve_0#v^PD0i<U*!VV6ja)hTT3OBwU8
z^neS~y6tJXD1+I{8}UW&sD{)XC`G7K6|F~-kj~Ha9#MvL#gJVarqoB{#BQ-zrlE-C
z5s7|{#HOp({Zrn}gBuYMyB5na`Ai@@#mm;Sf#oH%iSw;dN+k}uyGg^jhmI*4!5qc1
z9kTu+!gS2J2<2VXD=*)9z$f3?y5_|*``sLThyL=2!q%>T$|6OgE7><58R9W2X^Lg-
zGq?51dzA%j#u#Y#q!zTa%j-MNIDSuhIQN)lRHZzgVuyN;vR*f*phPDHui00ACLCK-
zt(HV+J1yOQ#84ti_Ng48dURwy^Uor=!57*L=f>}M`Zm=P8DE3EJ;jNeaBDzIy8hv<
zg08xWDOFGM`Q=Jddm4p^(b+<K4i+1h#^+1x-g0%nQ3}u1H_u;z5QXdPa<oqJmiCr1
zl<TU$;g3{c`3*H~X2S)+h%tLrB(%Pb3@JxYtP43d$77CUN@wR?#u-~*{Z%$6dEKwY
z%MxB%rt{AX%DZbblssQ_!CI8un@@as(<LKRxNCCWIMg~8`06SO{5EBziO*R!IJsoh
zxp1K*-j~U}G`dVSQSTeQzG1~CVlI%+Sfl5wXieCF6;>qYYK^Qsc+M^w?}tbN-}9O!
znSKBqq(GhH<sw=zc|F2?8;{M!-Ukc<r7CTVp*$M4W)m9<OsltBFgnw@26+1Mtc$Bn
zW@TxWBm;2uQ0X^zIZ(;_-;>YB*Q{80hf{ui8feo$SiTE7lA3+7mGnxc0sb{(qfFk+
zfpPk`x(1#W$*iH)nKehSOalm^zlvwp_BG4ClJ@D>rD*PZJ1*i8zkJx*V#BlWU2Ju}
zY;8m*<5wQ~(hC7>dGHQVQ&=#dj7?BbdhP*#X2knlEA31~S1$(~8Z@doHZ@%Wd0@Ss
z*xI`3#g9$Xa@fIP$O+c~GjgQG+1f~v{-|VFz;xm4q#LI^j}GpOYu88yHHBud%aVu!
z>ZxcX;gBOMEp{uA*yWEDBVw{e1&v=e4D9kEbX9grbHQWVY0VBHnuY(w9NMXkfwg-^
zntoz|e>u$h!%X<x?M*-04Y}_4kh@^zq3}a(8$w?D=kM|<xYV~MQ~ammJSL^~A}-<;
zAFJ?cH6YP$pR(#ErnFqE{#b2l!YcMjB{&h47e+9OpLX@55O|hBp=jwQHC$M%ld7eS
zFgKn%RW0V9mZBa`+&FBLnrw=Y_7ekIrEn^Rq&k@o#;*h`JUec|_u$i}#x!G&oMr|K
zG;e{kDP7)$E}P`J{`dV;7;lHKxnB>c(3Oj+nUT9)ah_-wLKR;eF{Qv1t(zm|-39Z-
zg#=eGhJG$b|Kf2o;!N9jmC}?GmFczGBZ)sj6}-*2t#FX$S0WS^7&`Z2&Dl@fic2Et
zD=2bW=3i<nPISu^o7ufFPLDPBJ#G>Af1=2Ak5Hk1=7%(2Aez3;wC)h=AG!{;6EKE*
zo(c+^j_J4EpXrdPd8;j)u2nntr66O(F{$BivYxg$Y<s$TaEYNMt+t>b+{Q}>kvmxN
zRW=}^1ll1woZHMUtS!*(kygR&Qug5Kz^vcj?n|lzeJ=BLpWTi${ZbS1F1x8IjTG6&
z(5>$h9a;QZ!KcN(j4ChfIfEx*i%eBwEq21_bk!yMjk9UuzK%Cax4#(Y&N1njxb5m-
zL<9t<mArXl=3^A(^}idX5a<l#Z*u<I02fPLX0s8QqD@s+gOeLOUg-rsLoFc>W6c<v
zQEb~0OVh3F%(W1<*%MC?xhQOqOFVwSfX8R-Y-sL#o|w`6?}~p)$;0PvA<cQ$|GRPT
z@S~DsVXQp(rl7|EtAnlg&gINs(OqM%>o%@jKg44>6he{x0zNGpnVfV1y!($KEsmB{
z)?_!Hr{+z6cNNes_Wc{(Hhbu#3Dc688$sswez@D#XZN+GZtLI468kxqbawR_GB<mn
zDHxyC`z=iN`zBVlR9_GJBzYdm-Ls1Bs{T>aWBht=eijwprkya)_<ZBvvorha1`<2v
zpVPVQ>POpYSL&wph9RP~mt9PpW6C}s<G}?Y7)cI|=&D7gJ>_t3;4Lv0uq~Ut71FoM
z+THbMih|{->2oW##cE20CFMYCd<O~3dE0T9$QQ2c_q+)ZB<CbaP&n%dVwH59k{PC;
zCa}dxiV&Y7SY++rY*(SYl|wuUY^3K?k!<DWJdASC&j-v-_&!MRhs$rG=8wr6@40`7
zGghjO*-wmDm3(_pksG29qRczwMQ+S#!TlK0eTAwoBUxmWBaYPcoCc;HR4OKg23bne
zANaT=JJtx*2#Z`Shk(whu4<mTYWY0#Gr5nts;mZ}ul67B`#(;+*DPMpNK%r5Y#r$s
zBDH?81lT9ZH2IL)Tny@yED-xn-!@P)buLcz2P>YF^Gqz_qg;Gi`Q@V_#0sn<aHJJg
z5>!c6TP%~DEeZKKML&S+G54C){`qL77a-a|OHRM@2T+1m3O?6<GIR3M$&pyM+^r+R
z0%qz}kj?f*B8oAYmo9pJcr;&O%kv@>$~6jO1*yf_1Ccii4>@G_;rW@&C|RUheVH*Q
z`mCiN93u^r*Zgnj4w0ehVkK*YdM34iR-CH|yP|MCTkUW4sWj$cfq{as?@aq@co2&<
zT09^CSk?GpxBm1!7~T?#!dZTWn1X2F5>$&B3uCYrDxlz~Oud#vF>CM0uPdwN^gN?s
zBSBL+w1vs^hb-3&r47yh(wgOWOBv1CaL<=ADlVPag~z`jvQizA-MSYU7sT5eP~{o6
z+NPlIfxBsX1jD@QS_!8Ol&Unj{)Ig+!N~X%3+_ht9Ep8q)4R|t&&i}a;TI)e@b)&=
z<^t{G2rSss;ucV=1u(P%Y%euNoR>pce*BEy$XA73EbQp=RxxOQ(=p2pfhfeZdc@8>
zA85#3XUVw#xftmxJD^&ZGt`2{8>BLt6-{qSgHw`%n^jzrNM?}si+&<!EqMM9V6@nn
z=)8?Ko*+ZkH?04Vn}YTDb26OOm2r7mV>7Rt7#=Y%R#{CIIc>0NB&gvmNfze6JEZ2c
z@@9;bQePPS0A;-e7xTOO)?C<Qaw8g<b#k>u;Dx7>?nEv>{zkWV`c3BUlEMo1b$h%y
z!bykpWIr@}lkzCbBLnNY=1{D2Lkk~Lq9NAimg9m?=dS%sd8EGrs1|NHwU#&E9pzLJ
z%i4l8X$Kg-S@I(FuEV7|7GHBzfYKnFv7_VEqjghXzZ)*N-LX95>)#W5;ixB7*)BkO
zQV{;Uq1~fSy@2Og$kh0fC;9<J?x%p3<$&sDoCjRP?uGR3h5bmudm${4nN;HXqMgLQ
zY&xRbfn1?%n~`&#N9b3rk5`YjJ~?pA^Yx+cw&Naurrpq)Il`OPC6Sm|_m1dU!k<4*
z_f_eRt6K(tw&u0TbcrB1K{JN`+<80J3m7p9j1iLNpLA&_Nf7_o_VjVHC}rJBKZNsz
zxoI%Ns@Zmf{#GrUN>Fs?`4Ks59AG6asXVaXqCHdYyk)})i0r5SV$#l~&>PF&+=(F)
z?u=bmu{wf9kcKX}kW`sdSDAm!cMKx5!C|S3#$Sb8Ja2IF&@31Cg@*@I6Y#a!5u;UA
z{WD>w1fQ{zTV+iWGz8yo);}76*oUxCl2eJFmtBJlQ6~HPK0<2QWv5#!UOeU<`Bp`M
zptf?4;!aAzpUP1ib~e@iCmpK~AS;oDDp6qelap>g`sq!z1#|3)IuFsvkW#7;`?C96
z2HO%5&kP{&E00qJN*yfakd^{Yd@;8IcL8&d*1eAw(b9%=bAO8yoy!S;3<fDfdFukb
zZwj3Gt`a7<*XE*3$(88Ci=0T9ZDRWR9e&iuHV0gts^K<0W6$Pz6FG2K{%|7BS=#^B
zxEGg*{;D*(j6Jx@lzCUr7gBODUm}cO9Dcj5tuIx3As6A!Ut~#6rgrp~jl{AWkC>!r
zuTqqia>v3&6Yk`-Z$~mcVR046HRbB-&F8CW+|5|}o@n4)qk|da>xb>e8u26o%>0<;
zeX|mLWw%zw-__fqnjAEj<A#$WgEIykc-zRg^3ROFo-xgrmR>;_Iq<yxTa>CvKpd-P
z`J7iqBOi{zPetH%2H17_&~SpLPI#HU(1w4*&735SeUffGoGOg`5zGE@13ldwxYG8&
z8_19S4~jsDXZEh-2Wa}j!5UsiaPn~8B=H+%VelKX)MQ3f71-BN-j~H7(A02gz3JP^
zT3d>Aaed|6pmbx#xiHr3cTw$(9{zmaeM5gwjq~o{voedh!5hBn^wrLMfQ0pbG@^lL
zhui6kE+rI(2EMAS4&!4ZKCSKGJ7#YddCPKI^-Yk19|dmWjs}C$mT?YD%!YO9@0dUB
zVwughlJR|7jbt7+Y!YhZ$=~!VizXK@ta|V3Uy>A3bBV-sA{p5oq%dwbCTq8C1sy>8
z^OSjRXQ`<hh+#H&GY~@$=@&SKwA@{2w-<$w{xSr8SG&d_$MNkL=VG-eo$R;mRfxpg
z>pTBBaUC8FibVW5UuJT|mlMpZdlDq>OO<qH)<{j;(eI>=z>5(IdS2<;P=i^78>40Z
z<t~~HW0>4A^146ugHNYWbcaysbH={Iwlh^T9RK1{!xy6qtAN2HZTaCZQGui&!D9jQ
z$1y&H;$fkni3F^4u(nxP7Do^G^NF;oM44GO_el?b+BQpbj^;<-A?B@FkH@y94)x5v
z_&Z=k?Zz8vSNHgQsf0Y^u0l;Cm!tGxE%yp;lbn<>@3LLrZL(V%?5pbh*z{5C$j|P5
zTXjPmVWpu)z~;|r4u-#^(%cBHG{|!_sPmN}SJS7TRcg8xmiO4sH!d@omB<gsj$z-a
zF6k>sSN)`JA2p?2ou65q$4eA18D|LrH<H+E2f?;#ME|{0{4DDl1*ifxDTQM2Nu2=P
z&MiX?J)%UJglhm)3!pG*$1;&`ZxRJ$+t<;?`mHri*fLB_+63-ZSPIe8;9{{#x7O^b
zdwv6kerHh7G3lf4KQXr{VKWuf(s)QF*4JlH+Wi)$<PF_pk@8dFX@*SZGM$ruUOJ+c
zuw;hvF{pkRK3jh~N$ry?WNonYZzk<v&Vn)uB&|sb5?)(4Z4xVBH&zA(kufbLVfir8
zsr6BhFU7KH8q@HcPugek;jx)>+PS0hx7j{ETs9_QL0cPWDYts_f<J;DjmMFp%~_<k
z>ue`lr{rts)h0*sAFBdR@BiKCyd*0Ud;bDvbxOa&ChyuQ#Tta=#{UY|9XoF~QVJ<0
zHz`SZ*~de5mHPY^`7+_mUNY#YJIiMxw?d&^6PotLOFZOXl+OLiVlW}_q$5FS^*=zU
zg>=;)cl^ATf&=h#MRV$(VF`EYXk}tXB_v{gy%Y)XZIJ;+Ox*IJYGVqc!sFqe+4yoE
z++)mO5e{1-+;P7=SVNNWQuz<yC<60_&MUO@N#`bijFM1^Ip%m!pt?z(lhH#oE%?c#
zltoPZucJI{@%ci2sv=rklP7Q&b@VnQq#*%F<WWwf^TnETD_E<w*2uiBL*!KZ_jyx6
z_5!3hN2&I1l8ZueBu+~EaV8?Z9sXR91HY%axCBSHzM0@kE~|WFm1eJ0Zcze_sId?)
z>ESm~?1#6G96bbCA~o`T1{=3T-iH#w%CVK#N{vpRiLqE^Uq2aRw)~aTpgXMFEutx5
zyIAe1AHKds07?~OEv9_Zk>a?G{6QT~(Q$V(xewl8%Ca95Ov4mtt=7U(8e3t<bK~Og
z+2ak+U7eK(DH^%oiHwQFFgK*20s5f0gq(j^g;usL=$9pAs$S-%TCq9iY4c&Ud1~g}
zmkV9cwUv8+pY-z5>FzK}a4E-a`4;%zweYHTt9l%uYrs?Sp~4Oz!sD#zbJsxcp6d8Z
zoFCbUr(-H2s+G5y%%A5Oc-rD3^j})(_5<JaQ6yd6?DFK#|D)(U+}Y6EFx=CYYK_(=
zN?UuY5sIo-t%Oi3_8v7tYHv=>nk5k`R!i(1B(|b<X^9mvTASL`=6w1716N$+_rCA*
z+_&<1zU!Nf{BMPXnbxMF#+EpBR_aQmD!hmTTwZ%pf%GMVx};feBybYnKu3u2?lBK>
zE+LHtT?)ZdXCofsUh@wK>FmWDX@v^+7zaLCG|P|0vlZ56wT7zQDTX=BvVO%U3;vu=
zf%9)PZ;wdD2bxp0gNgiz%&Knt?p{aS-bm{e-cg075HK-PC<(>l92+=-oK#daQArFP
zYgsdzU-%D{w{;J6wB0@(URycoj8z_qY3*45Smb`fs;jR@Rd4;4KTGP8r+iG0T*i2L
z(lZoaN~83R>c1Ne7`9fmH9p835)t?Ws%kk;vAvy~Q{W#__^j@zk6=E(06FlyF3H{~
z_j7v9`IpKNwx1z!N3+bYuPE{}vPf-ofdzeXt1?10ZRz$+(i-ghwoH;*x$y-+Ov8KJ
zBUIW&q25;f#Z-Rpxx=S#m8ZK&8zwW;;)8~27xUhr4)5u)bq2FCNY$pCX@e!3htZzB
zA@QgBOY0^?`2gMn?W3gVLXSn@UL?CBh#5{0sV4w@kl%uI77he8fr0D?28?vvW%HIT
zvQf_Y1Ar@G6X+?QC$YE!-JAEswtc#+qK(~el6FI&{9QX^c5d{XgSnf=%JZ|s<awIK
z4fGEXz`&oxo22Z9vnw-W49A^@)PfvG1t0mhWI?uLIr10vyL58Qx0}MkL%eoAJ^uoI
zLp4_{5c1;}`VBxcsvNw%dg%L$=@X3(7tELjuif%hKT?7m_b`k>3dxNP6#L`Oiibjv
zIJ(qe0G=j)%l~c+ABwd-T_P3&UgL9#Sxfl|&fhc#@_2L5BUv22&ffwYIST%E8fF&p
z5bXU39S`LVvqnaTTSw8f_6rj@m*+q6M*rMsvw3d1AZ&ARiUu}mUa$l6;XD*Pjbmfs
zqFj4SSw)WgXp@;yynJDqJ;|O^VI`>ga!Gh8%CUe!Fp*T%dzQiBm*mJ{=<{!fwztE!
z)V)Zj#Yr8xGyXZt%rifQo#TCz9)Sqs0+*=`Tg#H&==}kZ%q3bi<A7kaBproRsgiI?
z`ut$qHmc5Vpkq^$0-B{#*)uSI3m{HoQrkS{zQC?vr6re|KB3>_iO;JN6R7v8BIX#^
za2t6?^uGR>;<GXSt!z=?q;~*3=ab0lSuXt?I}^xH&T6|t)$XmH3|JZv`CNNnRlR6a
z^$%;D(c;ZJ5te<)yTuw#5YYBQNofg7f8<D(@;jY2nEto-sB)j!<<byo+$XRbPDw_z
zTE|=qTt}Q%|F!t<2D^eXBVn?y5EuE{%zS?KhiswY-G!#SNT0iHqvV5zN6FlLl2?9(
zNFQv%;OQ<6mI7_3<yH{%on&qg)%J;s;c6FMl9>tO=zzbXf2*D1cZkLs#(#YCk>!>T
zo7}(U#EQhOG*n?rs9$GME(D(ypx`qq=zCLDznWd9en71P`<4(B^DZT3JlILVTkYGS
zi%Nl6;F`DOiC8AcHaYIq6xFvXc=v+T)zBRc;Xc*PI$O;w5t3YpiGgpwy->IB$`Z~#
zV#_*8Z#=W{;r6WOapP!KVzBT-)ytN#3#Wk6;AYtz*%XV7=6IzRcH!prLA?ge#z9HM
z#tHq*{^C5V(=OBAKQfQ~<!-fGz-<<h<X3l;6{cIS0gjW`1&CO3jCI;kF-?h6BfFeX
z)9R8N)%u2%KTg*`R;}zUKahoLc7x^;a^a<);s?Cowt^;!{+G3tI7#yP4b#@l;F<n?
z5;AJB`0H0CW9X3+uWLN#<?c*f!HVHf;9Jn{O+MEI_skIq80vr+Kcj62J@z+Xh9EpP
z))f+07%cULhIG~v=ULvAnY>hg!^ci9`l2aAOVO!Lq9KPE<@)j`eAuqi_O^F~tzI;|
zOp}2t9OvdSJT&K9P;72~hQA-q{-I3mzQJAT?tM-w(nJ)O_^Ty3(N%s&rm~O1R-<hv
z3yG_(bxH;RLA|O8(umFgf9fm9A<5ruKy5B^@N2J(C}MJJ%c**xa=pJ-i>>lu5gSVr
z%$7{z^Dpb;L@iPD71mZ6GlqN9IyV6eoRww3+kI!IB{Ti_c*iHWldXM(sRH!o?JA>t
z<`_@J7)%@^nOMMSh6i`9-NHj`0u$uTm#<ARIb%$-4klD;=pN;65toF3dUp!bI?jZ_
ziIw=kpMB6R%j?F$4x7(o2+cjYmR?ukIq3k+l8@pk;CBB34ADS~BFqvQo6t=GML6aF
z<HE8m;A<3v=UmzB9wu#biCG0fot}R+Hzs~wIL-7hSYoO2G9Z%+=iN$lR$#2uU5>0>
z5-*Y!P)t-m)MP!WFbe(dHIUq%{ozaS#cIu=QyOgD<im8++uBKX4P6rhdh?3B1`}&W
zX9R4*vt7(#iZ}HoSgvr7T<xCJ<4buACKy!~o({YaP;+zq7LRspYF>c7+XK&o`E-V$
zb1jq4oZn5{{i+I{ZJ(BxH6k$=iO|zA%4G1mtk*MW$JYWn;edw|sDDrzr--KE&@9p7
zP{0#*mcj4gRFji5E=b?7`ysh3YIAzyA6oz2#)+8`%M79E>WJzgg!ZhDb4OS22yGpR
zuQG=R(5(RG)Z7NV$n}cUbt-`Bd9gq?<VcBwd&zOY{G%kjp^lS20i%P#sK=X#&=9-X
zoG0+km(M*ou3j{iyc)nHHA(1*d;4U(rK|2ySNJkYVg2LR@sHB=E8(L<r95&M&fvcr
z))^%Wg-0FmJmttie9O>C5aI2rM87P%vmDmy-DFHeTb)>HH9qlPXK<rs`hax^#pAAO
zQ7jlb_1+Y<MmZXS!02wgr<h;WH1YqAi^E!-L>^F@NtHWrT}z7*F^(kI@HC<Yo-ldi
zj%(Kefa2L80*JMemQziUu=#36ecU6>bMzhe5<gSZ{J;{+$>@)V;V1*jzebR6MO6v*
zK2ESWV=*k9c`u4Sd%$J^#(|}I`a!&r1RQ1ZJ+GC`IvRlFi=MWB=~sr0{blFm+4t-)
zWZd{DC3>2SxOP4NbZEw1@NDp?-&6N12Y0^NKwM40;|dNY-H?{AZ8v{A$C0h?<oYq`
zAL(94EIHVxO8aZ@>vSD8U!-8TMLX5U_L~&`1F03kze5>(?)@T~m=B?j*r=sN-}?`(
zB;D$GXhY<&RkPhqr>?WQ{Oxn2`k8EeGharmSUk1Wsq_18hvAY6&XgBY&9}EE&pNSA
zaPg9_?!C9&w*2~?Illh;I@^GQPU^<)-|tx9WG{=l&|n1$l|NpP%jmtE_h?Y@AXr;K
z&<Lp{3yTQ!wg_@E^zZMIiVrh5IbWQ>p8|#KF6avG1}TK4=w&d42CrBcb!<5?xTF8!
zluYX1$;o^4Lo$6Qy8Yhdz?qq=H0njK`>>Xzp-^Lv4BfuJ)-Bi2j*+tw6Zl!AL=c;=
z191EUbbXhkpszFdMi27XR_q+M1OSjdq`Ar=9Q*mp=Q1{>oE9hHAsk!qhh(6&dWu^b
z=G*PRu1gev3!{8@K3Hb7*K>o};m#qWNQg#|qe?L7$mM+>e2PYtoL6kTkUL-=-&Eaz
zNf*hv^|6CHYWO=EQn@i9!;u7k_}PeTR_`Yf962xDX<Vq%-xoj}h-D%}k9jAgpC&?}
z_up-;#8f1m{RJ<O_u6z}r~WEAl{AH)5BQjON&Nd}k{sCqr!v-tWk+|O;hOkeIyhv)
zES@m^3ko1>Hch7oA&P8n*%?Jf+oo<z;*I%E>nRM6UVgB;mW<ao{*0MV+}0X*EU)8i
z9QH}A;Fq*z)neyqg2NBn8K6!4b}a#yfFmNk!>ziS3#Tmo{)QpJoMZ<qT_3ws$-6(D
z`svN3Ug6YrnK@he8<&h@O>zQB>u%(5ke6wT&=(P_dOf#X-H@DpjEiUp%?fw8@GF)V
zd0Z!-K!wgD#eR0$j&(M~D@hC%dI`Wy&_+6tS9opwu0j<xPY54!B*~pKnT}1QC^I2c
zZl!)Vx*TUKH@KBDuZ}9v=lni-<Wgv;^wfQ}@;X-%9-kjJ$NJd)i<^J<lC+NKQ`BRG
z)L)%~v+6z^$a9dX>4|{iO!D8gUYDtdbUe0C#x#;Sk6Y#6(ti5+jnPSQAF_Kg>_P(S
zI<@?}0}Kn-p17$?i=H#=hr71zxgboOoA3Or4Y7s+)`!}^d-O__EX>hNNeH^DnIr&H
zG~yS+Vc$Km2+iSt9{9=b|Fvds-bz>MJxeUsdxNSE4>4bgVGh(F{C7i_Jm+;V{ddDK
z#P-zxw-UFdNw0yU&^q_Sa-^qDoI>a7rS4uw;w8^M9sYOTqerOyNB1Q%o^5ho<#C;1
z2(NwX23;qL+T>Of1`Ru&oX8T93COlNd5JBZN!2J!{ivTrNLD3vZ;UFYhx69PbWeb^
z;G}~CoAF#L7OSk3E?o$#BU{BxsnuKpKd#2lYxxp5uD{)Os#HI<K5SQZt*09sFe|(5
zV&%FI0ueK1QQX{>gXTMmHW?sBckUQTmwT4w+1Ne#ASSbyLaQ}prfg)@Q!+f@Tf5xw
zpwXzZSO2Wtls`W4hjtD=k~jCWenW#Y4qyh1Z{b|3cgBY{c9qirLbH42>U;-}FIF=U
z@YJ!>qtc$!A=+g(3Mj^idw=bAtxIMMBjn+g8DbN^cGQb<AlMh_n_NR>cD%PEyE;Ki
zJc9kMv3RyZGoWyp^u34~&4r;%DEjI0FEk5gHt=ET0lq-jj8-;iaxP);JipqVoxR7e
zaQ<L9oxOapSCxV>Xf1pQjjb<GyPU7qEfmiiP8~y@-Q~5$vq8<NjU2YLw8V6}AvP!x
zO{=z|Bccc?*{?^7MVaPX<e@rH4Cma6Imon1Mc0kD9EOB?>!b@-w%r9FdEAf3{k}gJ
zrnAt{76U1^dn9&{Wwd%-I75eiI>D7+iuNv2TAgG*yO#d%VFOb}z`sX{Doqt=t5<e%
z8rhL_MRB~<-d^pQU4GlH@#Cm<0jfLsX2|jA@GOU_tyy!I8Ag9SFzmF8V3_FC_~ErD
zEPZm-=tlr^w2$#;w!V^n+H$-76Ef9U0SDq#Mtu&jFoP`TpW>HGZwXFtWp%)dLpTh#
z*-cv>%=0w}1j}_?*`s0x;3>Tf3U7x}%}R_rIudyg^%Z3E2{qi*3XOfizgh`2aO`<v
znW&8mvU6d9s(u37zE%G>OijGJ;dD^si?K*e8T5704$t(!8WkG<nr+<2HNB|Zpuwq9
zQE=tI8?#jvu9({V`fZJkSPOlLrUqhu*PO%6&D@iZtWb$gTl1Iy?5_0ri4)GA%Fw5{
zhn$DySxoTbuW4OcgwrTf*Yq;u@s1YhiV`|gY(}t??JTffAdqTqjkl{p9Gbk!J#L0m
zD0<2z;<_YKPmpFhL<=RBwn_0zw#=T}x<3!oNuOt1PfklCO?3B4h++M53i|e~?@M2e
zWGg-Y=@B0Cy#U0Uc%I^ieqLQ}s3o{hl4MlEBy#1b;;!mEC&))17GnUl|A2v0%F$+N
ziZRy?-8nzj&6g(A_#>=fF*0s!r%k-a`0wzud7KaQlgGF6*A2^(aYp>RWbuqNgxA<w
zuv3v%$YL3-#ajW<SIKwTn`DDiqQ6A>$~g_M=2TD{<rg-qODZ7~5e>G`940r>V*Bk#
zEzK!rL!*~3j|w``!|@ZTm}bsCH$O@6@Dyz00VPOlEs;z4A3RbFZ>Z>bvAS<|I}_%<
zySPgMa+vu1gkN!>AouFfMF6RLpHI_cb76|_Hwt?1@wb=GbK-S(&zx$P@7=qZCqe!w
z=jyCk^tg`ajM{2V`)33RI80jT+0*db1lX7NzK?*xlctWwwM%0$#UZ5sZrqHg{3cA!
zOLj6s_neV-6Qg~JlYPI!44g7W7J_;dWph^2RO79_<Cu04aEAt@IvLS%;<+`Zm7m{>
z2z@bYMKlO{_&hy<Q!ebUDX=HmTvF^6CI8pp#qH++kUPq%<z{3!%*yZWo}Q25<C7j^
zr}_O_aH<d_c_Q?{XtZR;NCQGNL@#8c-OYe(^LJiJ4AnO!Z3k8~D{#MevzrQXli6BN
z7t3gQj7Rx*Lp|sr%Z)1fvogDL0~4kb8U@v<DoHK27(JtWjJjk{8#qcsSMe(OY+>xE
zc(XO!JA(}6E<Ad(w%I|#1(3-%jN&D|x2UXFxWbzLwkfM~#bd%AUA?|CnEda?8=w70
zkIoJ}E!MFiy^uC+Ys6RAZPT=8vLqnVN0YS|s-5B1Dp8Yf-20X?QffUZ7`8kw8fUc;
zV0v?R#GDswwR0-#n)H@ErN)w+DujP!XnMZoobh)kALCZN8}m)6PwLgoYy;wDQ)+&o
z+~6NaV)ad_6;mwMeTh<K`#ocz-p#{y8bQjMW`NYzJS4SkkLBK+mEnyRsF?#hbJk5c
zn8a=Hyk=^Aptsm4hXg_nr$pb_*+NY%l3k)m?Q!4~qh#7Mqx%QpPXG4XUpklb#WwZ9
z_!nZk_eXnzP*}m!T(mwv*mWy*^=ND$vw>InW<ld`XDsc?U{rdG6HNW8sq5=gET41L
z*#q&5<xGZl3F#trYZyM2|KvN~!Ku~p^Bs><vAe?iruRNSg_R6hVlN|X!_FckNg>sb
z@7eahN;R}~eA#~Lqq>3}trlZO{I)reAl$rPrNkiXnBM7CCq{YdMxk!a`f<7Ph>nGY
z^;aj3e`2PlgV(7hoR@Ay*Uu1^gU0xPG!q#u-Cqa49R&61Bw^6}aV{iCHZhj}*woXM
z3NdcVR=c(l11Ls%1^m>ZkMiv|zxl^?P0nKGX}aqOdP&6DdA9z)8xIE~Cy-%xGla@N
z>ILV$7IrZyw0ZCcsOYzra6Jv|YDI?;-@f9);1={s8-CN+vz~Ax>JjXB^aq3+%H0=W
zg?qGq4c9#(ZA#!2uu{4G`jWU*{O5qABArZHCYJ3>+%P`)Auu{6skZcdVg?AxKI5%c
zXD(WfZ){WGo`I4kZ0DFs0}cAlDCW0Qf6(C`el8-0x8oI3k5aaWuR`m<)rXgz>6QDb
zW}j4Gyfk?vY2GR{5PnIof7JhR)wLjchGh(-geo83lzXv8VBIl{8}Q?ZD=0Qt%2NED
z<*Lm)fBsWlnozuw2Ac}sMf=RSNADlE_ZdTSOLZISJ#wYxfr2uV>Wk+|YOS0Hs8-q;
zT(2H?BEkqjDCR{E-foq4OXKrKyni)2jJc4>N1xpl5$k+JqW1KOAXq;dNutMEO7aOD
z^gbsi@`_wQM#?*&o(ceOZqWxxxw3;$FO*S4&r|H*peW~ar_zqEWbti*8ryk6?MSw8
zoq8Vcm=hCQj~K^OUHSD%F&TG`{=^*dWkj8uwP2sHc-1FQ&iPeagQ4kszM#s>(uQM_
zu#uMST|w;9!3-aV^#XH{O`Xky#LPh+PV@V3nGT%_Gv3GQ#gBvTC6U+j0rNEVmF$+T
z^@?##@%=sTx!+KIeN1lkgfO=;xuK|Z3dl>k*D`3GR8g}6SNo0IO4-cM&)BISDCq${
zi+%2bVjV%{6}QiwT|Y`-ZYij@|AZRSif`1uWX^;%j+X7?7!u)Nj~!AwoJk&B(R}pZ
zjbm0W*(JWb#^JZi50cNTe`Iq{81RLnx(i>Xwi7X=A*PI143aV#vvXx+P^}Sc603J7
z7Y-Z3i3pxt-#wjL?AX{%Afxy$A1(3y{IhP;@6ngaSqI@?&(bT`c=YXs;u2ZU*<Rgy
z>2GFf)nNr&lbP2)MC*Znis9bH<c|90%qweS(1Aa#oJ%;+u@l4F3DC_Ny6t_*vXgSN
z*+XP+w7~S>ipZPip6!62wx8w;?W*m8dv_b6T(m|XKjPSK2az7mf7jn6zP3*oICd%{
z1V0th2Fn+}Wi@BD+$uq}I8{Bo`|TCApwcXIJvS}_G#?ePZPM`Gb^9}jqQ{^cIsaNt
zJ>z!Hi+}c`{q7!X-YL`M!M81?uakUvovQ=ojCF{a{69VMdh8rV<JA=IKa1uEA^+VF
zavOBo_cJ#H8zEvjf|a!gt*A5l=g>t=c>H#fRWom04BUWAmQEr&8~U@*Os1eL-JIih
z;8)BAP&s34h{US*lr#hMFXO}ooGj3dF||0lP@c)x-Pu+scjLFmKvi1!b`eHMDVJtd
zbyg8xfq&~{cH++^-ar&QDUaQ<M^I1onr&*F6MX~Hf|q)FctHAWy?oD69d)q+yfPb6
z_S~Y5xN%}KR;FxJr`R^pnJ8CGZJAc!@rm0t=3*P~*2fIv3Es0FeC&3XWfUCxE~`xx
zb?oMVU`Qs~DH3@Xu}Lh(<vRF}-poPB0)4HADi)h<)d%Eydb#vNT;`iNF)jZeN(Tgt
z&|qVB^L(`CR$gZEh5v{bqFrUoKQdG8Wn4@78!sob;sFOiyIx#cLfi0AC!b$Z{o-Yo
zreQr@8K=(F8edQV6@M*vA(a8vpwUrZCn>blGtem;bnC9-&47!ucOUUI`p1{ksSc9k
z@)(4lOsbm8)QilG+jovgym_`RMN0`(qHUf{6x+ge?PS30C1WgJu>OTrRk1sA_`-IR
zf9^4v9Vt9G*Po$b-|IBN`xB&8j4cp4R|sNB&=~Q7ClE(l+ox~aJIH6qkv3ulgbak!
zi4hP@%Ht;4HLdtYyjpDZ=*uW3KVors6;m-PAAHbzTf9*0Qqf9od$ibR-!XMn(6gpB
z_!ss-?}wc_{YF$9ha$(71n=C830uo0nucxj-iiD$lvbudSY3c=l$j;^Sy=m1J@;kn
z22(xcA)jy}xVcK&(v%M(^;3D)QZWd;i|j6{5c|yAGx-X6DKCTZ3CXIra1>%8_evPW
zmH9L9q9x{!(=!LATN{h&Mnk7tb0y+V-I*sN9SRF?1+4PXTs|zOVAVj-sD=ZX+leB7
zZj(9vFB<oDcAZuh$IMttmy3%~n@o>RdnZiQ_`AeQHT1-VU!#J(j3D(U%^!YR`*thc
z47ShjoMIQhkQX41DpPa`8gQ2fNh0+9>~v;Q7>i0R^NQSn+G8LZ75y3GLN}TM_YNKq
zPgua`;pa7Y=uh;U_JC@6OcL*WjFiq$7<$`O(qCQJBE!04K^SxIf>ROUIFR&f9h)Fq
z4aEd{WuyZfm9rYHU!3ux2MV*_i+DT^_3J5PY0O8vit4p{u4$*asZe0d+i-&$=1k?H
zUx*+1#uib{vSU}B_Yg+bD*-*esx>AlDf%c763XmnDE<?*y>>iUqhy*O9@Rp3R=DQ^
z^pgSd)0;lRw`-5vVY9@anoqP%SV00L*rcpZ8vI;(dGRW+^?11svO`S-n3OfCMTD_x
zn)aTdE3sWr$wZtzm!f>Taji8UY+xgKXN6tBRHZg`^s!GCtl`QVT9t{}y~*KO^vXAo
z@7LInASXBotUT14ki?A^l@BC|ZjC{<D#hD)S>1YMJVchu*qF@xcM&5aqjRkJuf2pX
ze<8H>6+()&ih$jCZ=j!tVz)?|4#v0(N*~H;B>D!iBH@J_US9yH`6|?&dP#38Oocw0
z*w5|I*mzV%_TIg)K94=e7aq|sok0FB9Rw)knErJv^SElO+fbK`Vfom7r7<4VVMKNz
zT9P+-TFZwW5zsv>u?lOiN(^pUb^XA+jiad$RfPZRj0%TkF}W?>bpTnr*HBY;v^y@V
zn6o(S2{9g%qyzaQbfznAIB^(9Pz3vZpN{?8Q*ZpTA(Eft`KtNj?Erf5Q)~4UcjOr_
z>|Zhe??k}K;2!UST&(Hiw#LCd)!j9*<|hG5Qgg6KlGv}r#y3Zlm&^Y?f{CSA6C?&`
zmA)f!FB_r>kD=IR8@7A)!B#9{dP%e9Qcj7(sUnDez2qIp&uDQd|JS6*PKpc!x5NCj
zB`rX4d*R7eSKfX!ma$wquF0$NK~CWO(bDjoRDj@V&efZ%18>#X>qS~v#z$VLfN{}(
zH&`rEIo|9D{6Xd4;ifztQgPU6O5s0v5VCSAcB|Lv19=M}B6rXoaPJ>dBFBSP054}Y
zWyz#tK2g{SR>GR?(HZ$F`hDY?1Om8C?$t1PJ#Hbt)H2^S2T0p4h-i<P0F5I3-xq{6
zJ%3<L`8L48@p$kO^|bA@j~_uv{^i=tQ4}fgj$l4K;kZAr{W?#mr?|VIVGcK_U{;Lr
zz2mm@cPNJk*kfkP2of1*CC=1=MVv4b{D4^Ph8~_L!4yk^eD7VScpak9y3hjBTe=#i
z!iP5Y<d<G*2w^GwbU?7e<yQPg<oHj@DxRj7#)(2#r6whKzhQzfjPJHG@boFD*>OO2
zhVj1}SXFj85xFIHKEJMf>=6xSAhAj`(!2fA9=t8W3efsB|G^h>f31x5{MW(5GXun3
ztN|DyE|rgKe+r%qX4<ta)>3h$U9zyduouuuwFE)pbCT(%BJ_|yoII|l{%wMeRw@>s
zjkrQApOF8kxciC)krO4C6TuxYZ~n`g>uQe}rDtHh%OQbr5BJCrF(z~ntNM_0z?HP5
zzU$8t;HQdcb$xqw)d@(oTs*>XMw)D^Xy!;PhHXLkfL5nsZ^mZHPnmj(1b6olu|D3$
z`5SLVVj_eQstXiapTW2R6$<PZ|Laz}vXZeO75b=&=~R1ToU7r_rg{2TY%gW*`nIqA
zi8SW#8I~AOdZr$X)9T+&Sh~31B>w6h`iL}r*{7^+ROnU%m;D#{Bvx<~cM-h82YEAV
zx9z-sQ$6O1J&$l73vGEDl1&Pzxv=rozwbUZpW&FwV0^fQUDZJu-_Gmt6a@!SOG2+6
z{tNV*tJ%M9j^HH{Lz=<Md*hLW_Is#CGLhbZQvvgrZE|XOem2wP%|Z+qdd{FGd%T~u
z;*(j-8Z2X1F00zO^n)P%A-ZK?%qvY_kABDaY)M=a;q)|pBsg1N^Mp9O=YCDQsLPk?
z*b07~u)vS#G*38Tn-b*x(!t`tk9%%}xmT4$)ob->=c9;8JwfWQAjp1`mtogL_~VZI
z>3UFVWZBaOhK$$?yLK9NLc^E*h4d6x0^9sN#0W<$>X1P#eB6zAHA$1`06S<bs;0?>
zQrl@Z!$_nMyHTIg8yaM1B^;@xioEJN>gVXNuS0;YRkrN8Wy<|8?4ff27i`t2&3&)^
zc!-&kx!%{D`jHj-uuMlO&pm1L-#R3FlOjhAnV5R@uL$=M8IpZqzTE@|Y-_@1!uC0x
z@F0(QKl94Tu$GCWJKJ3l!gPdaB>EXu55$V8UF})v0d)qch6z3o?ebLTZyJNlPugna
zLel?fTqyRP9X47QdLE@UBS%lJj%M16Vv_{#G3W!;>CXB5?y~J_>9{zQN{)JzBruVy
z>=qu;;JD9mLR#mGQ>^3?>%fe#n6h}%r453k8_JA<9xv`s3pf#KIP`uf+78vZMy1+v
z6pWC5n@G(I=DcoSZk(N|(+3IL1NNkD9wkOf_q|wzsENET`aP&dT#(p=B#Wmg@X$(%
z%4;O184RPn42@jVRGhtyuHgv1qCfmW%bgR|E`j(no5{Nd#Cot~X&S7Q#H`ETa`>8-
zx5E7%sc%Z8A>-h=x&O<riBJwzQWlBJxK22pk8cc$%pi059jbH=<PzUkN*J&1`H>OZ
z_8%jB?`XdSLc|uU^Fl1;Ui;L*PaIPCSoqh^20V3o);RyWF|Da~Q?Jaj#a<wrmESJu
z+k!WpOiDvGGh1!7792PNv5G_lh+ipuE>S%9)JpD~JFbg0W3U64E*s;#QSMKymU7e5
zn^=f_u)c@JuiZskpL{eJ`}V1KM?F{W`@T!@)%(8m@4N+!Q&=LJ;Bc_P%wnAtZlF7P
zOuF-M0ttS0SBt={2^~|~g-x-~*yTxhbCIc+k}A9HjK2B4%mW$xmWrz3T0H3EA-i0;
z3x-XVP>nIX9_hGKZ9<L1!u(7@T@T2c8I&AM=_p*(sn7oPW|o6+u3~5%$f$PST0IQb
zoXSIKaBo0=kn&Vx#zco%$Xs58@$5GMvB<d|MPP86++6sRrG=D(%U!DYoFmpVN{d6g
zY7<UiXuYe!_Y7u!{zu#GkKWJVjDs-M+7VW#B^azcm_)REwJFE5y&xU(2>+TQ5%z`l
zQzb{0`+qmaC6j7mO2JDnQ60eacvsY@;*fh@(C=aOT=tR%UQ^apD1wsQ#Uv!N7iAgx
zyl3B()9lTY_jD1tGk7o0o(e$TsY}Uw)#u#nXdX;Ms4_QhSrbnB;^DXA2PFwYANAGx
z$8xHHdnwJnzxFPTyEvaMfaUJf2aLEIv?n&M{*Za?TLOI;!X6j~1~KYG597<2N&)l)
z3H7Ok2B(^7!k+c&GlSd^TMIHwtj2DlUub?8C&D)%r)g)3rJYL_=5NF!%9hIXj|MK5
z>&|YUvlSAY;l7clTw*k3>K`7VvmP4pAp?wFOQ>cB@1qM1x2k1qiVXu>4Gj!?8i;oK
z#S;GZB*60e#J-_x!Y23Fb(Ox(i?P8j;e2x7V!kDyD@&E6-KRUz_Pix{$>k%|qIWO_
zk2^Cn=hG7XroJRLQH*m@^hmOHyv4S^(cG!nDG=b4AWTI$LcS;(kuf&{VPG;-fIfVv
zhaV>l#qx&28fgrAp3D!~35Xe&Dmt|=%^TefqLproZxylx5bTbAfsQ-^7i2P$Zu>mp
z))Vjhc(r^w39(ie(`(CG!+}&CD(4b7tXR>COOI&Mz16<*g0&-K50(r!06`*w35-aX
zJ}V%QsAu=3Nuc2nWnSw(K}+|YJbsJ+T@W#6{-v+zCjp!Aiw<~w_eqQITJLk%0_EW-
z0rG82BjWfSnAi?PKVXtu=#xXcj*e>7ePIXq%bk$!X^xcwfNF_tKy^13cXCn*`2o6U
z8qJ5S&>yvD)QK<_mk1ealFnEC`$Fv0_oH^~cjW|x@?oCF#x*iwMfxy&GyG3E`51E=
zhFZff?FC@4NIv}8TJ!p4Vpi6QSoH6t7(EduQTi4umY#^@9hOWt-UV2<4`n-1z#)~4
z5jrd|`~S(5qNB>Jo7-4A_bZUO$9|&revu8RL;iV1bS&8&=*-%%8a$vE>t3g52cCar
zO6M)_SYYW*dSWV&`;fzZxdK(Wn;j)(#^YZ>#su$XUdG8S+~f=N3gmG-P=BY}NqJwi
z<k4YByQY-uk@`E2{%I+?X~*UF!ECK5*02wQPOzj@X<IVMhWOS7Jg?#rY&Z7g+4t7`
zKv`|k447zS*p4-K()=FGKjq|W`YrDbWPjVu-N-X);P0@T6{xPQpYrrISenYzs-}Sy
z#?)`w;!$D#Zvs22EI!@8`gSq4G?IH;r`46fCxcV*{e1R<=&csYvFfvOY3ikYI|c6~
z+sr7n9jgj%tFixFvqqio)cOM4bpd6ejk((Qqr`7VKDcgr>e`7Uc9DPF#M=_xk+)vJ
z1(p*FJu@jIb5FOFDLdM4r^>dn!MP)1GelrXzHuftN1R#kXI4}tKJx{5E^obr+-UVa
z!+~DaNGR#;cNf0IEVvjj&79qh$5nhglnh$vscb-cme=`o0DI3$QMLGkVR^IB5NVC8
z4%HiTg)=|cKM+o+kpySq+cDgx?WEvEf}4aWVW=vJLF&Dt7EGS>hr~X!&REqD`D%iX
z{+ef~<5?j=NYu;Ok<CP1W!DD^)-mP<=t$fm@NhdaKiIX_9`c38bz(B5H2V&PEOS~`
z?o;&$h*B|D9xIVkU;8HA146ii`;b&03v&&ia0PjTvn_eFzNomRyn3oQs2&KGS|)Qz
z9W)Hx&5_!9>(`mK8kD)E*f#EhAiPH8wKOnvtI(qE-!WI5nF*mUd(6dgnU7DR!N$<%
zn}Z6F95)@8<gqA~8U4a<-uvQHOL$(DzK?A?n;BbOW(&hy*3t}?{8R+^l=JpVa)aN@
z=#~kEdyC8J%)LSQ3>6Bt)NhmSu;j%083Mvd=(M?y$&xKYhB|UKecB+Il2O*T*Y8$_
zxC^IWQjJ6vH(g%UL0X?Q2Txb9jXt;&R0p&O1KE|_6)DZOZ#YzQ@-t2-@O`XS^vY=p
zM=TL`RWE(^>!x_gW~lkRxJ)K`WzTe+i?XYAcTFlSvp9+VDLZ)SwM+17B`^Apo;Bi&
z%mM^uJU1+ZC_%Xt-G^8zOZ~~ge;4SP0kXt&^=4A+w~Hf-bZVc7i+J6mhMh0}jnYm2
z$5rWHx18BAl(f)fVnigM0v=KCqUd*|4=*=+|2hB|aM!e9ih&@+F^HEP&?uAI76+mD
z+q93%*3Rt;0cSuM5?c8tfh@7k!)e`%Ew|gSFUe8#Br%N}u|otRgG@$6+WIb*AmHh0
z8Mj!wk!`1{vdCH>$P(!K4}Ns;eRBX6B2?e9<rzPgSQ_jiBN_vlV?kvt{pxA^rh1Q$
zwcF!VPw7iL(5jEMb~lEM5X#c!e=^|g-gngV-wjWlO6+7A!1C$<GC_>afXP}e7W6Qu
zgNo9Jz#qt<*UBZFh#1lew1H9H*T<cF%S*TAQydR{NHU7Dbg+9k$fvm7PJMsIaUe}^
z!yMbQCUeQ{GdJxT{dg|-skykW_UH|GGWB8|B!CX&Tn(9>+`OE0Y#ID%SrMfS)Ox?h
zPi%+Qux|3oywD~<V8fUM0)P4oWHQOg2McYDp6ou$TEIDd>J@8I#oh%ZY~|7zNd`%&
zindR*&Ff9RvK(V?&==D6kTmHMCC1UE%F*=!GhQEP$GO~gS!-KFR{+YP0)^BPp>^U@
zzlKh11Pk6E{NcS8A>{AQ!IWEbx+^iFGu@vBUx^WQ0K-bZMmY8?rHYIE+xDE{A7^zC
z#!8we^Cv^w-RvGer5blJm1t>MWNlD?eEx>n^44mpXy#_BL4X-N6gRXy?llk%DfK$x
zSfXxlHVdh-(98&O`+r89#XP_U?%y!Q^;E0KEvbU3pHO7$OFCO^EG%*oC%|GMOmSf@
zz1ik*Qu$G)QpVX_!$#;h%|yFgy#e*kai76Du#2HbHWDszEK`r>@JXaCL-OMtF!RTn
zqSfyg{OXM2TTv5!R=L!3%;z?h<+P%oH@mM|UNz3@3`I3KRn;Wlbj1}|MrI!pv>p>K
z-uR5W+Kia*_ni#uo60d2=J(fCdLZ+rZkO|0%z6kq68OkNv{Yq4XY+iLBkazhzh&NK
z9?Zc0kv9hTEUK8w=uNe9h^Oe2Z<TF{B{DUl`G3i|#4-W}v*4(H)NsL_c;j|&NIA@^
zWI{p(5zBoXn!S$7ujdv1=hoq)GcPBDB^+?ig0I|5Mk&)hG0S)~S1PMNMO|tV<F|k_
zEG%$$`k6Y>iu-Hzd2NBebnAOT%xEDYJ5_ML>$wC0*wHz5`kTBM7cTdA?GQKoByo$$
z2+1boa(UepoJ%VpjHzVh!tf4)U@Ea;SF?sLA%K9g*GhURYCLQ0(~D>;nxa2Xf}9>x
zj7=R42>AH4$D`_sHLW&CGU>p4&k6v*XOplgo^W!QK<0iJq_a;7l<Dd#6oNk<41kRI
z4VN@$3fSlqsfolZY?3=V!Fz~>JmuDx`#pE$aQC?jtl1QEiH1AQ|6SwDYtdXUtzhz%
z8Na>Y3Lf@cLbLr2;Zpu^>Ml3FL1O&$ZRt;T;lr6{KM3WX&t7Q`>C?Ight_tjk67t5
zNa{<G++;gjrw3#@snXA@BTk0}G^GP*ms$gVa}M@WZiMT3rrpxXm3S+?)9D{^9^`Le
zyPH;?A6#Hz8YIdUx<A+rG)i~Bu}<e_kUitSV^@0ji-dMt^wnOA_07L(-j7p@YCYjM
zYZAY%n#T%5MSNU$`D9dqv^x$}I|(8@z7Cdy<qILDO37^Jq~!7)Ir7X`8^2{gY+{Nd
zUDX+Ws;{QF!&Ui0DH~Y;8C#^WUJU#-Z2VrU_Foh0``#HNGBck$D3FyYHNWM%6YMgy
zeNyMI|Mt2{>1q;m^G^sGp-E@oTc1L)Ne`G&1>Ij4H*0(X{SxWtCdt{d7`Fo<e#2oY
z@U+M5m-k?z(T3Ks->_J@z@FXh9PsTH4#!WwIf4sn@6J&6lOhBC-b%hIw$YDt@5psG
zR!3~3xMZ@KcCS3GAa+9Js|8u>_jl-Hf8W<sUd_}t6Him)NY7+H%h9jave(2iyoMS_
zi-HR4hHw_7@m+I&`4%L<cd8q$b;ot9Su}p5AV|uzv!p90hb`u7aqf#Vy~gsbpirSf
zg#z~5q2Oa_y@a&JiY(;EqU@r*WTo)<<UG{gibm@;e&g4Va`NRxr;X2RNSWih^twT!
zv_ot3+jBB#;V3ZlkWlyQ=IOw%0r{*gtE8}Lug=H*1AlmYz`LK+O~mZKXg*c##?ing
zuWFj775P>V7_1cCBOiFn-oQr&Vkx}ECi}$(#G@IzomkEIr$tPx)b|{pJNexxaZiuF
znforl6gYWzre>yJS|WstDkE!%7ak%PMQP^6@dW<&`*Ko3Cx_Io3icl5X~<&=w}&aV
z`BJMEEi&+stU6Jq!L7KK!+(ErST5hf%bRlGXi~U5c`Wy1Wd9-Ys(*#-qEf3xYA7l`
z$nQon2HQH&slO+FFJig>@OQxHIvK|^;XA~yuliw6aZdt5-e<r*<m_F35Tjihy_MOT
zTFt*6*+O=uc$_+etb&*Or-GANUV_@Kr$p!#>bSgGDL#(m1$lYp*D4H=2NcZ3t&pWw
zFRw2JA?w~v86y9j)l_LW-Mi`O&xb0~o_sDZ7koo*riRBd*rs36$^F6n_=KY-?mo_t
zjiQtFmxODd+ULNouYIxE6nAj6@284?I=04tgopfjq?>-^DtJWCtylB*3vZ1(S70xV
z?9uz0!#oNQ4*3<YKT$^>Ri1B3<dE}FMMCp>6*C1-<L4Ax@8iIR8_|S9?1$zEsRZ1C
zPKcF`sWWeDRw?s?G&thNlwgJ1C9)yy!RDaU<hK-H5Bswf2ZD=6JU}SDv)n$Zy-`3X
z*~_cBHIql)(6JfbkxR^|oB>i7_h%*zhJwC*78Jj#vtlp-Dw;u)sE(F^><_P`f{LJL
z3SVb6H`z6sAnmIL9vip1VH2%|Rk_PKr0on=KJ6jHvLrTu2L|4_GYP2!HN@x|8-mm<
z<<3<0pa+Q{5oq+3?9)o5-SNsA?`gY+j_hC)iL;G`{@Mf7v$8d!kH5Za?-Ma-u}yB&
zcYiI;W*MbVBW=BJb29YQdP(xg{Tf$kFIb<T#;$HjhRBXu_3X(#W7rsUE3HuswxZvk
zHmlN1h-$FN1U2(r)`2JGUa`$!zgP=x)GXPUiS(N2q&^$3WN>G*AVAXOD?~A_OUtGf
zpZe-ZI59O1?kbrqJNmdyZjlZ`uTE9U9@znbh=_>zSsgYlFW#l~d3<O>z;Wi#0~e3t
zO|Y+g>)SXL!hHQ|WUeX#;EU(5tXQ0ab?FF=mRo!OcZ1lRXi6bz&RRF(F2Lu=@~2-z
zt4J!^ySrUn*ZxxlN(3lbv(@^nqTbQ0yI(;aV?_dz`>N*^XuXDk1h#UcVxz0DO#U%w
zcc4>pCis((Kpu>M+4In&Pbis~Z|qHT?tNDl3zF9UEYk{ahvtmFefcu&Xx7a?o*WRI
zn1ooiCEC0B)_Jy*Pler`iJ$q3r-aLA+r70&F+K=qx}ub}1hc9TyGe<fBvF)SsbuoP
z_g(byQVI}^WQ(%XwDZQx^9@8m-S6G*0G@bUII^0ko0~n=jC*8c#o4etIYCs_OmAOg
ztDm84a?+x-<h3_yh-(Mh8Xl%5zMl{)$7j2&0G8{?%9{#b7I`7-Cbkx0N$z$}^3)nd
zr0)Hj55{4rnca}tR~In~ZziSMz616Ef%mwj*d|VUbAA?;#k}~8<~UJY>G^DS!t~R^
zZqA!bZ65(E`|=3!BaPrjKX7i~h|)~9Z%2o-+kd-%<=`YmOF>#AaRv~^?k^@kjX?Wt
zwyX#+D*D#R@16>MsnRdjnr~`%Y%vN0ZCXA%?j10`8i@0R5UhCeq<8q$XnnWZKXBN{
zD_&F`v#t50BGg6H+@Zw~QevThAUojT-InD!{A<ebU?_b*)R)zLmbv&}&~-1>zDV;*
z@UP3Xbl4{ZXBOM%jv-dQX5jnuQNMZh-xrPvQh6{0pbS^1wI;5sQo1~<IYjXFH8yvc
z6Jf4Ri;!_=!L3OVLeWd6uqhqn8-K3T3Ju=F)QM+NuOXfx_B~pbnIUgf%!U>GF%su?
zi@ACwzvmoAOXxOyt#vmtU5TY2sn1?6aShN%N;fT;IlWvZId-;n#DpQS{ChfOS;S`J
zcEB!W9SBwQYfw0}WjP%tJ`v#|{mZ{u3U}Z$W-6Kn{cAd(z{(uuSYgRQ=qMD<Aj~ul
zv_lNOLNoD3(>L;)hW@)TD7#(9a1__7vwP47kDq>a9IxL&qHKE{Zei#R8#})0R=%!r
z#AMjwrI~eK7VOl0pPf9J=-G15w)S%|7?O?8lqGG)3iQ_BH%w&UVV4v+3(K2gn;?jo
zIZ9<1IP;UdBhBD-dPWtDth$A9<&o_!vN?3@W0*~3z0FMSsrrb!j{#S9kXL$1<-5Z$
z;`54M^YDbb;8zTTEx}W`2T42-b?a=pT+A21OGSwnl5iIU=y?aC{^{k{j;{NE5}@06
z8Yb8_jEg{gJI~pLB$}uXUn>@jYYo@iA(67_oE$}BFS*1P4klfO?x!d@YzXFMe^g{0
zIQrhZfJ+im;zN2}_CBt4*d{wqk}8tt?^4Xj4vY0}95|u1*S`_ORP6#Pva1X3cI+7X
z^ncU|!Ye9<?t8Tsl(}xOg-0b{O8F0i%3AK2OC6T5?W@xs1n`e_2S}`>CU1GDwHUoP
zU9lPG{AE)3R5&d)xXD@+tzRLPfBQ-j<fm<6ZWKV#BTRh5!*<#Kjj``|uuhwR8Kded
zsEU{&i$DrvV)RG2Aj*dJ<7ryEaQnpk`3Uq!4snKYX2P#}&O;0Tg-91=fsDl$_x@Cl
z8kyP8hGfQWOu|+K$8)%3c-A9*@eA?<SCSO6*~zwPym<jGUzwj=Bgt88-@$o(y11?;
z{#Q+FPJU24r_Sg#1IPOxD{hIU6X_h-LI?IRck}HL(3>95&NzVGP5;_!lNMc`z66Yc
zV17RRw82|0Sy!<NT4K59t~pf3a13LWR^sZ&+G~U(%LUs$#RIGUY?xRUtIuY}U(WcE
z0Cif1MpALqzE-5`)BPo+!T<!J8Bu+9(o6DjG5Vv9`GT~c8+M}_n<huHfW#&KQ*<T2
z#it;x3$D;TP^MM@%PW!8rg-&9y<~BPoYP_GJMo@z&B$8+<3W;%VE#t~(3yWVTXm$4
zjG@0AM|sbw#6_B_77u4)sr$W9=HfRT^gC8A%KT|=>K;~U`DYaf&~ych!8)_sJFg+X
zgfBmPl#Fq^mv%@Q)R&l?MN<2StD(!*gkmMM11G{Ri*jD~^x3}%l`8Kn<Zyfs;GUcq
zUdFnIpg3;A-pDIqPt6qsWrr%gJdA>`JPk|h%8bwt((Fn*+y*O0M-6=Z#`=SDOz9IL
zQwbxv8Duz1A!A16pL1)ijSk~+pNzwcTpfImR_UXRTiUGNEq{Mhub)dEp~Uh&{C7i-
z9FG1Q*0w*cV14q;JIDF#<<v&3k8Y=Il_)tS_UkA4Dteh>9V=QVDk}T*^vkHYE_Y=6
zS<ZT9mZGi84H(>nmZ9oQw*F<4m0;Z5@pmGnQ?v**N!m|F>q<)Gvo)JsB0Kdw*5w{>
z8|T!+9k4+8xJWx<@0NE<CWGPp_lnYp-F6oB@~(cxiDmWKju7#f0kiYv+Y9Zl_vC^D
zI$QsJX0D2To6zm_32Y@5X!~*@J4cV_xs}?qb;wA)_FVOd;*E2gfBHhA=Jq=2j1yUs
zQtvE0hIM-Y$nVeETmKY<zPn-Bp6Oh{dB}TjSyTLgSOe%hrKV^<rEigc;2t0vqjFVa
zh;yxBo$Yp@4q1K`{gYz@>8Cz@@vA@U5JhEmDl6mh%<C}+{BMQC4eDr<`wMp>_<M#@
z3Zw!Cl5VYuxZJwa>o{X$PhE@vUf66!>U#(!HAHZFktJ_~>yfKkBec^K_rK=_T}ib(
zKErm%3+bWjLZ&)1_%*}eNV-?V+%HRXp!Je(1F-uW=oUaFS5*f1PI<`!&Oa7|=P_FT
zQ*k^pEd$Q=n(tp#DJYHDi43nk{1H{!BD1lTT*|^Te%U)(Ng#1qc#amfhzqJ45QKyC
z2z(NLDtho*zkfciI(M9Ym(am=YQ_3!jcxqNhj4rB`jf=PHk>D#_=)MCdf)w3jCJqm
zW}2~?)$i^4B&bM{<?d&PAr#r^v~923zT$ZF(DF<&%_z;XXZa_xkb6_0p*4Ed>>x?7
z{<O~UiP91~gY2H&nmOCT4uCAIyX&z%-`Cd;;<GjXoS{!|&##~Owl(w1#fpELJ_x^O
z{3gx!brHmlSV~knlq@<VSHxGBIKqEiWcq$%cS(Zd&lcEZ6HDyAqiT!&8eW7K-8CN>
z)|iR0)HiesBpxglEDw8ORP!fleA1+L7PG*~alF@*$Fx#1S$E{UG_#gV!<8wxhd>7T
zIpuwcEJ2;o<-t@(=l(&3yiEo~vfm;Q7{%<e@U^|NK4?2>&KQq#G_xM&FuWd<OH`Xn
z#fM%e%jqU2PO(D96tqgKB(vXotW5sNxSgw&kXFi;R?MQUMNMj!m-loRMRx@{e9NNW
z1bFO%MvSr@5kp_UW<aJ~Ii~ofN~<6g`bCMu4i)T@B=1<*!Yo3~WY1o^d<^$))x9z?
zOaFKsp`XLIQFUooDjdv`U2m*I|A}GBVst@TWPQk0gfq%UiJOM0t#ql~-RcsJEQVxq
z{F7)eKJlad9v&)NaKaR%sT+7VJOt^n==gk8@(dBjrjrrGSuS&0Y?)Ahb~`_kz=oqT
z_J9NZcKB;1(j?DpsrJbLC6=?N9?<x|o0?#`U*Q&J#ws}#DWX9U(a^PQ4AMjr9K^0J
zE16z-(#!k>Qi<Z_eA*tE>KiSalMKK)Ch&6p2$Nd5;&)M^W(m(aR69BzmY~w#|6-fp
zMgC9npy?T@s>i@a4gJ8N1!W+$xo1hexp2;oL5?wF)uj^E@^IIubEb;s^o-h0Q_uGT
zJHf_Y(&U&4O@vulM)2_<trdmiC<Von5cj`56*D5?V;0=Fp0W9pUY9Xju{Y<KLb(4A
zc?&;?KE}E0EoOIUiJU3yyvX}CUj_ucM(;*PmFaxy`*aQ|Y_s=wCX`{E?D?O7-7OqS
zMHvqgj%G2`i%qTL=+Ss>s`-QDjkU8?Z)lP&QBz$JQ%SsT!pw~3G~4qLS|LeTLT_IZ
zj|#44uBE5Qsg$PRPx^4E$V2e~DD7)zG%ZX5Cif*Cs_g*QW8EPA?X37mi(M)!GnnNR
zlS!kQ0T3(dsB%%5zI<%AdGt-A*$$05V|8JwH@z1~y5H}V`|8=`v`49q`Dm4D?UYDy
z&@RGNginBEFPxO;cd^RiL(Q(HX3q$BRa}xt?KXUwAc%B}sm`{l7~2!#%yA6<)wnPq
z9QeBEZYM^#b$OELn*u4V0nlkPif^!rW9(Q=98k@|ZH#3Mbw%M9bNk@Z+BCDD8_c&q
zX30x=Bp9Z~_0kT&jw_hVyD0xWF*_RcTI&F2fqTqEd|rwIM5J95nP$U16FhGwDEfUL
z;<J>svp}cgZf_?EAWf$bVGadil!KF`&_lQc?~<`&5Ae1_e_Qprlj{*cjKN5H1Fq=E
z`ri%BQM_=c4vkwnv9O=poZWS9g@#j<wkSfo`(FnJ_u(1CQQo@HhsBh*<VoP}((~=v
z(irHpg<UVyXzQJ=;EeblB)b+XARz8ymVAd#eWr1;7;fKw?^!uJxIUihyRc4HxxBZ`
ziCDri4`r-9x^1D#71YWNYRhlP{dA5el1@4o97|tlDtHMzNuHQVezcaA=2+xvp<j~x
zS;Alg?jQaLHat~7i<&?Q4%cqKT+9lT1j*o(EJ?g_XZg!ni|!gsN4x_t*LI;pRC&$f
zPA?!lMtDK4VHLBPr^eP&z2#%_ji;0tNbaWvs~32;1j?LyCh_SSGp{HvZEr`y84dvO
zVzy3oQax#Af0$Sf^-1(K71A(-5V8hdG>dp)oqt9dy6)x?SdhU$w;j3S4+-WAS-mnS
zzv2C0$Nw>OmQhWxVHie4P!ScRLzJ$8)IidpMoNutMs6UDq)Nk3*$8Qc5gR>V<Um9w
zqx|UZataRV_S=6B=fK$uPu$OSX-OpNq)vyg$D(Ut=Bimu0}61@=uA)<*gH;40`79h
zp3FeW*2+#KwWt)i2kNp6@=}l@TN!d|9<j7yNg$ic<}w&3S~XgQ?OS4+X;#>vO~1>v
zKONure&{<F*9P}{4IxTsrxr_rX8ou|--srDZvU<3x%J%=P>8)uYR`1Tq&AXL@X4f%
zAHQP!FalnL3bF{5e=C~BsDwXtQN)>B<Pl6CmygStH7%HhM9v{gMLA+|oIvuB=qOQ}
zLsKtk_O<ws?!KseJyn#`<5ivt4MexJ@hCq3J5F+ql;hbhGtVTi)8w?X4wLremv&J?
zl*!M1rx-Zp!O6mWlV$ZYCp{&vZUbvQO-X{F#^Cm-f%lLnwZY@vJ{)coz=)|xU>4@r
z{NP^DumA;0f;T>(QxvmdUv*(89>5GQr*N%erGdikm|xpfwdc^h7nW@Xi%z#fPqfgW
zTaAFHAmt{@c4@t98smPAxSClpziwdru)d?Fvv0V~E7U0jJf5@PIA#RGov|Ohv941W
z>r8J|UPD)CAq>`j-byFE`!0{8Y;dY1ip+rukn{FcvmMX;J5M=eu;|4O%TU*qK9KxP
z9yCELUg#lds&cf!Ykb&F#=2>>ErAOR69xxfHK!*waa{y}M2bPdgTDp1*y?i(htjeA
z=k<Il>NvOQ<_aleM?Is!qrviGOp2bi<x@<-I&ATVX{f@n$a}pqjvzs~H}&fg9E<<$
zJ5&LjxYcTY<9R<Ps9PM`M;92e*;J`jHqG$Y724l@U&fhqELC;-kILqE*^lw)xTVd`
zh*6c-W$d!b!b0k_-5TRDJ=@t^<vjf_cUX1vTzjM*Ot=xcq9Y344xEL2CB8jwzhNcS
zIV$xf#{L#iW4pt>?Sc6#;#!iutkN{+WxPI=!e*KF$n8=R;I)0U8~+-kFY(OglVZ@5
z{J68V`K9MFiIb~z?~)VSyobPmrRgj6Y#Ma%8tgSf3MD$nf?ODo;!`_jb891F|C4f|
z7S6kWpRU+`T7EhmRxpq4`V>6p@AixgzdnDyzN_;cUO8N-sPNV&6=$TS|94gZ{YG&r
zdth3EO9%+F!^*J5CA#TwCs*Eo+zale{!e_WFDW-_>W)Um>K!8WA#ur|k)r2dBNMJy
za#KyXK54$|Jbwcib%Ll@*??9~Zn99^De6>r*wty+UOA;be#kaUO&7sotaeQ2^8K+g
zzn7GZMpc_1^6uZzBKZj*Erm(r<d{|~Q0{ZswVWbeHj-t$^6n@%p>(szO7$g48^!r*
zub^D>xfY_R{!@bThE9(cQJn@PlEw7gHY?D*yYELDi0JYeR12xl$zq18UDSh6je%|e
zmEL~T==I{Ou&u8xx}AmJys9iIS+vGIp5~4}40AOn=eq_d^cF1x5hU8|lQ1bT8={r7
zQa+kN6yzB-Sa?Cq%+m6rZPrjAQ)oU=YlAJ@K#yBW@Z3>-oW$8mJ$JU@xC(<cd5F}N
z?TbJuYiOoVU)#1(tCq=G`OxU;uy-W(@snN8e7$d}?w|jFt?I4OfB7%V|51%ZpW0p4
zoZI}pb6R(qL%I~)>15eams(cibD-l*{zui64fv7yegoJsPR@%eWlFcW#ruH*D~qO_
z*^WhztL5CKj2{ma8}=y4m~-BK^<TWJysx7v-*>X}J;h$APWiG}N=Be!$M>81-QNct
zbW1C+oeNO~_A5Q1s|w6xCTXYY`-|F>r$$Bl_&enhr3jNKJ4j*P+aVb-q*rh!TRsoI
zU?MN9UbD)Wp?VZ&C>rOa1We+O{$t1IID#8i|A4ps<?TFkRVH8Jy})00&xr2eK%C~E
zjD!|PwEQ;TDI!1FoLS*?Q{%WayA&zxA>n>ExPqb_@ySj$PUZBI&}rL3ciB%13k?vA
zZona>w~86sc*s4?4QuGk`4`{4qii^}e_yhb*OQ~DCd<*9*zyny*&gky7PU#wIV|Jo
z@fUR{*}-}<JEB(Ew}_Uq%A01`&cYb2u#L|_-93$Qer61U=a(;96I(wBc5L1!=;|@l
z*_MFVgTjJ8%|{jXM@&lOe1m*Quh2L2sUe823|wFd@eleVzb#U(sBR6IIv`C`)wB|y
z0hEf8I>wH>1c;HP=bsHTh2{swh0WB*zK5~i)Ldv8^bZ&B7!-`_+aCQIeEHFC9m*Y6
z@8wg5g<aIy;<!!0I2fm3qxmi+!tyX90cl)nGd_%OwL4NDtV`|%Q$g4^#@5l*;;b!B
zkJ?9yw2V_d5>Lohs2V;}sk5?VN=;bPtp$(;6QM{aV+$5)m5JE06VN%W$Vnr3r@Yf5
z^RCH1#QTu9aY|rApmdt`m5g*Y_enWyCB>whs9{Nzn4KGV{P=|){R|hn?gf{@Wp;<I
z66>7ZHM8tjAyPcs2@;OwBphSv_l_wOE7uG?31d6l_`<PYJJPJOT(k5Li33*;r`Paa
za8Z7we^gm;?My1+Zz&NIUKR6CGVRjQT}COJ%uP>Is3kwR5O#OmLf2|=Hq1fvo!jp+
zI8)?fe)0$TC{4f{IT^0BMFa;Jg-qGh31o0EC@*++Qje?Lc=GOY(&l(zJA1av*khD(
zW2@U&r}HtV2;Y&7>T#N7AD_1MX>*dILOW$A6ZsN8kPioNKkfJePfKcIW=~w&i`H;n
zs>*Gw90HyS7ksb(49_&Zgta4|4*tr$;H@^0O!_md_G!OOsz9c%ODvo=JLmp=f2+Ye
zwomw)D*W)BI19@s$TL05xcP9k0FE#@)OLcQ7^7ulxSm{YGPeKdVbZzz(^c3mE~Tb-
z;it~>A{@C||3V`kWAfl1l?t{&;ZIRRrMH16)b;}<3krCUcv@gT9r6a%_tAH5H(San
z;VxWZr0-Bfx+znYlSj@(hj#ts>*FI%pT44Jbz71LhBBFqg4?aY+UxH_emZ7y$QurW
zI9S5aMTb!>=`-EzWgSFEQ<IV`_)LFak#(KxILOobNbRJ&tZzlS2OBTWjw+NYWLJPS
zFd2Ew$yj+R(qQ)_VJoRU%HS6-b(J0RFR&AE38nhjR8_6AbDIL$+cz%=>wgA0?|=Lm
zv>k%K%>Hrb`k>G}lS5I0F+Qmi=TjpyJc7=+X;_MZ*HDZQqt?=wdMQ#bPId8lrtSIn
zD&s&VN1@~(WSPEHW~LtBwd!1XUjXIPExC9%*>m*S6FH>7m6jlj_Y#hyei#N1Em{-{
z6~5k3^1A|}@kgW|)pGSoe!EP=eDxqzkY;gkPMd07zge2{&+2s4m#NXavv5u=WK-lV
z;JB!+IpC0-1LHTk`Jo_@cufpr8@IHlf8UPukE$h7ncDahIXs#yxA!K!<|_MEFHM@6
z^*edjD%G`oCE7Q1LyP+3V1h{_c52AatwG#j6W*6E<}v5VDUIaO&54eV7^(Q=l@Dzl
z<}5U(7)DOI93*ZcyuQU?q)bQNv@LEUEQ-LjnS><yih$s=!`Ve($87}2^+&at))Tct
zi--{~MYeV6Nv2b{fbZhzx&cP6*gGjuS7IXBNO@T}$T7!)hVi}Co5ue1@Mx$I`SZFZ
zhZdeO<D@uZ&L;4ui%GadGr^`j%X9G*l2RJi1N_iU0UF{({k1c(sC=u22LI#tu}Y=Q
zIsz~NzO#Voy->i)`X9B33(4#*_{HEJQyX#6_N6n}_hg<VYil|Tf#1jy$vX@Fnwf3K
zhk<-b--|VIDYK(D*PcLftqrQ?@Wl#TwZdgASqEqPCCPN}=9MW5W<jK4Xm|(W7i2+=
zZ%bujVK*YV*_!9TlBq?i>-)37Lc!1eq<mG8z9Wa(+dn_m>3_ui5mYAq%{2#mRywxM
zU-j$!oDKe21-l_+&Y_n&)}txaRT*ejTrNg96!+Rgz*HEFpK&I3H${wF%b`EA>&hmU
z6><onQdH9m>a#|Pn_N0z?86pOy+WU9RGfttT-6_r4EP;ypK6L{-S12M1)T5P{fld`
z()lE{7YW?FNzhcD;gZAw2DPn(1C1;EWkNUVFqD8ofh9r29%a#Cxy?B}GnM|VkRv1^
zMpunp$qSQs*glaxBT~mfs;YoB_sqy4D>G0VHw25Y$4WgsVSb4(py3b^T<Z8zX{2Pj
z)d9#f%o$3&oc?thU7CgV4^wflyDX-B;K~S_*{V`!o!55`ln}p^GTW|9{iwKJE-9@}
zcc5<kE7FWoM~?;nqnh2(0yMllf4ij7;a}Q+oj_J`gI4y(-~^P9Dyw7_p0v5y?1)~Q
zFqn$lcFB0H;${!JDsfM$H*yym$ku4jTK@KINu%c7NuJd`{1TbwjzSE?k^EiJZY}lT
zW#|;fwK1FX+ID(7BVd<u-U5!NILhTL8B*t@dJ(0%iG>|JNMa4&EoFXt3jaCVieClt
zs)!+{y@p8ZRvGB?2vTc5B`y4JY6jAvviyYIL1V`)MzB3&TKpB$vw&GHA^0&lyIbJ?
zrHnO~%yxWNObX%4+}{{}i0kbsSBCJGKrP!?w!G_#<jlASwh{%@b3;1Q)EbM6;%+&$
z?fv{#A701n#B66jXs>*G2Eo$tG3>a)YJ2G-BvR{);OYWAyK;zYmHk7y@I|)FH;BeW
zI=DnaH1xVucYOSmqy~NXUn@(L&zT#jmi(TGlu};WecAy@%WD;YojbY4OQ+Ag@ez|W
zx|V<cU12xvg~1{RDdAJroG1UVCd_K4{5ds{xp&%%Q@OW2HeTtONeb&($d6P-dC(TD
zQqNi>?cOWB=R<_?Ge}w9<<bfQ$6_M%ElH~1-a*kEY6dWFjSKtSSkbc`YhETME9K+b
znSVD-Cr2mErhQab7a{K@Iqa5Jv3?ac+s#7p10K_eC517dF8&(yfwH|mJWGK9@d-}&
z+Voc#ts-oODLd-tuiIJ9Z5%@5Jvq31W877Z#N1gx{mm&;L)`0wAV^sS`!6~laXE~L
zH|9UiyBHf7Q@z1qk~oY=AI$hsOBwdDAxEdb6+?tVFi*{P0(?vi+A}%^yxzTeQ`$`5
zj__;MDzJ}^O1%Lf+~D*Ysms7<ss}08@&iO4Kc!zV@?N}A(XWB<rp}ksnG0tQ*!MrO
z(;0e6OzyLwe}Zj(Y0f?|2$7)qfYxPm>z#LD5sn+SHh;<iB*D^pg%<TEna7bWkn~$Z
z1QX}@1HuJ1WH@U(<nFsd6BZ7x(U6>!wo;giHAmpOKpH@EYvJ6fzfYzt?R~{4ov{tt
zQ1{Q)Y!lZfN5X})x#69$fkla_DjPXLk@yKNJG$9f;7Cy66ki2{@5P`PUzmB8%h{@(
z46HqNy&QrnPH4i>%Ty(>xQtNEMcRn=IBv^YdKHMUCK8lKqiYT;tu5Rjt&oZGOJ^As
zWqY~S`vNy)&CmSc0kH}_B@fT*gnodx0#Eu*oMVt}Tjh1}MaGM5Up^|7#v61t|Cf0A
zf`iL$I|o^!k3CcZNuUa@Y_E#dHXiPSFuJ2Y-^2Pwz`R}@R$T8YMC%TTQrERDN(4Od
zTc<7pbJagHbiU{c7ukFFr{}zS?rALtc|u6tk~JS+Vui^*H)WOi<+?sIRif`zVt_lj
z2(Yut^6ZGy69uVk#~Tae;5>ciLN8y5EDVZo!y)F4YGRa}2e*C4t%lrB*MC9SXTsk7
zqdMy$NhbNR+b+n}z{l-5D1CKNuQ3d;giEv0yXH}?yBO{ZHJ5zjB9z93KUr*KFH8Q3
z$_W+|Rhp4pm=>>XzkFKygwRBu-Ojb*5s)3*GNJ!|dnyWe8;OJc4i|TsCqgJyl_9Cs
z*Ov0Yh+yNQd<D(fdqY!=MEc<bGuO!9I8nufS#dFH%Ip#Cpg&Ncz5W3DqIGMd>F8=7
z{kxThy{)BO=!nCdxr3PLewVkPr8Gad1cfJu2p8!tfC^O}S_<X#E{jQ5y+}IvN0kS@
zGM;1ekn13t!Evw7u<DTj{G+LjvTx{L*k>QKEUSWok5_X)hBiwpt>LKv;GHxf5a!DC
zfSIXvR`$WW*vw3uoj)N)?2foyZ~m}g&L#4DHm%bJIS;UcRhBq0x2?$K*Y!zZsJ%Ud
zP%_9<H#I@Q*5wtBK^&GN(r`8E&=ENYZ}Eb+hI~Ff>3iHNhp5ca%zY|4Ma~lPEDIjI
z7s;48<F)2`UJ7dPO69-zdplJO3|b$RLJ@>M&g_s@>Sa@v^v9aqC_SHm*3Gs=O7y@s
za;j7*SVAe5{@MBJ@tt4vSX2@|e1l@{P1jP27H4eAX&n&NgT>ZEhSYN4Zwq|oy+3^=
zqV~rRmnlF{{TxrB<h4WaydKIDzf;EQMVh^|>aU>N@ZL(&%Zt=CY!kJM_z)y+1eO`)
zd_H?!t8S_st(AQ{R^F#=%5PJyYMCt|&`KohVWav%vb|RBm-rd>r#zQa+P4XYtE(4p
z`N9qp?|ZxFm1KRGHgVo&OoTQLQC4BO-E|DcK3Zo|5j+CA1^^b0K^JepJM{wN3c5>%
zJpqB|d&-nBpFV$&N1$4h=acfbG9ei&qM*)CY!<qJtp1{#!NS8O1MD>;u3&Db5|ur!
zWbr)rFx$%{b9nZ+a8f#%^RTq<1Ti<(*kZuZM4BKzm4Lf`dv=}ZU=&fpe9=28CPqQL
z=0S|yWrY@0ZkenXd+hhLDQ?O&J@@&e`TYX_0)9hhft=^i+gjnoXzZqI5mj~eaup^i
z%IVG8f5Qyq3xhh6>~F~4&#t(tQO!RuQ~I~qBzUAf<9<5oo>Go>1MWgLqU}?o0kbYs
zZ1I&vKZEyBb6CFUcM;-)t8VTE|ENY~u)n_udQJbwRQz!Ceq^1ArwMS}j{41AQM|I8
zE3&A)A&~Tj&m`%IPYb985MwwvI*C5{?9cRFsTozpIk@|XVSAtw&{bxptBM@|8F=~d
zfOUenDf(32<<u|pM?Ph)7bq+;aQDy4O$AR>!EHko2;!c;hC%Y*B!TJYrE3`x7mun5
zW7s|kF~b^)QvIF>Fdg9e?n~~+5hEIW$H=L}H->GmkgnCao=)0SA{+jq-r~RVm2U{<
z{G&rc>Nc}IiV@PYR-b8L`3<g-d3Ly$(jsx&)wpbwiJ8*jtEsGv15zzx9^Pz-szA;o
zgJKw0RLFCiY#GI1M7^OyPLD~RGkTSpLId<<l3(fgShhBjNDw@)3oIj(-(f|8j3y9E
z2Nzb1vV5|V(IjCc_^7WgEm^1eyOS>e0yglVUh1nn_n*Tu%2|zZNl4lX;&t#IBG9ww
zkEqY#DDLJcr_Q&SAdDh82jeyZDJb8jsj99w3ryB2f5See30xd{TwjEG5h}XFF8>Nx
zJUj{_Wg;V+?Ea>blI`^vd>!Xhv4QU}@`%>@!tTJ8gR5q7{1F8wl=`dW!lKVw&Hc3S
zG+iy88|-%aclXB~Q?y!T_F&@=OfA~uT)XObSV8+D`j;{^zk)|1kBGEg+HrTX+x4zD
z<7(=FU}ZODIO+eaI$>FlS8t_q0T=;)KXxSL=FSR#xK@RA5c)tnHsS8-t2X8EMfF}#
zNG2<Ysdq2b0SU6So3;e@z0tl5V5MpET@=UO`P<vd0N%@%bB~ay%<W}YnSiXl?S|&E
ze8A&g^{;spZf4u-h<DKVC@&*qCLr^!Nt<F|5y`r~|AL(c@Kd#CkXkyQ$KS8V3~uO1
z&C<))`%d@2y4h9J{pNvfiepn|AC~<F_F=>00URs)Yx@IaVB(^!oYpFzOg{Dj8F%A9
zYI58|`A%q{!3#j1lQX;#XOI~1CO=`3zUbPQk*9G<`S92ISnMsXdy=dgHF<QjgrDrH
zWLm0#H=LtTaU$nV!;?k=tGK{nZ=gHD;fiD9klPojAq(e>)RVsREZ1tco??izQI0>Y
zJ2dkRm&_HMnb1W5V2X>mm)bP&tJoQbzycHh=*Y1`0jgqZXlEZV7pJlRl`+XU)xD^g
z`{8g!qgy96N2XR67D&UkNDZvo)d(*}l?O2>K}T&JI99#3L0r+nhO);#aD<&=-2D#$
z=^X`tRT>16`0d!fl5bM~S-eJz6fPj^vj{$A=*FlBHY~nS@aS*eTFDLLY~aLQa5-sI
zcP>aUvU8K>M0q;H7(B?#_w`Rs4H=&CGg8^JbIWB!Kv<W{oSZ!?kn7ziR%7O!n5()V
z9hgxSueR*;f<3cO!N%S_qcyXuFCR}6ud1%cX{`;R%zD1QD%CBC$wy4hJdJ?8nzFE%
zwyB+<_F7#exGTHXzeTRAy>9h>=MO7^X0^7^kH>Zbvbrx`9?drvwb<P?yn`OWuFjrU
z+CtcLM4Mj6H>kPt1+!Ef4(g>&qlz+<vvdbdfbVI~E>t#r#IFz)K5R&|n#wanqfScn
zX8s$;gx%29Xw<r??2tvu*8N8%xH33qG0Nf{7*RM1L~Rz*)K2Al5?1m;fDR`E<7eTb
zg5L%it!PmPY{+$qhcQhVZ_8L8xm38%CU0jiYK7v6HQ7wxUV-iEi#)Y&&6vmNu)NE+
z4w<cc>#s|c94^%i3Y_Sj2Y-iF8LU-xdJFlDNuVtfy!wr<3ZS-HDnAX`Dc$ZrUG@eu
zoTb|nhfygM#n>Wr1f=v_3^vtoR+OcOKcPVGgu#$i{wrDN`Ko_ZKdPC!MNNleY2`Vi
z6_7%ek_904y7BviK))PM4Dh(kUNdF%-M5m)*GnjO(a9L;54NWko6)0kC4C6sNMbGA
zRad69V&S)N6%8afIawy?k20?<7X}ntE!HOYkiADN6UR;RT+eKi`)sTVVMGh(4)`}g
zk_lf+f+<|vy=X7gTcctx4yBBZCi0@Jk&F4~q^Aub_&L;gI=+9YPHg4_i#D|4q31f&
z*lH&mENZ%~M;3%n3E=1G_r_ErY@Vc*Vs*N1E}X5P9eKuCpWICklDA*fcg&yVS!dn<
zW&e>2W|1n=ra!>qgkY_OO3V)~uZ_a0vH*B~u0Q><J46f<Kl>lx-X`}}-w<PQm8vw;
z3^{=_><iKUDUg*evOv~6jNY48bHB+qtZ(sY^nNXmxDRD^J1KXnUl1nSfTrRCZVGHG
z$<4gduPW+IX>U6m==~80yJPTzsY0GZC`q8N1g8DN%P-gNULegUd9f?nGV{d@3U6J6
zKF<0b4tg^KCp1d~i#ye(eva7#enKg!=E-oIRhi2hzyYcmAFKTyAiN!EtS%nBI65on
z>-N`Dn#nGMY!!Rx-lplH>t(N24VW!9@v(H>;?of+{oAK00VQNhge63xFJI*hT^Stz
zIFWVNh@z{A^cWBH8a0dZRsX@YwoAbr)?Y^?n&l@@E*Q{Tg9`D&*K}?Vx`ifxMcvJe
zbqq6@r|`q+cL<d@W#zveq9<&r9{=T8&fYe>J-gu#@_Thd1Ao27A1POotc8HO{G)n(
zZQDw9?XkaCj}JtuEWhRQB<C>2;MV;-NRXKC!O<Y_KXvcgVk`y#))N`Q?b|eJNw)(3
z#=5L?_^oE&9a0|9tB*e#Bwru18J>_CXAKZE?D20&bp%v*C5^_gPgJvUQAElFW`aEJ
zy7Q$;OP_MF34?8{hcegNF96WiMoj7BTP-z(MS<TCFQY4>UMAw@fBfv~4HVaND!E%P
zM6mp$DZ=@4>%d&2rF)S`T9|H7`PAI1Y>#}bT|N&b_|sD@W9;o#ypEu6*NF{22&orp
z-eP^(Hv%|x7+m^lmS(|RBdA?=gU0!x4Y|>^sx@;^XlYh(1aYP63_M*~nb83@pZi#s
z7S2rnQKxEzXp%06;M5QlUu7;qfs|HyNW__~`{SEGB3$ITc0@yYrhC)HawE?nUelEl
ze4kPbhwm&-5B^=LdibL9J<3cuAo_<)Zn#sBzB`kD%d?VRp<|Ni@UR#~T&%$dFQ#F`
zgGJpXGEQWL<Vh+1H35{n{_Z<r5=jqOHqsRbZT5%D`gRB=q~GtF>wJp1A?&z{&sJs$
z?uJ#L8LFF4m(kX8<I{NhlNpA-MR}ots_+Jn{#Tya=&IvQC2(=Qp-SygY>M2N<)jaH
zpGCZnfyBq*qPQF&eaOY05i+YwZuC6U+ok%gid5FDKv7bEsbP@}BIs@=IqMqC8d%6?
zUR)wT>$f?kp6d&pa}b7v;)de-S`pOW)H(tfCL5_^-f=!AWyyq(dv^XE`p~x$v^lt`
zO^P0)r04%pUHD$6o~tiKGg4+U(IwH0=fca;%x}hzC7XHz@8?DFNnJ`EFOyC&K1JIf
zP2ea}Vg7%fbs@0D6SuMKaj^xx*aplH;M$cTajMC;t@GCpKFgw6-uoYM_qb)tp%Bx1
zQT$7-wf9RBC5xH{Chq|h*SVd!b&dv5(<vx<PF4}_aCO^x;X2TCcpt^R`yzL32D7U-
z2NC=FJ<8vvR22M8Zv2y=`C9SPNXGK5o5_8J=tY9u1P6YdS$P_;NGomm=`8RC!bD2h
z_;kU`AcIQ-nwMkH?^k99oIxc&&D^b443O>^JopT^D|*@3`MBEdEv9#fMWm5)xh4XP
z6`c{80hdJ^xy$_48O1+THSz5@Q=b&8lm_+8ib>mxf&B<R_6FvbBtUU~)5~8IecNkg
z9?-hd^1;eBH)Qzro^h0ql#r(&MHk6RXxh&1-g2&*Lh*y=$+^kC6jN}P-(0G)-SO9w
z(!gY22@+UyM4oWoTww{s4>Z#xzb*@4p4pm7(F{DLJm?Qw7W;jBf0V{~lwQhHsNb;8
zx|C3@Lsi4dC{6*?9dp{dimG#m7wRMEvy$SqX!Jj-3hkfB{R@>J7ah?%5hp`-AlV!&
z(ra_{t2*1`{&8g9JdQ(_o`!gfhxIEtZ%E@uhsI4FY+#H=^)ZW;_))j3&yl77GoItF
z_L#tH%5FATeO4Ti<zCRx&2N)2uDTQMTpE-$svBlP<G#eEtSeh~=$C67HqbYFNNmwP
z`eLoQxtGwun|XUNC|8zOkEV^q>_P^{6BYNh)c&~jDyU^cuKT{Bw8ukt_}-4sihpL<
zY0thB!Q}>rZh+#*I6(F-iKqVkQ=Gf3?lfe<B6D9Fd8ej`#OWxV>8uYJpv^PZi@KZi
zM_nDsaQV0l`X&y`YaRa3)<+47)>Lh)Jjqw^6u{_oer{JbhQ2rCH95_=CXXmUkK?-o
z=b&YX`49An+1oI}^`RSelHIPDY}|?XRl1l5OImH;H8>F0j=E`EueHBcAjgO+z*`?g
zwG<LPCR~+oCDCe3DK9^DYofZv!7kOo`e5nWvwOefuh<M~NgAe6#%WyuRnJH`nYZkZ
zR{1`Gk(9wc5ysEn<EYv5!A*y&Z{$HQ{EQ;n$TWbZQ8#wbF#XalYtQ3uUeA1ALlRT6
z`1Ow}qd=u8GM{s8$NwC?$2lIZ+Sxz!UTxSLL5P2J!*+Je(?(T&H5Lp1OSl;uJ(ph6
z9@^0MHx}Pdi^@$l;frP%NjKHhdB2079Z)w=qpTX0e0dKa8!flGaAy8K3B0uU-a(1)
zXI0wY5)6IZd2J%g^rHDU{1b?V^J^3mt|^dpFRxWk*1lM;HurIN>`IHA9&d2=8*0fV
z<<zt2h;CGa6Jb8VbfSspkqus+qU@XVQerQ~)Su;aW70^_+^D=|?k&;LPqEjt#!~!O
zhg9m{Oqj^=NvznKbx0Pn%TfNl=GS^evUuDGm%{M|^J-?*N1$y>fMBxWw>%g|hi}GS
zH;6^_c8-@YFSKyc41~J3V4MKL%yIZS=$lpj+8VSEm*y;w`?ZfbJL*wkieAemSQq5!
zoLcWUbR7GyBdw;bnY^1dFe?{3$?dV&j%UV#<B=>u-<nQ8j#So_34_f<k(5)V?`DN%
z5w_M(=1qkp7Il;ElcJ%b=JfA;Qo&w{z+4NCGV!M{CAp$p&HYKq*-d!^)60!N3k>Xa
zB^(Z1^V#El3i{&^>7;sQ6S6bquNB<Sch33r&9Bk9jvu4)&k(bHVoFp_RL?TjZ_CP^
z$&p>L&bKi0=qX-Wn4b^%ibd@EeW`y`^sSxeUdKiU<W;-qOWp4~CsFK|+`I|ANm?ey
z?lm>z<?l`#CUTF4)H;7Dj$LWh{#$rTSwokEE&lF25H`JF0IHKH6ll%}i+@zf^4B}*
z_INW~x=ZVe8jvpRsEBJCf=ulvmA2j@p}XcwPSk5EjI)u2H67wYqC{(Au}Q99d2ii&
zwpn??gnDmN-tE6{ux)J|A6HooSx4RG;CdsWG7bCke}b9F73EvoCToKgk=fs$2^ATy
zsqZfv@LP9_vd3--PTiLh%-rg?LM5gCD&IMg^|?`*TcOhhSkcZudMqny+A+{%vJ3wb
z>ni;`g}|cn&OTdTtC&4Qz<ZIE60GWj^XQj|(WMOf;h9p!lzbUBsYQw8?R4i9>1j++
zSXFw^)$fbUvwf1HYYcB*%e~z=G@gC(hfBGlzB^+=O3+JdWU#`rbar}w(Avs%+bR7u
zg^Z=A*1<hwBa`v2%TqsR`r_}iLM^NKBy(724pCA`X2G(NQ2>w(2I&RMzY3?~w2^x0
z=Uo(H7ModY<=NG#HZ3YwjutdyF|bwv8}|!ivprOmul8qzN^)mBist8-ugdkyaq}vm
zbAbu|wf}QH$jsADmd)KqfvD7_ko+t@$7NxwFMs3)rq|c7r$znh^Xp?NNgxn1Qi#{@
zA>jGFkudb%?2fpzmxGx3oX9#1I2l;=2wujk5$yB(3?fi4XzIbQk)-v`Hv*TR#rByt
z+m7}uZ{15bW-gWI87Te5Ue{Zg*mEO~#qZWl;O+L1^uQ1>rww*yuS4|AV?8zfD~iS=
zR2CrTDSVhdXX+jfEgdkOHm{78Z`4dWRg|lauXjZM8fE<wiCr2+DSeqhC^^^swO$6>
z+P<(&%KSS$2&sH)_j$B5Rk!I^rftZZU~KY}+)}_%RRt~D)uK<1Uy?0!JBWy|fR4AJ
zB*zxg*4Hkit%w36wfw9aijPIA+Ly+eY=f;e=kFU_REQ5=C0NUS7DX^+<^0XDXc#0-
zX(@dYCkaM*3n4<LtbLLNMacS98=aY4T6w-Ay-mA4tWDEIuPK3X%l<JjdaMG{KzVB<
zZmgaAbU5vLZ*w)#_Ci*s^OvwiD))a$7H+c>P7X}rQZRjIg27fken%I};4XCXoP#O2
zn9>&5n&r>1qF04TIZyZ<zNv?GRU7t@O8eo5?)2411jz;Vol*p~s@Af3r#m#gV7NsQ
zqccqElb(XqefK>kAT#1NvYl&@B2SPO<^&29rxB5YrTl7E(nJB<QL^UStIt3DBzWrc
zy8lj)G!YBeZ$?JSbewU!B3k7pB%bmS9+kb{Aui*3#%*KW<(p#{h2epFl)BY3aFwUn
zyxAfGv&#tnS;x<g7!}x-fXVH88OtRY?0-znV6d=QTx339;|zyH*~x(Cn+Sz-u*hah
zF=<FOO{0Ww0H;M58^{u`D%GE1!C4dZH{BP;6)t^mPnQb|{hqSL@}03aAE;G+Lx%+E
z+D4-O46X)f_M}MV#NkXP$PsxjM+4oY(K{TvjMvAWPZJo^?TT&1v0uw$pWb&pJo=hH
z3o+aMj`lkrk!sw{F7eeGX^drz3q0_;As;@%fyxaTR-N}ah)}md??iJA|2|i_aiGS9
zzFmjmlYz(9x47A))|rh)8SV4_qY_X*$-bnNB5PTv!WWM(Q_fv3*p7{sF7A8@8$K>5
zvsjMei?j+lEi)i(>0H;=3$#C3JB==K(49lJDTb*V@oE`@rY}SP3pBO$h-m7B9sqMu
zVHu4v(jbI(evH@I#^el4J0e-hMcy}m_@XX#f>v!TmHo7FsDYK*Uwu@f<kPPTiM<G;
zWirO@Cgr+k_RNhNmbXr_ZY}`G$UhOe7O~;YjB^`{T9y9A#kieZmLizpay_mxaO)+d
z-$ruCxc?GKs~a-Q=QQYSC}KuGqT>G1C2k&>4xf6nfT`DG2WB{ZY(C(cu}O5xkoUk0
zYZx?s^X|^>3l;SGIqclT*G3(6GBQ}A`HX5bF#pWS0GTp1fjIC#-q>}xO85r1$XWcI
z<8ZZ#ITZ-<`T>3E>11W6I0NgvI(JBjo3{#+>!=v>iv#QJ0<t~{HrjL;zca2<$tkJG
zp-Am2C2|H$C6X0%jY+qpKkJpg&`AE%_~UWY39vXv?3a?x5vt=v1fRRI)x2M*$7ip(
zcTukZJ5drzRlop+#$dU+Lum%}l{{>91j<9<4${`h6-G}eUCTYVPlpS?%q68?>{+Tw
zotB+8M7_I^(w6;4<$F=^Sl1LO3bM%ff!d3Wk}(O0KyByZjul3!!L<}-BH3nT*UabE
z5DjZnfM^^NVsCY;$+0P}UPm17sGlLT@HAIu#;Mk3iuhR=N#}eg;6beej%O*UZMsWP
ztATEjPBKNy_7P1*P~1eK)R~*|UHox>d+J+q-PwlKBg3P#5BM5c0AtqqBR9lD-%j@V
zyHXKD-}=m>Z3CaQ#KCKe%+EL^l8{0=Sp_Hgu9ge8sJkQLfVzv^t`7-QkBS}T9nT;^
zuZu<Ff@!MSU4_opgX$E<yV7d0*UH48Z%=wZ+WD_M$DPD$^-2H;Sx;-nvxi>*){>4D
zV0jHX6YKTX^3`TB;dEC%bwaNzJ3--Xp#)+~T%1+&LOozDP%lmJ$(rYt^V(Mcj_@^B
zZbz-osA;P3g<CPybiZTnc0QKrNR5F!&Aw{2$@z>v988^r#s18dUw33Z{E`}Vt6zv6
z@TLx{w13G}HMa2Rc|XBREw2P6T*Q0zI(ZB!eE35nDqo^Km8X{=_~cG$Fi88lehoVX
zqU9|kuoxrGAMO^V(;tuHeM=!vKa}*x(#!BC*sQpW&Hy_56~hX!@9x^&H}U@!Bx(4{
zHZQ+km}EtwR{c^Oavo4gxw|L%{-ffpi+P1N%!BzPz;^9`?#qvUX}52do+R}i59J#0
z>xxPd8xijc@}851AVMTM60ri($LJd4w|;bsEAY3Q?~gO4u+x&PF@(j*e$ToKZS%6!
zI?n@X6QGazVffNW;8s7Xy4zeglheY8oBc1ynpooclN3ZPLt-Qyb}(GjOG;N(xO4|U
z-I;$(<I1@3M`rGDu^tDxP=S;nZ-3^rzb$(#`UU>!T}f%D6mEL(LUlb*Iyqd(;p&7t
z?+V|aMUFmu_;C6h;~?q4iGwNrBYre_3nHTaYdRCFmGdF(fcbCW1|P}K4N+pIjB*n1
zFjN@$L&@u^Q!iU-T8#y1p=-w?>!Rj>Zg-OVi1%V$SW<1CJXvjAXDy(XseI%xAga~X
z9i^XV*6C7|#SP%xtk__JZd7oa&|*#bUYy5meQw)|`Uq)U8dL=`ruHrx_(OskV~pm{
zxX>csQs}%pjUkI15pvA=6Y2$5TC}uZoD;Uf&s>qlhlt=x&-k?5vi!pus4488LEBT;
z<cHz^EmD+ONb}(qk#&qnc^zalgG^dIxTppFK7ZR+!OEcZi8TfWzQ`*Y6*CyWcb$|h
z`>v?iJ9LBe7B%EAS6p5e#xOTBpebynq$z+}GYeJiJc=_p{EV?E`i#yhcFD~WMR;K$
zdWw8k_>2XZ+fo;Nyg7Q?++qdlQVecZEZ%8S5`#84K~dI!!d+{(D3EGUfEkDGp*QT@
zIrlm9pEs6iSo^fa=ygQd!*?=H8O97+`8Iz4sM^P@jRfaEdb;o=1n2LBOqUDuA(TlU
ztUAtD5UsLa{<S*08f)`-d|k_{Tc{i<NTg1qv-z3ZE9H(DX)}=cRK-dSXZyr8Wh<)d
z4Pgn$$a>@0hH2MrdPKSc4)(mCeR&!(DDWeWYx_NO19xU8ycdCPO<T)Yt2Y$MXovU8
z-tV0_I3%iMTNktVmL`n;kiwY|Bus*k9yffH$#osNR=vwh-#GXC#$FNTv+<Tpf;>Wd
ze~t)-C~vR$Oxo{U6t~Z=w*X@-)i^#_T5oO8UjG{ebGYcMSO&OO_W7o!<l%FiIL#<h
z%xn&%$uuuXPhDR(%9C%5v26(+PT$iYOFl?QVoRME6xasda_(J>Z1!u_7+)GFFVb8*
zb~4#<R!p;Hn@!=anlKo59ag9a-dw!(5&4tOQ#)<>_VFcy1bglIVPVP1*D7Oks1Bl<
zH1*P;L_aqLOjqH_?E?mw%8N`uXJ8}36W9!Xb)UpSLu1RYHp(p}Pgxh&T!b_gw6A_~
zvs1}E(FDCn56$=L7-BFM1$JS^xG$$8X9Uy7*Swyw-S?XF!peP|$q{^*6+{F~%m2re
zU$X7=ot%xVoQQM0(&|M{nWF5R$IB3)aUnTf4VZsJ5p4Y`#y-BpP>;jS+XPz9_<9T7
znWq#|tk+ef8}v(Jp13d_KW2aPIx+pNbX!~ag3KW*8}0NlT0Wd)3TKPa760U%?paO0
zZCL@g+2Znj^0_|nP<kMfL2Hea#f$@_a|aJJBxy8PibYf<=RO7rG~<!wGj7jyelq>u
z^UWC?U#GXN^))N~Ff|(3rVf4lN-L*)s^e2KXEELZT7;kaFKCA`-4g%!S-dSp5KS!o
zjgLmOlFJMLhX(H>HWgcYtzoiBwa=c#;9u?K@m}b3Vun>kb(OLLH{=`L^%v+E1PDqA
zd)C14aJN^lwnn{Vu<Pu)`7%=>(Gj^r`ejDcA`f13-KVgfR^^JrMQVcnLqD&G3ZCve
zQPR8a4fAC7{bg<u^I>PBl@O=&N=SPE&zP7yx!<4Mtw6NsbhpnBdMFyvA=>l<2f>7Q
ziG>ZnAx*w$C}g;tmNc5_PsPXy+M`dCt4_EJe%jiE{`_g?f@TSA$|a7~)`CP;KEA_S
z3wwA~Y$^zY6pE7lEFQv(VDxNZwuHc`5^n+2Ax-92-c63-#tWgW#$mrVH?F0yFLJv`
zY(6XLS5jiS1S(zQylfU|TpDS=V>Bw{m`N6nJze<)dT1bTM}yKElSkLjveg;Sa0K@I
zMw>$?&ew@Jd%>d5#$Kr_$^rSjUll}0<P!f76y_}Qeh5#C{iUo)Xk0|KbP$$jl;UCJ
zNP^$!&aLuiGRc(3y(tS5=N2|#{V<Dm!L$6B6hi++2u`=eGCTiKw##nT+MdoJ&S>uA
zWuNNF%`dIwOt*P6`)2B=KLodhg~<A4NGw^GK!=(prSV-A-8;Tvk_5Q}L4c-%oLhkd
z&+T&l8(JDZ32wRHFM1wZQC2ArwtFWg$%yV2sEJQLJSgsf`fNe0AQi7z8u0sEN_&M4
zU9`!sQ@C-GBy>^A4Rbxg`zHJfY%P>Deli!X?B=PlHkzQJS)-qyOkSN88;mh<Eny-T
zklE)QO-PBpZr|Em7uk>oU=FZZ@I36$-*Q+nr-&gJ;J|@FDS5K+$Yx>0$QGFy2=m0$
zrUrwiLqZ+W`aI*L+F6^jm8sR^xh45qFA}#Zwtmdzy<__nmI#f*@BDZUTjnbLsrrm;
z>o#Ko9KnbwIE4qqi_;>SAWTJTHh_xvlgw{$J3_sGv^05Vxwg4<Ggyk_o!mLx;>p*P
zzdJcZR0ts?l{{)w%sQlTnTB~0tny=@mUspxS-^qDFxPrhT?YS)Ha8Q4d>3Xj5Z8P^
zKIGKJk~ycr$fD|g`g1o>-SX~cC#fU*$2@?S?ZV>1DpD-!eesd{PUspoqpBw2RC_ho
zKSUTR;}!+7S(Qz<xs_}2ER}F<=Lm0DJHP6xj)T<duzAVI@zhQ;H7vg!?$~#;|B`9o
zs<)=FNEdls5>i_a$ZfG<(N9v|j_G55?F(nAywau!)G{VgcF4X2r`G?*@70dGenipZ
z!LF&_{GdSH?d-ZOUiKS6Jl2_YiK@VxMks`w?K5>Rf<y7YxNOG4YKBzD$6eSb+piT|
ztJ<2GfCf1VJSShzS>LSA^3f~GK!+?Pk0Wkp*Vf5yG9;=UtG}kT9KM2lV2#HXaSkkM
z7Dr;ur>wFb7{2j^GgvsicV)vn_V&9?L;LA3YX3t){vso<OXbNuJKKwoee~d{?=|br
zgSJ;)t^9r+21(*+R<%@%KiD1h_YV4>+y=P|(39it+E3rLcc)>OuI(7o@UA|Jqa5A}
zm>H~D({IN(dJjzBnH9%fdtUN*`#R{9y}Zx*IXFxyI$>}T{YZs-GxS0^`sMIX(z$|m
zS<5I}e@F295_S@Y(#AJlg=FWCkpZ`!{(WuySp99SbuW&y{js#rfLO0&^FW%aJ?eoi
z$_*mVja7+sNKW61`}RZP!X3cj^Fw)KFU3*%oIvjyg^>MuAmovW7EVxpKGx{5zl*Zb
zD^<#J$1QM=gh3S#2Q8TEI>hs4k%`~I`<^_ie&eL~ZbzqU_3s8W72Dp~h()2hcvkz}
z3)EWTk2B@^?mfYzg)r@z{Z3)A&}3XF(g{!H-FzHa>5`}L)~(OZ$~&&nsiz=1)7RXZ
z8Q}?Fs3Uc2yg#A|IdfME{yEUL&FRFJ65JB%mkgV6DT+6ePNkz0i?$sJig%h94g_wQ
zhtRCFyYcvasf6rvx=xMkl^kty>4}Wx_|o973uu?IY6I*GR2(nl#v7OVjz+eU&hK<1
z*~&!fjIDeRh8%W(8GkaUNLKu@1VM+%-ZfdEpy3Ng8#AzN1GRopSF4#)hJ6N;d<p`P
z`<tXTjopmOJVA_r>sOzHpwo@VB7N(%@O%?-Bapm7+xF`3kDFOxqCge-8_d20as*ej
zH*b(J{e8F=><+`rvsM2JnQDOFR@t#rRT<LgfB^V@hhn}c6mPm9x(GOpel)?N9aN%i
zTY%_nR7O3&Aly*F9}_hSDl2~akILUp6;~zvK4SD}G;6O*<XC==;Gb@@*|Tbi!Esv~
z#?9GhGy*2W^f@E^K^v8sO3qn<MH2_J^124HK7jR%%XxxZZ$RMo?9D*ZJp);$bfU-k
zWOKUX_WB_J3{D+&xjv#G?CN#yiv)FEq12$kL5$RA(Y8M^wPI^0{D!=M^VHsj<H~Oi
zzmy%9jb^b<P)1sT=4Fb(zWbHjZ_VlOQ=JC>*>iHP{Wg1mnFSg<vfi`m+2;LR7$yVJ
zGe#Wp_k7fBZu?pA=xeEMa#EfBt<i9O#0vI>AwjD+RVC_JyDQwRoG&(obE6Qta7t7>
z7Ce|sveimHJtCH|>5mLg(7fQIG*;Eq=`AXs->-wzDw=S?dPm}D_iLosr)gpBZq6lU
zw{=a-%z_NX{!!gNeO_3zoq4a=Kkz0de$tG)rJ~gRsYt)EZk_o1O#~uLRQQz)wn@VP
za7>&;81-g}Pzt?V<?vN7gIf2bi^XS*ZO-bOz*EN3shoG?-SHWpD2aDeTb)!id!>v1
z!~%ygkCF@5;``rm4ZDz$?=BoVnfcP|zKd-6*Tis)DHsrb;zxu}##JALmhgT4=lv1(
z;6vrWLyuNe-94l5zTDf2*iMmIXFJ8}**gd=kw3RWXIYcv^5J(IX@c+3wx{@9p!#*e
z`##S354};w@~PS{mAN*&fi6~8JyMGsU95-|GF|&g#zx-V&vz-uGB=K+V~#s})b1@`
zDy-g#KUQn9=TYml?>r6j|DYSSuYfd55vMznx-_I5J>5A4QPO77H~vw9&Mht|!+6u6
ze^iXWFQtYHO;?rd9N=nvr>+maSH6g-GaES`QiIRst|bHPgQKrzcuw+iO@xFWF&XLS
zHcA59V}<|=Z<IpCbmymSA_K%Mi6thX)ao)HcSEJkKfr|FFgqA7e220aD-M@`s#-ZD
zlMw`~68mRbJujhm6gIg72TMj=+7<Xq^gO@L8gNq@t1)S<5f!qb?&Okd4Ma<ir+5DF
zZ+1|<u4=JXSXsDE)z3kJl2`^MVwM;D=<zHqqN^qlr$ewA3284~#}aHPwZ-`yH~z2Q
zjm)3;Pszqn>IIS${dcr{!P)e%bP5$if#~STa<9x4g<jr6Vy4ha4IYhB7E$N+oJ6-y
zp&Q;&)})X-(2;7OGV25MU8*egenEpM12M9jUdRW9MVxU(Cnci+eqI?ZdQxC(+NCL>
z(X5Bo<5i&aF#Vt{-0tn+^L`sbYJz-xsz8oP>09Y>v|Y{YW51!VGw#}DAQ3pU!{FR0
zDBVXU6zn^oD+7m4fqdV3{*=rp*5%5Cw|i}82wdPitAEq^#vQA5em^&QzSP0VD_b?0
z7e+y(JLLF#+!-5GdS|8S(Bg4P!O+;LA7@Z_7wSLhPQ5RbFDofmk?OGrQTHx%JW0HC
ze|cr>Zlg!v8^`u3jPeI~-%|{8Fuemdl>W(vL6HKZIh+6xJ6KSEL7YIw31_3eJ9FCI
zB8W10Lp;^gnG%?!CH1e5({t#-Y{;88+dm(t^oeOWg^XrMeto&lGBaJ+T8XydOgAx{
z<Zn9a5*Cgrk^9dxy-+`s3+R?^YhLGjDC+jZB)MUmILq8Bv~ntDiU!595=IkzPGTEo
zS^iP!-OtwP`yip>wypBeC?ox=T6t^<pnog0oo{hV4-wpVL-#>})OXUm5<j^T9!bgz
z#N5)6Y4axK=wfYcZ9W*<#D0D<v$)q+ZK)(Te;9Nn<2OHH^1N*sB*A1HovtS&D)P;<
zNZS`mVy>LvY^r{jau*Az3cv>toP>~RIcrsQ27y8=$xXSQs?z>C!Ex3o6UymlFvV|h
zCP4#tRC3<ZsH*qG?M*jtzZps6Q94Y*9Y+6_=N@6nfO!4_k9-=?L>(#^rWubkFjaB+
z;?EvWGFT8Xu+`@r&%%iw;U}kVp=~#2Fh5(g%XG1d<G5+e%GSgTQcRoyEa2-Joyig!
zE=Ovoc}f~#n^Wc}C+zxe`(E_rx%iguf6k$Zk#!A*M^>SU4>AnhS&+82=3MCed8slt
zG^dmBuvCS5+gHnl$~8N(vAh|kiDvB|b#Fh`<~TKgc<d4+TVp$=2Voyi+?BVt8WY4a
zIJq&qUJEfs5^qmt2?~1Db#LD!GOlf{RV7amM3p;29oR9&W_^;LW)H?Uj;pZM;(}>;
z)#~4P^`)84>CpRWXA44qa2AG2Vi-bN*XDH_Y{}i}yE|@(U80T4yt8&pA+&iKH6m_e
zzb?oX05=U|F_;mns5cPIc+d6a`lx4rs>zA6px5w!3h)xZxL;TU%th1NW8BZz>uYXC
zG0NFike#a6dVQ0Wf(tZ;>LO3{><Ug$Ioi5|xfPR2Y)ooRhd)9_E))cZ!qj+AYfiDJ
z9+&TnF8)z%g^q*uygMSLVx#1ngx0zOByQL%eE&3ir-Pk2)tum-;ZZILrzVCs4Y~aU
zCx!H2iyiYNl7Pz|?DpFm!9>T&@CAe5hh5p>Siz7zq$|D6MzF^2s&2wl{A2_WeGfQ)
zzG44b7C;#mdc^pIFU@<54g>ZUZ^L6EYoWDH1M@$Fb%#6jK_+Ksj%M5eF;O?)p2)yn
z<TfrXJkn@e(sS(8uldrzlI^6`FYn5hK=jBz={2r(W%WyV#{2wM?QmvOmyFevU&iP?
zy9FDuPxZGRKxf&5!Ss#JW?ZfhVH-9NPhFq9`ug?lF27BWQ>^IKRW5$VJ%mC8?iP?7
z$HJl4YyNiG8PeXsb#)QPs#VW-?Mu>ndt#p-lJ8}m+e`lNi6@XsK&b$(ef#b1l_(i#
z%<oV}d<FHjt`By>#JW#y<oHzD3wWoXTO<}MI6W|(BgrFKW?r4g#=-{RgYHwZ91Oqo
z{!!KOMYYVR8vy#L!kl&-?lA0QjU<EK7qhP!wnM2xWg7)Qx5+j^E2#CdqlJQ@*&G8*
zg1yj;N<CMMQ^9M_O)281t~8k^V-4XOuE#|uDtL!f{wBSULHC@f&WnfrUF+%V@~*Q5
zo*?2Hmte@ikrq@@x{mR{F(0eeo|4%jgPN0+nk40o`Z)-@vxM;ED>>X#%PYwB+AD9G
zYM)aDX>JPH<a85fc<-TAulF4A<AV1uJfxcEtUcHE36VE!PXi7BnjPFT2h96B9LP^C
zOy!1usz-V*UyYERm`4qkGi<yuS|66rHpKbCUV*2nQcaH=((O9pl~8>tXr$<$r0+OG
zg-kuToMC_9TYqk_=)LBp&cm6w2F_w~cJSM|j+GK7xgG<&LgP^naQDMwf2(1hN4b15
zRidCpxoy>(XXc@zDS|G1%9GH?6HlE)CNxqX94IJMEMUu&iyC?E0WA15(EoFE77k6m
zZx>fYL;;m95v03AQc)O;lrG7Q9!!u<5fq6bY>Y{&qZu7TT0oj1FnYp}?oNsCyWjgK
z?Adld*L|IHJ}2^k$XmA*P+C;;c_0SeGv2bDp0_RF%x1haH5jJ|P-z+%SZPf!D}`HC
zarv`TwsrkJ?4)6}`{VanrIjl8=>wCz*AvC7o9Pj$0s=a)2H7F{OUd+Co3)XYt~vI)
z#a175uX;BwA9SIN_#aIW>(^0hIuc5v7SXBS$KH2`E7I5&o?CST|Anf5EQ-aj{ED_q
z``89qY2CD9ra0hXqMYgRY@UPOjsSl8eiu?P{?7&TKmf3wAgmxNUymdC&u}B3asjT!
z=6+*M1h36j`H}j>XH(z@G8)s|Ucs4ui2!|*+^?9R%<S3bRn34cJhX2(jyEv`Wy813
z?lzc{scut%Dfs!%p*8aBmHhVg0&h8eHB4^qY*n-jtSAU*dK}{JRZEN(EcCXe2g<?0
z-E;tdw_T!aCU$Tl&#X*4dDkn`i?9hlDUdVBxtkv!(<Rtl@uI}aN|Fxl&wHz3g*0s6
z6}(Jr>C3N%7B7N4TlGShrd)gbO*I$^4c&~P3}w?ssDTtZU1#jx3MR|XSYM>Zl&^C5
zFv15|SZ$bFD*JvsF~Csme1ym#cA0Ukk*btG02u=fdusZFIQ1YF+ji5bQv%*;QL)Q~
zeo)*B@r)ffO!758H{Yx~>y#dluI)-YeWL7s|4evPcswz9w48{w?DH+-N-C=Tr22U~
zgT%8vQ2X?$-T&Ed-Ys?<D@yzW5p-MkCNQZOpzx$+D-tVYS%ks1RUyo5w`N!1Yc&Dw
zivG&`yXhl7WYXvd5kiHV37+7q+Bu3V4fl9<B5Tu60X>59sNJOhd!!?5LMH$NNa)-N
zaBK~^pOGotg70RGDl|1CFigF!h|5ES@|CFPE2vgM`Ype+s(tZ3G=#Q_=k#qsL#<z6
z<vW_)#FfD7>-~1cR`nEi>ojt?Q~>}?h)=xP%hUv+6}2N5ktgBsw0p8!URb~UtB)BS
z(!9wxwxiw5OFiSvW-}f2OVdDT5LNi2xjv%E&w>oBlUEUAkok)$crxm{N2YxLCV8tK
z4;xBH%$EQ3GZkn&h0Y&ikoOGYnZPQ05D)Lx(Q*%zQGxzXLo2u}Unb45ZG4)VR;9T{
zB?Q)I(DzN#rAQv`b3Fv~wyq1T*8$456LF>@8eIx5v$lHG7u>kY?&UBF;{s<^)YOe+
z=vVzUnnrn$%Lgb7_IJ-33-UY(fsZ1IkP6xN`HymS=#d_YI?{u$W`e8dZ?eDzIlsRT
z1#cMF(s4_%(qtv=xXVS{Zg$|_L~4S<&;PqI;Zc*45ZtyE!0+L8OM5bF)ijDU(i#jv
zG+LkH!qEhe<UXErM}}5k^Vu4mypkb~Ak*V6tG8N=S&U})boG8k2Ti6vmH{69qqd{{
zcDr{01i6SD5<8wJ1XKFqtdVxl%J19bL186BEsBPPJQ(~&KYe1Irf71Ov3Izy7q&T4
znx^~rOk_pdrs0*ze>X@9>W2uLGYJwZ<I3j@wHSHx?1A3;<pbS?f<lc^z}2!5RVeG1
z5W6fJAJf;4jnC-wo%ynD8JS2=bq1dA3P`mU<QsiYzdSc|J(*$pPZ)P(URcBiPAt2>
z<2;e)g0U77Hu?bMz@6d*mQ0Jn_1Y5{PFIN(`|fNd=F`>!f{o832p99?YLF@hxMUST
zPvf)%=d+qApYK&4<DVL?UBSRJ^f<E)!r=L*i|3i%q8JGmeR_(sNivd0?~%Wk&@E&^
zZ?Lo+zV^+-6%epWd6d6we$*3ri$R(vO`1yRSu)h`6+*X#8}j?-+^Z#~`taS;K2e`s
zqgffh`GgWZfFU%JAlj&n55Q~otD&XRH8SQZ!bUwI@YlhzrYt>$I!W^%<}4=xqrS-T
z|8C4XS+NQ&#z*|<FaP&QEPfbKqkm8Uok_G^rcP&QuU7eP>VQVbxi<z0#z`W#7<4q9
z1GfQHAi*u#`M=5wX}(^6vG$llddv>v`e_%5tJo`?beBX5Phj)zDA~uDr?iMT$FxaH
z6Y9TiX4HLe^Qt~-6t+tjTgo2bMlLhnWiCztwmb^2{BEglnJw?5$-Cv#Z7}g66!4s`
zs0VL{c?)bB(iefIyg>@a=fL%?kQUC3xwNk2peG`4huj@6*$|$0zAi-2aR4n0@j?vW
zFZUMvO9xwIe=}R;j;~P*kiUd1cjp+Wu=Neee_XlvQ|&QvS94^d9dAzY6<Tf<o^}`C
zRw8G|QDZ$)1_po7_+)X=BL+6J>~=xEgA!ZKyi1J>;mGrkLzg5Fqufj8cxT?|g!1-Q
zk2voXG$nd>;A*cV0Ea!oD#P(jn#$YpK~<KP;)Pz;7Q3lI@g>u5AgoRM2I8!>&?vv&
z1|pLQDUmnwBmau;XR7LxqV1sBt@TnQeYmKHcxPCH+gh5WsW+DtXPA(j+Zr6oGqI^k
zj?9?t4LW^GJR<7jgm9p%-=D8B*ZG`i!;z!IXD2GcC1tH|*N-BE`uYgL)VOzIi^Dbo
zId8yN$iP7RH;+Bf8`C1&?;_NF=V-VFlhvS!l=2FJ9fP%029|3h3Nw3L&7U|SY<rC0
z*`~koa>?e03hHMHtu$~d@w4_iOD>!8x5^K@YLCeK%1#Gs6<nG`sQqr7mrDFZoBwy?
z^nHGsto|+n%)+OqE>Fl3ACF3F7@R7lkMM;=4H7k?VNDA&fcLwx(o?S_K;kjt4IS-x
zYGuCpu><hllM|x!^YKGu^@ElRFu;Uc?&?qsK~o{j4GrJ#B6~@MkSiaO7&<JAY-noS
z1Z6)<vK={yf<L@^?MtLyh|HAT$vjun-|y0ke(FIgH!Caq%6eZ(5Xq<xmY%nK_?h@O
zO+AJ6^e2!+4P;~+%)D`to}Hq|#9Bcn&&tL>DZ)n85qHUc+?(>|ETYZ4)-V6Kq>IEA
zO>sN+BLy=ybR$Z8bI1;BBhp){Ye?~v2eIorJtiww6KbhEl{M7>K8*(sL&fIrGJ{?R
zs2`Dp@v1wwH0Gz-rNKyI*e9m5z7)kw*{J)zj?>a9bYr)O6qj);8<!AJy+Pjjc24lW
zb*Cq;pNl8Hnpq8O=eX++hz@|?ez~Ri3K5uoPyU6}DnswT%?fv|;oDTeEIT`S|26-v
zOhpZi)r5=dhNn6$x!H`omhN<?hJ{}h1x<=s4LkS^&4mP8KW&9R?N->f;l+EI8&sXe
z0my+SEmxe7nPa{?J|PU*`O_np>Z4l4qn6F$leU{mJrjm*ynpiiV|y;8MPG=GKwV{9
zESuR4<wj-}3evmiBRFarGlDc`MIl;~2g_rDQ$^QPSJN7q-dhkphQ_EW+r6D(0KV46
z8ZFINHTc6CC)|>uS4Rejrw1#vYt159OcB3NW&L;oynJ<M9YTH{ulCcpX3pI_PJ9eb
zobws=$O62-jc!M_T0&3RDttv;Bt&`(R)A`W<BnUy@&D=`%l55?rE!k{^ILmt4&pkV
zqv}rD9*cHqs>Zvwk~g8{A+I8l>Bf)pbzLYmN@i`Fws$jvk=%p1?@#?bUXc_i$ZJzQ
z!J<Nq<C{}%;nFpc#0L$tZgiehuB>6qoxPK4U%F4xnDwlDN(C=A^U607w<gw67;sHb
zE9lxvu3uE{;!*zIGe%mZ2foJUss>fO)_(+rEi0*vuK)V)hI;}K()N4#OW3zexVYu_
zu@_uLLDy~jv5$4|Bg)<#JYtgUwPtUhxplvWDS1KsWi41{Rj92KUl=8|<k+@k>}H#|
zTZ`aio)`Ho;l}Jr{;k+7yP14XuDBb9YBe9N9n337Gm2=Jgm#Zs>Z?>w+%YS4aYx<Q
z^>(kb6dbD@aD?XJIZjRwqwQG5t-$~q+Mw1Qrh1L9Ow+ze-&x-)Y-c1C%SR`Mf~AVd
z`S)=a1UccB&rNY7sZm@OFvADohgAIgRy4-mbDS;br~P*L%z`}&!$PAm%iZ1VaJhlp
zFtae!T||P@DP*;CJF~Dxgyp5C(Y+M<R@w=Y+a3Dg7K=1$Aj{fr_XsC0qNO+!fU{85
zn^72j#Q9oG!o4tiJ4ND4m*A&86;8+;lEJt{urPoV=;E#rEuB~VCH15AxRck+LBDLV
ze^fD$UpA%XBGhS*Glj=-gH`$FbRW*6DCgZ+Y;kTv(7*%r0(paw?2%?0&nG86F#4vK
zei0-st)_Y-UVIJO{n+Yh3DyJD_=-XNIKIFzNd98?1+-Pj$Dso2(rNa%F!4<{hI`#s
z8ihH7hV=&zdpR-;qIsWPZdY&gh{3BL6Sf=bT*|!OoBxCd1ypT;C!>swoD26#n%sj$
zZYPovs?F_FXX>A0q%Kq82)c3ElO;*(p*Z<HLk7P66Y2bkI~?eqAks7Omh$n@%M7PK
zWcufpbDTajQY;cAD*{*yvr4S^R?;cc@U4Wcb=o*i03&5I6{OI`r#>JYCH%O;#AZTE
zyN%o6XZ81OOJgbXJ@=+HXo6~0g6`~#%TyBMeLb#W_kDP^Px+ubB9J(~71Uii_kPh;
z9_8B;2%b}9_D~S`Y0o-Tlg)G^%IFFIW?LcwVbCbx?p7onYtTpC<kb*e&?T`BMwQ^-
zdIl1i=QHy)Lkvxq!6}?!kD3`<c=?O<6E(+)L+<57r*fSwO#JRfL>ty+Ys}Uy<qNxx
z#cvN8gZkkB&k)yu3dfe_zicHS)1CO(SJ2N+_5=aOXWp=<jT6+$Rj`k_LFpxn2S61R
z8b@+Mu2PnclH*PL1=zeLj^21GGGv$=(VI!}6}3W-Z~!>dKdt+v{Qba!OX4$v;_rfy
ze0|=xN~RMto^Rj%bFrv%j1{+h(Jcin#6eoJI!;wGyphuofA;7Lxs11g+K7Yq)CK(-
z({F7SuF#|2C#LWHRbSG5*tx2XF9aZJ(Uqow@NPM5J8mBTxo-%wLys@cBH_wkH-u?|
z*Aw>v=OHmMUq;R`!J(j@$MezKkxT2}Qtb)`cRoZaoOHECd+ep*rkx~&b^;|k{Mtq8
zm&S#BI7H>kiFyQ6oadjZaS=fR29M&yi(}AmGFmbVr?b;-WZg$8%QK~q<*6|xi=muv
zZ(BrNfIBVw7kP<2k>ws09BhN*>@Qi@vp{bYpyz+%J6THQy|dnfEvHa5J#DS-8`D@!
zJJ_$+sVI{%!6MDb5Ucj1*RL?N_1vC2)pO=TEEW6`|4`6AEbDbRR#;zLR7)2fpM>)|
z#Q~b>?8iA+<M_xQ6hbOkWD!%*O0@bVm!T^pox7gpYVO`BFx82F(-d-_)yz3|$64o`
z1Jl&%fi4aZ0stkC+mHG{U73@uy7I8?aSm_a1jU|+*`7xN0Iw3Pd9e&w-(%pUtB+I{
zlK-4qmd;j0qv`RZz-WCA+3s|u0hNO}+E%mb=5w)_p2JT)iKC}3@p82=%C7j0WC48S
zW@1aKH&KFOt`?j7Bl~jkRF~pB_+C!_B&g1O3Y(t`EU2m}RRY68vI)5k?X4fzndr;P
z)8*B|R=y!@`ckW^Lt+7-(G9=8f$ZI{<qW$@vJ1=RK_ms>V-@UD<#-xm2TD)1zDklg
z%AQfl5U{pP2@7h^uc0xKgcOVuh@xfpCCxkn;yekhidJm8PJYX>Ff)~lMSu3Kz-n}i
zr<RJ%$I#(P7{?Q%py^eM`^x4*Y&{NT2J_DKDJ4N{+#`$OinR?q{!`t29*uM@e=1wh
zwSH4DGn*EL4;;qgQUwz|;W<(*B}#yu1fBqR$tsb!nPyc)3SHrqQ{3wo4z|{Neh!6;
z$7-a)O%`akkKfK7wHfvu=6ud1HE<MiqGLJr{}9j4T@#8G4fIy|jS-EjV?e&>y~C^)
zpdyX{UUm_VdMnmg>2T#c&F4^G^=?=VD!pe;wMz4W)@jH2KyIW`-;bA&e(M=zAY9Uh
zFUY4vw)JJid78TTxzd&>GuHO5QU;Og`{t7%M`T7*(+cD!;bO&x+XAcM9rQD9`~CRC
zXRph#)>6AMUn(QgKfKfX5Vi~g7w>+UhfFxbs-z1cxNlFIUfYq2pA62T<*8vq$LUpk
zug%ilpGj6%5_&e1A(J06zU@XxQvBny86+@}dIG?#@-Il#84aBYK6B;-H<-6`wd5&@
zt^EEH<Odk6e%|Du-m(PfOSZFz%l!p2TQT<`4-s7Ttz?kQYHG^KWcDX_PWpc;v`1Aw
z{6?fuFG*N^BD1qpsnP6=rLY@v)p#(;O-)O7*57;6_Ji+$oNEN?=cS?5k51r<jmzS?
z{t;71V`R+7EAO%k3jfb|i48~B!ot1C2cwrQf<#o<MvI`9HhoQbw;yOq@X=pLoCP<<
zo`!BTTf5@TtO2G6tPz@I+f5%xg>(A|VYHoOle9}EWb|(TLki<tL@E9>8~UE{vo$8a
zMa=BtQ^O5oyJ5)wamcJe!6EX(HO(%s5bHdjFnA4wPwa3Q-;;4~LVy60dePjEr)hJC
zbiv`%fVUr)a|2x_FAuYyHfljmUT(1TKe=<LW=Bd<iUgEC^6_+G+qf%CL6ErI{j#t&
zW|BnmQ}_hfk*+NwX(%n>u*}HcQ_+QGFve-r+ir#S<!moO0(pGK_cEGz!fq12fX-X@
z_NORWC)Heg2#zHII8vvooMj!x`I`aWu89H4R2VKaUfr@g2uaOdBq+668x<9z^=-Dr
zI6~~>1~mOcob>BM{0<|_p~rdoZ>?DigMyU4GLNJ!eL|q#x+**<iVk)v)OEF}xJ!ou
zPI|F#K~~-B#BoAG?jx?6Trd82`eD7L2WnJC=|q2x7bb-M?Wy+C;u}~V{a~R`8G5mF
zS)bgPSM9E2Ea%U^-ctIlGu{B!opT=&knyScLBw0re|%aR<MbvhYd4Or_JP|0&VoIv
zapSCL8J0ua3dHH)e$PBzEl~r0%Y8u0ar@d_$Mi_wT$%NSNi152Y5lFGJ}XcGp8oP6
z+aobE5q|pL4f&4ZRvVJMlU3{xSK^ua&h}*Pz!bsW5B=V7qX*=1P1*oh>w`@(-(i~3
z10fbt-4ib+v$9<8!_CtG7jK8WTBn+c+)8b?hez4Kw;Yu3`+hCR&ye8^nC@AKq{+_X
zd-(KZ$Z(xtH{luYy(=JI*m3#Vcwy2{Z&?ChmU3a=<oXp@t6dfdsT%~E@7-Pa$KU8X
z|9r{<EPZOrW8EQQq{xKleSGuKO0Y3csIK`7tgp^!MG^VoqpJw@=&%&s`fOyHpZ9Q=
zBDoWXzNo)ylBAa^b$Kys{I%7SpNIdz8S1)vc%(*FUXzE9m%Auy-EfTn6-X$-6?BIo
zUBvsNjPqKdt25!at1bX0z<8$QS3C2|S?p*!G!Vy^%NDVau^t2JY*S$!7m@2pOVln&
z9wEGMUVQDWhwqYADmo7^YP9RY8UdH;Q@y_ng}8Eo>h^6MnR}=z!?KO)41@|)LcDDc
zjTiPDgY+0h>3$nKm$3&Uwb4J2=(#`hyKZ#qzKpdc_f~n4B_bqF<gBs9p+X7GxsSe0
zu26SzW>5^8&BT9Iq9wsy>2aA*VC(jY6p&9FVGgy0HlFQ&5J*9ew7g27CE~89F;IT?
zs^S;4K}@LbM+iT6<xZE>Ex+F5Tdi^<IjToYP$}{Amg<g9MjuW36tZWT+X#2%>XY$z
z1BSQzzn=HG{;9R*8~|erJ+@c*D+Go6S+|W=tM?JW1oOCYTQws)PbLgk1&ZIxDEs>l
zsC`o`Q;I9Z0AK`YD9vXc_M~K$yquW(LnMkSfvj@H<7hwAc^vA@cp+MU!n9*f3X^;h
zJIcIB2itP7z}b}p?*pNg^js0N$qThJVSD$8+p0pWwhh(^eI<1W9olr?g7OQy`}#3v
zx|CM__cWB1oq<((L^@VLD6iRG;Au}O%qUQd&0o`C_r9Tcddc>ed#pYsard2rq<7Xv
zUmZW8_4J<3x6npDaTXd;lqx?S5)!TT!gR<f6^Vt~-Cc+l?=f<ePmphK>|nqHj<s)N
zpx)<=@d2LkQ|QL%@U_tJ8ljS2YnK+O%W6$h2tk3_Yghr&Q(UMcd)PK{VAVArF*Q8q
z?1Oio+iO*Ho;Yw(CtF?*ppPtf=ZlQ?(09{x$^!^Bf{Xv6>XBHFu%v!y-%53D^6~p_
zHrZKtpyAVQ^DY6eSILR~2FeA50~d>Tr?%4ZOO(G9Y8acDvOlIXK6#P7n>bn+WhjRJ
z(go`-*PJ|VU!CdRDRVd+*G9<aV$W9twi)aj*9P<R*L@6Kk8oN})jQ?gI=^g-RS`ov
zz!KT2_bD%iN4K6j36c5Oa7(|pO_|$l^$glPkJg2(-#}fYY(O9Zq%W+%xZHeXR4VNo
z^4Fc{H~my_QmJ19w=-?ptXvCHB<W167WZNfJb<ShZ?>@~Fzn7G!2?arXcIQ>vKvj~
zoZyZ^aw#$VF`qs)lg<i6!jFuCw0j9pN4Q2+FaLgOO6u#9JCOQp@Mf1`+Wr}lX=dWX
zTd1bA!wnr%qlIdN;ew1tX{DD%QYfeoPH?PDc?-0okIv<gc3J;XaWXg{)q9b{d3HZ5
z#4FFrLuGP1o2Ot}Ak&s5Gi14qbmq^r6r{aL8g^#;_)!xwc{DiT{bI*d^(kkHLfS<n
zll#RiLT4v21PmvxT~GV`_+wPZDw?X1C@x>t8NT`0P><kikP`#z@ukuTCT^k_q7egY
zqb%R+wEX0IQmyiK<m1=l+s^Dt$_)$LH*b;`+x6r1J2niCr#d;uyyUM+Lww0e;OA`Y
zP`ot6Y`jV+^<DCKQ@}t%t)lL203Ch<PP+wSww`JXQSxHp`h)Vin3#^eBPv<=>@C02
zcRkoLsqVO2a19Rk3*hp<RlM~%LB(+=9!=sllJU+11Tg6miTKU}FZmNwB|Y6y4V2hE
zat>xaGUBH@!lr!~wwsag`v+GOOWKCVra$SQ?Uid|3KOG7C+uWiPuKZqnK6H_eI9Of
z0JW`P^u>C94FBcd%APw3_va~yFl3EW_o+b~Erjh<wEMR{ekOxqG76w=x}F~LDU43M
zQq&xM+C7{!vRxnMR3x241=|?au0I{%J^cRc>ClrF#zk?Mn6YTC*>2c<_F>++cPu5d
z6Mp5^N}eFfjT#ir=eXM-L(bIihPOuGpWfM~b|YqA)z)))7~g>N2B?y@u^ELZG4+#A
zDD(?sUzkxULXO$R@QjCxoNJ+o>p5)&|02f&#U4L6<~*hhzf%~BAn|kC=J$qtH-K3L
z3$+r{5~XK%HbM}`;8g`Lcm<?_La)_#*owKu&Q%>?cc$WC{LQUUNIHn3dBygG6+CcD
zV|EE}B6-dYl|0yfC+{33ciJ2GdH_s~p{mv(X4w_rdrqWab^Bs|{Z=R{3nze{yIIxo
zE2C62N3Zan(-&JrLwXZO#?Tu#L8d`9awN%_$8mK?Ijlg+Sfz*$O@Y2w)_ho{V0ci2
zTzVNYm~3a*ev7XiO;LJ>yYeSQLLf@|Z;(u7)6bF0T8#n?kGUH8t;}!X2PCF+MLQem
z&Mx-QC%U|MYL*s7q2Bu!K5PUqRS$k>A4v8H`~1q+lZ<J(TJFN}IScI%A~T%o5v5pv
z?_|sTc(UDbluTd`T7fn5q0J&U|6uy<6-c78bHL5$DfD;zhCdyt?9dP^yBq_2ob&Z>
zB>h<AAMM9G=M!o%b9-8ag=ZF>ornD<kuR0C))%lAi1GWMbqh#c4W9|hg564FNQJKX
z<Y&Lhs9EWnmNfP)QYa?VA&E=9k0s@r>66JOoM{z;r5`}lDANYZXtEl=(vGZ01ViVE
ziiYEf@*ST!E%xs!`=K=s4s)!oG}la2YBqGdi-8tIl^t&@7a*yTB$}(9jM|*?O%qpp
zN)?a8G<9(Qf!-C}uXMkpj_u&uVxw`OX5(~x!z(Z7$YG|t5vdD?lvYm!-_P}goA>Qz
zoqkNTRXOpEVAs<s9wW2Uy*lK~@3Rh)U0VM5VUh)2=EIiDx2}_6oBk%O3@yM!JI-jO
z{0P)uD(P)(+S?$d|KfuQZsKiVo~xO*L6Jta?8#?^m3?F(WUf~7JY&;XE%?G&QK0^t
zF0Ap}I-L7o0EtX*-Q|eKQ|y>eSG@kHR6E`1MZdl%5c|?VQ9Ipl8LcY9Azi2JP^@zx
z*T?I(?td+i%Z6~+<@3b4hrr~z(qS|DIiJD;?ZlI#iUXUs|GObMN}9mty(OqO@n_fX
z8<zzo3V%nuMgRNMXTm4`U^&~MP0N6k&)60z85+*ACsY|6RR_urdCWekZ^O6hH@3&)
zDvLHI#d}e$(?x~dIKT-nTJt>AO~;TE&tL6QlafM~u7>*B)_T`>xFjf4#88J@u+H2k
zz9r52r;Z410`2NsClZ4Z79U<in_yD)($@yOtCwj=lL-u|?OvJ<?CLleEZxfH9}G$0
z$m?&zReFi#>d*397)VD!eqUx+c%SjZ9r6f6ZJFeEH?nL89uOYu!q`lcz#t)0!sh_P
z$X(<**%{@qiI9nYC>RbgF8wxw-_VTaP_LVFGBPOrF<bi<_4kVPIZ#!32DUrX<f^d7
zzwwmKC+)L@$-Gik1em@^GvHa;+poDIMQil=T1*_;A%%D-5$n5f^~`g!ub#iUiTj2q
zSTuApjPTjT=PLfZQT)gqu<P;m(OYMOhDSeBAjVgQC%7L+;udNC!#cnb`Gwv4`l2Lr
z)E|zzUunG_+&zJz^2Em?y&G$CJ4uDOpgz%IIqgmM%<}UcMu3!;!XB+BdYOZN8=r0M
zWmWzf!$O~VnU~$wIEV?6vjwu5>4ChMwreth0tkuM`nj(@o9dM8*DJ<sYD`EvNrQ@8
zlxHI|!AO<#-vsNsR<zFS)kNrKriXL~@1_gEtb4$thpBXu=_>aJNpjg%Xg?P=JmCKo
zQx&Z2sP773Xl4z6VyZpY)F2`cH^)&y*otNc{(;Q22T-7b(;VDcpHhc_0dgL09#7RR
zoCXt|xc#Zr1o(PlT7DDYi%iuxil+S|8Bn91eM>{vT~tQj&PY_0b*wtS9+lZq?BFvc
zMK<n%q^Z^~x$SKFzIGaWY{{36n&)~zXSzy|%cPhX)VSNX5e~)P+p=-LWP7d)iU{0I
zvLh8DdoX3RO2*qVTDzyZkG&X}?a#DQY~H-3Zf(+fEp4Rq_xdlOojc}YB^O}h&PxYW
z%OxSe({;aF=bC`HE&j=_WRkiAaelDE@aMAoLDoq$1sGTcFfzMjX_54DhC_ei4LKuD
z;h$f?`#B8_aL+Mq5RZ<ATNXY*3PPr0R<n%P#IJGwnPQ7emnz3*YBUagQ9rW>?3zcX
zy_b4EEwP<o(YB{B@SR1wSb4lK?O>r##mIP^2e>qct$?uMQf)=IqE|FC>t(6plk!OW
zC1W|7-PZ*Y&d5g$e;io^j*R+KZNLZ}BKXf=+NhBDw)?;Xm(&V0nt1~`?eg<4A9&eH
z(*yQ!Sj-5@?%dTp%L=P^c~T66qeOETdA}kIgdl@_-%{=is@4F@WGplF%HA76&2wmq
z2lFbo#t94keA0_7J>zVTUr(?N#Y=h3&PR%;Fx%#=IRBji1Q#=+MLZpUoYu(a#w(a+
zShMuoXGjv8aE@}84u}$KMgQ+C11kpZnv;0Ne))ACcLDaehl$Rcbt$GChhl#BG6GDL
zQ-s+eSJI0SV|`jlaf!UZMCF6>)9V17JK^5J8mKHK@8!p4P<OTe=qY#aua9u`&|A@`
zx>V}7gKR{Kjsx|NfmA3t0;!NZ)#zxvgiN={*`m7q6vZCNUkrHGpk*M%8dkYq|Lk?X
zk3X1zI6-oO3_4rjfA@m=4yN<Qmzh~5%$35Wmg^Bi_M>!`-?T@#r4RwTmG*~2sID5D
zX=_7MSM}HBQ$V$kBx#zB_tU4l_2q}8hVukZPq#ILz;9{!)~O9d0meBR1%^MdhQ_oB
z4$5U-K?lZN<YJ9Z05VNy?%(~B#5?X*<@${6JWEHr^$pR>3>i$LFn$lauarpzsYLmm
zJ$dv&RAQaSxbTt?aW}Knq+R7NGntWouKO%;twF()V&60gC1h(L7x35+(%-ETuUn@{
zUu>aWTiv|ar4q}Ko5JicjVY<8%xpRtNqJznw`D$=%@6|85uu17Z+h{w_371VciAKW
zu~pP9v=KX?pS{s~Ccl`f8+p5m<x{G?Yu4z{s|spvK47u<&VM&r6%u5}uL=Qk={|`8
zs|d<gL!P98Ou$!^CbK5j*#H%P?Rs@CB~tK@>2WYlbMcRaB8Y`V^MH&dbv=4EN!g!0
zg(rf*N-+<0%j$6$57HtXmd9IrOx7Cz_I+a=+_P;@hHng(v_BJ@HB%yAaiZ)VMW^Xy
zj{NBKI<=T<<f+aW)qdkjCXm`Xjn-?;E=$(NXs)X06yf=Rw<@vr-}U&lRfc7)m-4Ta
z&f+tuZgIE)3jO@Mjo0><)JLf3uf^*&Eu7CVd$Zo}FMIj68TW&6fb{mRuBFs?fb8y%
zIodvOHP5S+dNJJinz?Oiw~W7#CQZ;k2c&~>p=O6DdgJqobtyjB)XTg1{`-)?MTfo_
z*VpI;v*LQHU4%uAk73PoDi`rf3R6OEomx_2QdR^6{R-*2a`M~a`<m`V&?KVo_`e%g
zoZpVxjBP^f3?(ee)Y^<T{Mqe?Ra_^5e7aimEs=JSCL3)QW5KQpCw<KhOLRJeGh0J;
zcWrvOnXzm?;pferj+J&hO6KHE*!k|Fq01k41?P5L`aa{g5q)r*W?8qdem`qHBc`P>
ze2~F<M~iAGaTl3aDXTNrm)<&sO$-hktatF-h5J3PV0DOFl?mdiZ}{;2l59DEdgAmg
zE4R&l!gSfKt`&1%uyW*UG0UU-1jli@yJ~7@jAsHOvJWQtw?rkRn(vlZcc;^qoVQm?
zMfi%|I2laIk({ay@<m%yH2BQL5C^Ea<@Tdk->iLV3uAsSck0^(CPj1|!H<^1DPW<F
zFAR3Wm?)>D<W8C+C?&Y$F8)^2Y*O&o?55n>e#idgcDeM8URV@6_X8}sC-EzRo8tK#
zDPTes2EO01R!=3G9TNFqax=mJeuc#e0!jJV^$5l>^-X`-$0TuAE&HVR*stxunRm{X
zT&puFQIgBDQkuTb!-8aqnwLQX-4eG^^U*mz_i^b})L4zaQe|$7(B8z|6uQ5D&q+Oa
zy^?*bdNCjOAaQ}q9dRjY6@~?`SF|?fmvXUMsNyq}fxO>h*%kHS?+v;OU#-t&``HoY
zgS9!-hXdoOSi!%+U6qij8$$5u+q=kk>c^$G+pBnc?~uux13G$1ttP<cNb(3c4+Gpb
zAiUwf8*~0aov!XC{j6O<L@z=g2vU#Gqv?KP`at5OUmjnz%x>%n;7q?uYevPp#R=($
zIWo$QCobIHXj<V(v}tss57Z_1@Gy6ua^KC^=CSvFHBmBEA8HWlu$TjQ+5$gnG_Kbi
z320KAMv>3m)slw^_OpG>tTg?+osP<7H=7=^bobNh|7xJr<BXP)KeRa^3TgJPWm*a>
z_ray9?%u+k4ZVHsk1+0OS^Io?%cFj~tv-to-xLBc!Sd44$T4TWRcVlb9-dymQW^!Z
zY5{%#Sqde>UhA!xj37*VGw*NgXPsMPy$N$}NW`1OpNET#;wQARO>nIU)GvQ0uQB9E
zQ}=Y+*fCE`7sz91<nm>)g<8(H3Zs^114^A6)}7P}dPjcO&+^Cl)*NN?^$#oZH(!MQ
z?903A$z7p~K0r?Cl!t|``i;$6YR<uYlpNTbiQ)V3Vk0@;{gIa(w;N5IBSGY4vEmZ^
zN^<P}BphP=nM_zs%so5FID~MKyl(7$eX1n(Z@k(dQ}6JA?F>TQ<z+SxxgTnohjJi(
z5z=AR2NYg-o9L|iLRo=565@yVC`IV!#qjwHsZBU4t#u-r>BY0)#$%28<E1i>MA_iD
z0GC`dmVfbrq@@e45!BNzUaX}7-LyTV+WBe5Kb7pEZ>FN++^51;i<B|ob!6!$k$4`f
zA0iC&e)CS;w>5};Y37!6BK>$e{htvW%RE4RaI8k*qe}PVkj_S8WIro^O{VOibGmQs
zQVYnS-CUTsv@pN^Wzmyd<Q}{&11~*WqOlz8;-rbXc!%WA-ldrFAJLrYC8Jzt`Rd9$
zPDJSUsd9uk`z_p?Jsb+*(S3fn;~cFz{UB1s1zentt#vmYW7W7<pJlQjMD76iZcGSN
z5RyNCf*4h&O)OVk8z%^vOxylN(d2mdOQtp`3w!eBGU+)rosdEUi6*tKqIoC=j|$^V
zew!fK<@etyCzey*G)Z~bllObpjVf~t8e8aP<iE>acN7vDnC;?YO3#Z)@lmsDZ#?b%
z9AG}j@<)y>`)K|k`<A*M+()62t?}Q&x(d?N*PWhDLd=6G$MW6dsyA1=){gMk%kkFm
zaF@o*xr5VSwr0dHpaf5-bD;}8?p~tRh-!Uzjh=rrU=8yf^O}t>8876+Ev;(tdV$#c
z6rC+(9KTIqb~j6Jrh^lK1t($y$R|)H&r$J%P(|N$HvQ&TyJdDcy7z`loxjA3)*pPS
z_n1-$(u`y-^j_Tk$c~1sw<NNdQI`e}5^61%;b2|+{OXS_HM$9%%=1W)u+s#FDE^cU
z>!Orjz&E)YZ(F0Y7>l+2K-pb?vFs%y`_svvSH(B(Vjj6PW)e=tWg4$_$Wh-{FaRUH
zyEZ>&{NcGeI1@-jzF@c!diu|Wexb+-ISlv$C;kki9*cM*ajRB;I7~G$!suyXD!Csv
zEoq%7=aZ>{FqI0Iy35Cjx(Hoft{Tiv<%+4$^&WCXgihH%9;`|!*DVpd4MIgFtg}hp
z5MYW$#PWZ@IR8qK*GR6ZXPL?K77DrxNf9deEP=o6zBB+V<W-6q=P(cWE9F03(yYpw
zgGq{eT<-amrBvC4aqb?nXux;9nLXDd2W!^#F=cFSgw#nE{v-^;+Z8}rFW|W<Cr=Qk
zQDJ5&jpL1NJ^sgP$D03PiEkX_%fo#l;x=IvzhJotU%nk@vq2X7RT;dL*8aMTXRuqN
z_Q+{F)XdHYVFVi_e1_+8qQ+Uf@U(<$9}CeZiCHzyN#(FyzJQubq7W}}UPS&KOi6aP
z4r#cJ94?mq?}mWo(Nt^6&6-gB_Lt5M&c+$3hfGFu+u{2q=0?CqSH;J<fNwLcZHL(m
zNPZO|{6Roe{^*Q_z<n3}8kfnjR2Oyr0K2*Ae0`JlPvb;Ld3u8oyOtMe_Hh9=Y3Vg5
zW%v(|s7Gf0!D$6gmpRV+t*51J_$sqsz3KOgwY=^zD7E@?R~raz^-#1HDf_YxFybDb
z7@K9WZ<$fhH?fV86KP)2^>{p8*k3g6uNmE4sAmxoG0hJzkwn<C`9>z4EN;SQ_5yT}
z(tpLv%dGRU8(EKKJQMWM?)+saU-<D8F?sWyziAeKJcLzz%Rj}HUzSzxxttppxbr<K
z@Q;ZUT~mCp`6XDf;LB0D^<UQ8hghW&Ink}6-gWGNPUfzGJ3&bnkIofT`||5%)50Iy
zodaPpOx5URRbroZ<9LR*xM^ja($ClM#{7^-T%{+AJSoRy=xTn`pgqij8;TU;wGLuQ
z(_a7ifH{<uTkIr-cGec8PuY0J$ZQxpyMk~}^O-~CK3%no=l8}>$qBDYF|6lW&PLqt
zUN;#_!dc8+RTawpMd(&sA}RZlJM^?mICkag3_%}iq~&;n>u0KT&|ZzE@$ag%0#k7O
zXBS~K;2!{MlUEy&`qh3@wfin<pOlRY1_*LYoBYMwjE~EH#@~xXA+&?4G?3p;fsV{e
zqq_315XfW$39p8pq`<1tJhQ%Ip#t4}Me3O|c{nnRsi$C<EBnUWiV!K8=m{_QEb!}^
znRV=%BQf%liUh;ZTlt&#Xn9?v{jjk`twIMlE}Ls%6FKBkqi@vL-nHbR+#lUV!m-N2
z-O97-Z<pQbwzB1T+QO6f>e#Ex3wt&pl>M|de)MqeR0M)hs(_<4#>X~n*)Lq7x}y;o
z;G=8IhT~BL;@Gv~G~$7ReVKUe_R+7)Dh_ne?;3`0KYm@_nFzNpK5oSvs~S%GC7B0x
z%}g)d=3JUjS4r`hM?iVhLl>Q=QLYf?KFP2ZA-k8N>5BeQ+N!(!Pt00f+G7<nxU3h2
z!{Y^r$L-~xm}!#5Sj?7bg%3Z-Xp7NQ`q5;rEnVKsPRr`l)DSyF{d`|H#Q8qgCz{Oe
zEZ}+mHi3%Ei^xC9Wi;4ADdwX6JFAS2&l4okdxt>d_R7WWm71iJ{znZR41z1XMl?30
zsqdW_xQ7%baiiJB^qn!V+mUjazeC!Qw?8;0-zJEC@<I&t$`2~>fWQ=2d(OFn4!p~i
zBNar6D4C<)=%gw6=RXuQSmi0U0XFmjKdIaF5;V%$=$$A!57dLK?zECS`vz-JwJExd
z0<35D*6zi=EF1TDXdgn5W6`mG9`eur1F#}<jA}`y@n7Jy)#f_M`{mcJ^dfuqur!>d
zJZ9%frOQvoMLKpuSi;h8h$xHLo4-!5q+X^<F8li$eY;UyLB8PouAw|~L^1(>jpA-|
zReHsa+e6Po8)A8}y$Al0bZ0>kx|!SEZ%|{Jo_tz};<H%$qDJF8^@`n*Mv9IOpG8-N
zfMCqdYKDGGJpYA*{)hT_D*2M2pEsdo@>V=98dSZ>5)jHBW$@IirCy1irw^?UQ`kj#
z9xbJF7X9dG_c)>N_V+TFo+xWO<>k(sn)1%FJiA!RaeK=Ly%mx15N2$)-3?S|o65Ds
zx?Z))zO%q|>&%?q(#KG4qLj&M?XJ%bbXC*gKJFxDr7b}0By4w)eTP5W1#3Ix8<R(w
z3e{@{Ph3lji;dqgC@u<Fe7y&~j!1X&o^4^y4VIdTxR`K7x&S1zX%Cg!Iw{xZj^752
zT7|M2PtoDU3`^EvSzd{le=xyfz%cabwPgyUXl>ZS!jEyf{NT&~ZYas%dCj&%Pnwwv
zSphlJa^}&?Iuyrjgz$|+#QTtuf<|zQ5Qk-Qc9k-Tit_Y2)7{?1oW4@Ym}lc2s4B9M
zm^BcsuQZ~Zu#lty=DK#lky^r*eHqpd^qlb<$;FIwu~f!*`6Wba|N21)hx*Uc{h(`V
z<MngtVQP1%f9;i|ZQV6>?ZPNz8-KRbro*K)A7NZU(*L0*?n?#ZjyYc<c#j^Oh6u6A
z_N(@^`r8|Q{_h4NvR|TMMyYe5m9tdxvw6z5R1<9A=Q7sy)pk)`fdbyY953YUZyC0F
z6Rsht3$x_u1~039UfNtZvL)?}SU|~!YzQ{Phw2&d=A(N;O5n&w17pRc1r*kO9PBZw
zt&{~9sg@&1Zb%i@m|<iQrpBa_>kJRst;c#t?cTcUB!9uW4`70wH|5DExrfW7?P+uP
z$Vf)T8%%-p`fk`RiP&0@k$h-%FA*g8du)URoDaT#;++SO8#J$wEl4vG*k0;OZ1k<>
zR+CZ0D(dAmP8Q9<PMT9OXoUV{mh0$xzq$ifHHCyN`PK5&TbCbObl7%>a`&#Km*K51
zsBdFF)xiT?XWoSd?rPa|Gum_0AMQliQ)_ql7Fp$&onEY<g!|G%^|aTKf+!Jmbs^-B
z`P0+O%89h$-j%r_2~!>6KJIQC^J4ZSMVDt7K0K$MZm#deighAZe8f2BG}(RJTtZRv
zz|Sa{+vs*&5zX2vCpG)`Vof=yme+pLI(5DA9o=RKA8yaZcS<5au#Bg0c%<d?W`?i(
zy|2-70>jF6ZCKjcn(xhjm+o2!6JC2|lpCO3HBFuI;WR{V-(~M~0Z;yrB$!b&r|e>9
zu-LO?s$;6Ez;vX!y6RMX6<W_k2y@B?_ufiL1#2Xm)tgz3j(z2kfTik_^KLhUH?MHa
z0Ww1<Nr3Pf&6$JJgWFJNU|kWr!jQX~(6pJc20;WI-Xm`C^!^D?v9rL}OFQX0^nW*e
z7$iXb&+sBM<dZJ`I;G3a{b-{=H(Q-U+@WPxUh!J7H6+Eh4#;U{C;b{`9Wapsm*C%g
zIi#Z+`**A8eTd@0X?q1f1~H7#U&$d7<#YwnNN{QjyIU(M!4n3rGPXz}brFF=v#zht
z(<UCuH$FtH+k}dI=BxW|_pp-i!SikBXIYq#F_nO+-PL*fMZn$DCyAfB9_Q?yfl<fz
zmT-6Xy}QS47AB*uLg^z-KjZI;*1Bv`a}V*FYhat8kn94B8R3sOVg75Vc!LaZDO(Rc
zc5hmzbi4?|7hs2$$1Sx+>h*sXCAB&TD_MH9xLdeL1de2pO5>Cm$6xSw&5tkrVZ>+*
zEpo`f%=h)i&-gwj0(>D<Wna(w<E4Qk5FYZ71aKe}f~yzjMZLF%NiuHRuRblUzOY4d
z!2n<CLfeqNqs_JbvK0dX&`8z_d4&W3zSOy5jKM=jzg&sUX06O9Q9)tnY_QN5U2%Ko
z`}B5j9<mY^=>*DyeQW-(<8<n_Zq@#~ctn87INnqEd{B4J#1IMhTjYivAtUsb`F~8O
z9*p=rBjEY#E6SO^&PZdb^NrwrKxH{%w}>jEU7zdIQiYb{vX(>P%vwL7N`T*bosh7%
zy<DpVL$ZO;GD{4|LHw#|RSO9jqt69cE7Ih*UIx9D`}akDEW5xjp~MK77x0uw5cPVr
zRQ%(_Gj*vST*v2S3SE@lc5q~5Kx1JXx#bpt5%MInhGi++>(}kv<VWMP*a<Nc1xAA=
ziRZskb)5T8OYu4S*!aLWe2w9X1Zp?k+$Hmr*W^wf?_b}7_4(a|Q)NKvh0A&$4`oXH
zwr&Q{<wSf!C+&-Zm{H44Qv+X@^;9<yqurY2ImUhQb$R9MN8t=ey@lJa7SgY@+a=pt
zHm_0EKv>ouMtnLmtV?xU6PY|Na_^rD!V>ahziSJGPs>lRjzvU<;XN!}GRz+))%bWV
zHo-mCIT(HvI(5AIE-vt8=h-B)u993xfgiwv(Mct$mC@`ayZ4Ml{{8d<tzh@nFxyHv
zs6YomCh!2k=$;7!5+s6n4aP(F69NN_hFBu3xoe^8`$UQc9-wc%7&HBfk|Q4c=z%Md
z8CDa336XWQ(dpBwvs`Hqh<xI;S>4U%X<kTx;yHZ*;|fu6w23da>h|M<_-8rz@Eg!4
zEPLUS7#1DAf9NhA=Z(V5Qv480*O4Qh=s0(-_N7Z6me;!C2oh733vBs4DZ7y4Hky#!
zeOH8);gCRq_VkRKS`RtOGs*tCoM^Vpg57&bnY0&-=h%&8Z7Iv<#?<6B`lMPu%uPA%
z)1TAVBuH55>el9MjZfU2F_qHMRi*wXwQ*E?j3#Tcc&6t$`)i%$;G{NKsqKS~OWVgL
zc}MY{%)uXGSwy@~>z$9`YK1TCCCYWgFI;SE#2@h)=9-rrHSf7t^c9s%k0QNjlu2tM
z$Mu1+6OF=+m%~TIVimrc*sd_Gq2w35jt>SnH@|{LkJ@xpj7TtC&Wf*-|C)2SYumE3
z*tfHVL~B-!M2J*Qy%uEcmBD?U4*v@%p7S$D!EK2JOkNJk%U~wE^GquFKl8uR%t7e9
zd*&`p&hI0B*h=3npR#e*C_fDXeV@*>ZlAvuQrZKCU^T=~kx7-0?R*CwzQIT{remK^
zlDZk$?n^ZfuTAczOB>dtL(FOX9Q~+2Jnd46BQ-*T?>nzi*AFp+z)jwYG}Fx6$X8zu
z((T_fQ*FgrzroSK2?UA*pjAT;*<G6?YGo?I9VhCRLk9IB(RUtuP&DJS*I13&tI8lA
z7aQKVy1pfSWJ;66&7hB+9+Isw%1iw1m2zJw9Vo`FA}GbrJ6|x_Da=y`{Y?B}r!Buy
z4T&{cq<WYrFot{~`V|6VI?}=vo&}oQ9McITozI5Q?A{<vgSYptl(aX(8fMfIxwL$v
zK}un_sgC<Mxmj<V>%~Y)E2Qu^xJf%q*Lh)iMxM%uPTjljOV86ylcM;Zr+9q*XdtV1
zGK){J2ORY8YrG#%tvIv_@c)A!f6@Psf^pZjlBzW^Ex|3fRH{=8{^V7(`uIY@pCTHv
z!6Wsh<sDm@CqMP8(j^V#0Y7756=c++QO|oJcLd%^&jXmT<gP?AYK`fel}{9>2C8e0
z-o!vn#+(Hiw(vP0$IRpab28JKyFxa?)%_QaJLaS9rOhqWjoBT`^vwaji`!nYTb|?R
z6Y^Im;os~cHtOZRDR@?83k-J3IFk6?G!392VS+DHR^B@$-M~uYBXq_Mi=p-5(Ga07
z^468M9nAs!L8^SyVS;}=10k$vR+q`=o%oSTsKcZ_>CSURBVeq@VQ3)D!P~??bPhP3
zFQ0y%<+tsmta=$<9ndA$odTzmZ32>X<s8ymi_mQ>d@dUjEhNMKlKe;E3-ru4R@+YO
z#c3zw4nYdo!oj9KYa1UKhI1+<bOz{_Q`_kjB<(`P<Ps$p&>V`&Y&YePZ<=N3d%V|~
zyuSG)uc)s+RH13{!B0N^65E>;#+FB){=B$&+wF(xY%lCKrS`uw_#yML#PpB~y<hgN
zi33i4>6C+GT2GI@c8EvI%!6bd*<*-~NajD6d{(OrW9-y38fCtRbLF!r#UH@ZOyH|a
z#eK=NFFGmPKU!t-UZ-VBFM9kojSwjteRr(%G5})nxZC4#eX*D9J<;9KvmKO4@INM-
zyY{a)THKmthq3sY;<2^f#P8D9_KIu9hC=DE>ZKQ21!>8IL|$T4eDm!ascF<NA<+7$
zB-=9jTpb-_D~g#!ah#|;?%d^{w;I7abZPcluL&byawlk`K4fv|wYxDxd3;*aIG5F<
ztY3ZegHDa)sQ~E@v94YRL6!{VI8|wo5r`e5;;3&U;J{!ARH|SP-(UFe27_`i&vJDf
z^TF3&L5J}_mXLMYSb@mB({5yDBE*$dw~+C3GO_g@a2~fjG88W8?=1rZ{=HZF$f++&
z=_{F(v71K;D)OKPwcb>#<erriGmuqBORpc9xxy(}ipI1THo2J{5|0Nf=%h0$_@A==
zG7y^TTBRuIwkVLI%m}Q*Na<LBY@?)Wd)~H^sAofF-h;J-KK@b4TP@>Dm1PKBh%;C9
zb3zM84R9_*yi|}U-y=(Sk14Yn)w--~0io}v5hKuT`juUr{Byy=UF<vVw!Cz@zWf@7
z-m-6)d7KVn?cb;$ocU%EFs%HhF7(T>(`aP!OLeY*03yj`FFl|qEr-#^m5_{RX=Fdv
zBcI(CFi?Vd7J3@IX*IwJSd-l-Nv(A{7*r93eCb;+7R1b*`$FLhFx^>nuXb75lf|&1
z*-G=1yR1l|7*xn^d_9}?h^WT-hu8;4T^k8I9b%Wi(39_#3xKj^tc_MKY4~Fe>)QQA
zL4u7O&^bc{TVzFmK)+Rx7Z#_oo0Fiz?oWawg1d{=(yfE3aMh4|Dej~&d*g_A@wN-%
z+!@miFh#PYI`yVn9|ozQ7rhtcf7xXSW*&g7A($ap$nmRvzqj3wUI@9<v%#6gPB|4Z
zo)En0#(+&-yh)Tmm(2~L8&zS1SZN8)7G2Oc+MY<tdUmUi26Uv~7^FfcO$j%-MNCHM
zO#N^VY}wao=}Y)k&5+D1B;dIyVDYVk<#G8@06t;DC|~!*h?p@sYWwqJoaO|M_#&q1
zyaLJ!s?zTY)v<74AAhncV|!cvLn^p0+TLz512}JL(&J*;9Z)|jjYX_E<y(<Hd9S*J
zM>E3r<b#~BQ)9j{$Bpf<(1mX>{zf_@o_tj;y1-r7tNdwUIj%B8h-|edmDY=P*v#H`
zg0*C>AqAp<JUDp&q~mFjJjT?Nhb2Xo3+nC|zwekf^OCVj#dfmQqitPI7_Rl~Qw)2m
zjh2USYkI$ryRXQ4GViaf621CE5@WwRN7gj$C?N#{C=e(L>JvWa+uo)8Z7d`vC1*Fk
zR1^K0Dcy(Xpzrn*Lon$AG^(!@LNU|9;(lvX&aYrf|8%6yFDK*yid9?e6E*xG!Dm#?
zw<YrEM^$2pL_Q6!EexS6;cQ@^lC)W(tbG6C>d>7ZzBL5B>O=P5vueGtCz8e}tbbEM
zf)YIO#Vx^F4^Zd(4^5yBf>*ZdC};x<>|4E!(hP_LF(mEQ*lcGc{%FkAeJVCKw)!Lp
zAC@HaN|ArKr{ri!iMw9gmvd6EtLgtZI?K2w|F;cep@<lC2q@hhQYH)-CEd-C!2s!y
z_z5Bcqc*w}Ho9wci3~===mDc)gmj0`{?FUJxL@pZ-`jP6uk$<(`{ag7J2n5>*?j3e
zzK6o3?1Cl;A}a2x!h*wPsVa3WYA|H9Z{;`duj1cSy5muZw5esr+U9ebGRq-MVQ6zt
zxPXskfQc3<!P#4zr-M?YK~ANMD+y4dAFs~lLg78Y7gZW2ky3eEkqKJ=`13L*-SGSH
zH}-Q*?Nngqm3_1D9r8IEo@XBpE&fh(jAB}aB1d?-C;5^(+ho#L_8w|RQhveM=v#(M
z=f>x<)^WZUf*0S`DZ80=QcPB_*Jf@^P^}^oDdl9jjUC<-FPV(GT1iZ+ne0eNSRT*0
z@*coQR5|mR`SbGHDFf3VckU#fKd#ba>*c6HHUF)HIV(hS!ia`F=9zamjxs%>8Sz&L
zatOH777*A6NJOUz%qdMdxhPcmVG8D~SAId$bh5gGTg7Vg7=yW>=C;pT`dCDpncr}t
zIRSSwrJEKHqbK6IW~!0wfMaU#=_Tm7L#gir*yb^POzIP%bsP<tIVZu^Vdp{bhdgbP
zot9Ov$mvESNLeOq{M`6?_+b~K`e?@NV~cnpn}X35+N{PoEmvPzQ$9(1EqP&GFZQ}-
zs;brEdG|&wpBlH!wQN|;Q0#uy=Fwz!TOZ~8Xu!4j=EWQJYvRA(n|n9qH@U>s&zsSs
zpTx1et&e!`hGLGIcMbw{g~Q2PLhY=R>_ZMlN<%NLrmO_&#MG06#D&{``5~JJj246%
zF#lQ3SIf2EBa){PdYV%r_gbKRAnOhn21+mU#q;N+Cqh~z&ezp~=RzGN1Ck**h7=F{
zI@UqIS_2Ohr%b=3Tf4q}0%867YxjGFEMVI2cYoO5ov{3`<fXGzCAatktgG(hkGs;i
zU1dVqTlH7II?eT#jI)_G;sgY5ld()OzWeU~R44jCFyyp<;i;?9A|qh6AN9@(izjC@
zsBizz8Qmj{C+!)MNgs#{tm1~dYJzf_?losVwR#z(jxv!R!!(lk&f#wtbGo;u?D{^d
z+kyWg@5RVTPO!r6HJZJ?_X1C8-}D(mVYMIT2?$CRlRavsDjzQjV=<cz<W-Ng)Y`kr
zklqx)GgptWz7vetgxXSOKUIKk!y?-sym~r66`x!gPny3KN;DFq_gp!m;dYRR(Apnd
z8fB)8fVx&%Ko9+w74Fyhe>`QCj%6#OmJT!W5GZ3JA+YO0v^tkG=<ojy^KwtJ5#8fg
zT1&FYIwRYk$Sw}EXh~xQa(U}``Twqy;`*NMhMl}aYfQ)0iQC7yuf82fieS^fw6=jf
znzFWn2t)@>(cZpVdb>jGyFB_4l=9T#1EA;8E?)3i^eK^)`1t(U#d!cve6pq&N#4*v
z+tvDTP~?F>-`@0KAWOMsP|7Ul`Okn=iP55)_MNQ<5o!hnT6u~r@@pn))YKCn!NLb~
zW9hq=n672A(EVSxyuO7;HLbT=um9yL{Co3|%z=ReB-o-fZbiU<b;%U5&8B_k*^=H>
zw<7)0ToWmg4vZ|?^X>Bu?vus{C_%RNiR7%GWag|9#71=Ipd0s{($NH31=#O>xOZx#
z79xG~xJt+L-u|w6g~nW3f==(q(5P}zl?^qiqneC3^^&C~S3@M?*_&m7Q4=z)i`=UB
zy>Nrg3LO2pRC<(!(z(N(7Do*^!&W8(&7CkcU6$-lXUMgOeDX@?<}eNKN5YRgb*~Oo
zr^{;ea@qKW2D{<>;M8c&kXi~4H<uR&*PqLSD5QCvcy1BK>h5E@&IV&=@h~p|q{D9x
zy=Vy@)>cq)N6|{j3op*r#{5s1)IXlq_5<1MCLN4RKb^$gjeBb2lTcRZHy@IrI+kO-
z<yD#~ty#H_#~uT28DoW$CtRZjgAqBdTH3ZlAU`J|{=Z7Jg;E|#^^F!1O_QwZvo<KE
zQ~n!>4To0;U7UA$9-p&y&<bu9Uaq$6U%81GGa^i@tzTUqx*c;HxF)jX?+Zon{nT_z
zoX2GmQ>eA&57u>nsRY>;mKxhU8?~r4AxCi?&(tOdEGJJd)|;>#gs;q$=aRm;CDE<;
zeqL+cFK-<Cpv;s`oh~0a|8>xDLaBwRQ1)kzqNb@d#(D+8sMm%W6q!p%PWK*=yL?wD
z-vZVCP7J6^*9D{O>zh<`eH67JcHYIkLfBuu9eF?v=X@2{)VwNHzII1wixfVvZJ=6T
zRm#kK{85hAo7t9xXFmO1o)q&DV~*2rlb!cYwM&}C=({X<q$79Hd>RX*=WK7t?$&?x
zYQqkK3f4)Vo4h)1WmHFY7J&C>FYH<KyLrsge#^9pDWpr6GYsVWNvftqzb@P?m%nz0
zBOVfWnb2DdbU|**2cHBPspkvQnSAi*H=({xx9%A3>(0ibZD}=mo#~3g^&(q{OEg|z
zj>HA&U;#BAI*CN*5KkMoTBuKBc5#36!Vrz#fM^X@@72l40JrB<m*6eF_zz1zXaDGY
zvE_tQW}fF~P#Ktg#@aKh(TA@APcXX1+)fn<w-gKn=WIPIObyWa{@m=Ds9tQ{Q^x~d
z3B(wUPeaf-uX6TPN<v>0_~|Y*&F?M2N-z(?Bb`sWTbakf!z3x~xim8zK(XV=5;(-5
zPmM4ZFD~R{Y!!y26`e7u0KRGH{3Ge>hKh6V8C5`k^y&LIPX=Jm$?}6?`I|E@PFioy
zGX>I+nZqIxU-T6&QyY^;TnquEI;xyZGz0cJx%My(I|nSAE1p!NxOKpUKRpVQmeAIt
zBvmC%>t8F%yxWv$;NU5VFr;<1)!*4kb#4}0iBn7jCq!CY$uy}<F#P(H6qPMdebRAu
zpSwmoJFJIpkfOetS5@|jxgMZlx%Jg>vFUapRYW#;ts7ci?QB25;IFLOG}lpc#83?Y
zWQxV)oYi=!wwxH~3MCb>^98gV3|Qf!YKEI<fAP7ZR8e5S-|p2xXDX8Xn0b-JQiiq=
zCg%E>*>%9EXSBPXS48epnQB9s=wfS4RkmO;m>gK^RU<IR>CIGqHnS`qCFwH%JLBU!
zm5cN}?qG<UcQm#wB>x#9&ZQ5quOV$qJ6hEk$V<+1V~4-EK7ga_jkmi?sw7US$ovj8
zI7YBo?zc~@9;2Vz#n<i_q`G}Cm0hKsi(DAsebq#oXc(GLJxBUS3oFsm#&&Ss{j+>;
zo0Fk^O4#dYQs%c?zS}Sw=A41-{H}v{nEZq%PkMy45Hn^?#=_tKfViF<CYkDa_Sby8
zcvo=tWu8)R+oo{3)%BOH1=2}l-YH~z93^&JFnRuWiQBh$pG@d`W=lQQse<%;9AEtc
z9Bg~f=^QqK96ej;;O^#v4L*F3ZLaJ&Ubuo$rk5nleja!Oh&PvyrAdo%=jBE)%M7V*
zCf#_8yQtRL70H2=Y$ym$-<o0Gj?^bn%wo|wKa=rdlOaiaV&tF)K=B*SWQw&#Ct4T2
zahluD*gsK3wJ$nv>v)B}B{SK%3~DGE5D74t03HK|Qk=oHJMi4EFb!^F{=8lt4O<J1
z{a%bs!6*U9S<Ug^^zrq6F#?R`WxFvde(Qs$DLHbKHU`yb)jzlNxw|KVDtL*NtfP#c
zlMmYu@f_fGEaz4}L*%DXoOzfzuM^czmCU*c&J)r3bsh|9E+e_T#~eT<E?i6HO3q9q
zi{!WHt$3dXVcf-97S=vV#CxL<3o3%wWqd+o8e~1iVu2!&n>h_PM)|x_X%a1iQ&A<s
z;qG`A_bt-2ngZ;tBD?ebv>5L_XSK|$%oVuZoDny1915u!70ZlgfJwsiz%${@0;QSM
z+T*%;0G|780f-29P<OH|EeHJ{p&>&qu*HQ$W}7=7PX~XHOOx$oWK~MMCY!+mo{(VS
z?kd(h^;`6N$}JhowehO8B+K?TpZekvK~X|DsP+6y@gpLYyM^Xf%6;vo(#m{VFh^Dt
z!Sj^G>0ku+e1~Ya^_~|<dbieJ3TBMlE>PB{38Axx2bkuYXMM{mA<z?~#woXg&Wnk{
zklasiBF}ytEgbo~dubRK>M2xj1#*w>sf}d~p@WGO*-edWR%}b)&``%h0=ro5EqR$E
z&J6MWzWcG|XRbg%89e)o_aNnwUKZ(4=H!`6o6@n87MEMb^S3MV$tlegcl?s$NBGu+
zIzn*f6*lpLi^Hj(?bJezNxUoEjh<GOmSmn7Dz-iv&>^LL1)M>ea9*rba0SjXkfMps
z;L9~H7$@;T##U~KOmjv(q<ci-k@SNupwnB@$|G?l>(*(rlv=x>+jmcuKI5c~2;UwC
z8Kdm`Xgx`5rNGavz0jJEu3@(x_u^kk1EMY5kUhe+ucQ?FFrn>KiD?L{-r{#Hi;<P$
z=7&!d=SUGKJ^RvWen0JBnE3kkROzrJX#G9XCu|U5Hd?nh<4lx$+g*%_*wf41P&-g3
zvHU9{xj21q<t_g*f41aldr8-HD9Jrv^pB&XR`Noz^$pfZS>=(08`9WMYK#5~3d>9Z
z4IAOP^c2kHvbU-P={vqA4=Oqvtmr|0yu!0R7ZelyO0ld+w;M(!jemg<dY|~*zm0D1
znrd=9cG!8){n(nB$bxq5TDIIinw%SwBa#(Ajx2B9yxb2o&nkN2u#>^LrD>jq=*O7q
z2ds19#88^MGV+Wx+g8M6-eY2mh3OZB&P816R-}ZBdqs|60P1C`*mN}~z_;f%p5dt}
zo=r05y~eI_`pce2ZeN*pc3>|oovTI|C(#;lKK<L!a>$QvE`|6@mheA+2(Szj)BMlP
zL{=a5&!$I59;#QF$G^2>V)7;HqW_Vg94WOxyzbu7oZdwKMaiY}PTI%C5fEFDvZgwS
zQdIcNL+q$UM%Hdw1F6{hxG`;6a+?k>c9SLO_6RNUVNWNbxRAj?prGY2nV{ha?UmSb
zPvBB)dU~t08m{BGF=#VXe)HoW$w$w&QT!HbNAS2M)tT0Z{nz0O|44ouy_PQAQ}DhB
zreVpzDDbjyP+z+x=tM;~r!O^m2WP)IZ~8~l+je9~jMBWJ{70g4rA!nTkxyQcBW^N?
z&P%l=?tN{7a`E_2T_w+<q{+VTNiCD|ZK@k;BRV=?#3mucQi$p}FMvwuZ?Wf)C5xOa
zKtsS=jMhnTrlu8-6K664=*^k3CYNz624#*C=GK}Pc_(Md?BfXIQdAt3K5}0eic6-8
z-MrYn_(nwwNfUgCi%>qY{6@?`0y6s{i<5WDq%@-%8}0_F_fYslC-d;NxG)2^v_QHZ
zjKl8prd@D`feT-Mm&C;55>14Cf~}YN*`>X36o$L|+4NwP)qjR9#!wxXT_R;5Azkyo
zO-a6QHe|K?W*|<T$AzMjK$nPj2{9k8@Em8m4EH&?VT~aZsdR>5|BAo{r|Yo#V?>5x
zOax4BZa=D3)!0pl4U@RN!zld*C0I(O6G~#=m`)!u53297D%3HNeU|WU(%OQuDG?rh
z94ZuO@c{asU)yC!8!aT{0+-n2<#<1pGzc<HlrwnpTOwDkWuv7!(8^}C^Mi~dXSSL*
zsu~tqlZt}P5MPt{i?OL=1SDam4=39>p25szT_&e+f5}`1ESo8#l<Yq>*f-JX%s40E
zKbWsp(7g&qR$Z<wZ=$uhCH>02DLhA}%`Obo{B4w~sQ8U3rA#{2_YJNz=Yu?WR4Y}R
z-%T%PGiR$f%hoIRM_Mj9%J`oB3e=iOdKdlpxd)p@j-B}fGcALw)o6=*;2NRPzqAIp
z>wcIK2FSMq+X+;`EVUdIZNMCX7$<Mhmb!pzG1WR-Gr-&be)Cd=)<Iz!rqhXjk^sS&
z71Kk%u;@goigzuAhQ0O~HqizmpZ}56hT0Nf`tx`*c5)-GzjRbTubW26%#8Lk>c^4x
z=|=2hs#*q<#$lr$x~sij#N<6vuK2qKGkJ@MqWShFY<?HQhXF|p<d0KdS8PV64I^_E
zcsAvj5RpZ+qly*Abj&M4y9elD6AjUL`adq^_+(bEg+ZbCpN2)eY$qojvc{6vdaftC
z&gWH?`34%P<Dxk!<D_JtIFRw8Dy31TFLt9Mho!mp5oK{>6~2tCoq{WTbJI`q{7db&
zO*^5m4J`3{A4TxwgVu!<Xg#}qi7>ZTM1@sVOK!1Lgn18gx(w~~SE&{Rj?!^B_V7zc
zHL(OsZU}1YWSt#=^!V6J*<Q=9#OwO;Bw$Q?O(p|C<ip6H3yk*Sp4*dO4wLEh`b$<B
zX^Iy78|K!j2UFiWj&;o?UhnmFBr@fRVogGnaTg$(*!JQr4pBv{b*}+0vQ_!7L6l9~
z&|^MR4#GoM69J}65Mz#eDQTu=0byxt&`3!mAv#^p>A6z0i6-C}epFH*bl^Rr%30|U
zH*4}ny|QdT=l!3=qAK3Ezr=twpk)OH=jy{3%ysDK^=iOV6~TS*%zjdZc79^z?d<~T
zwe~V`LAQ~bTy6(sttZsU^SZKYJ3~iWQ+O}0P&;@(nZ;OakFb#=&Vh9UI+X7Z2yA<Y
zG->Ktx~ONGynD{46z1cigog{9RDh8O->vE2yXjdzuUEo(lQrj=-YjmW!T1k$jrO!R
zDo`36IxdseT@CpxLKB0fXsi*>x3x_|r6T@D{ITjaTHmdKnf-z1u_<S&p>Z>Y3oQo6
zm@4N<*9D@wltA59hhbGe9I<YQD*4sv+UKZ`0O{L8SFRT;IKK_J$M(T7LJmF(!MPF{
z<bxjJtnMQ<i60%3`$k3aflc-%0~^$dGeo0$x1OKl*QtdYW4GwW0XVuzs$8@1#?WR6
z&y};6S|B91QQvK2a+bXm^_#a&9UF+Wf?Arf&$fE+q=V(fe0=HQh1r!RhTR#33gl|d
z!4e{yvnpFpr*wZpu%S_yI&vE{IsHSh%okS6S0GTT#g+<6`%~Q~o2yey7a`LX3Wjw+
zpGHqIT`qrnRu%(YELShCV*oicx<n()$XDj@aETcJ3_bzXq<kapTIf>zpp)9g$Xlm7
z%d}1#H|SC<B`NSkg+PD-b?E%(D-+X<yglRTr|$;NveQM0q$SBwTAuxBY@Si0$h2iF
zm=3oXoqBlDz;U*~s8T;XGl|qCQY&Uqj^hgjSD5z}(J?C}gDKA1CHWzXBo78(fLrC3
zuFHls8YLnhAgGtGc9I#lw}_*?uChkPODf$);;(MWxJZARfdKNZz9d~ho~lBncI}R_
zY#kQrscbDIjIuf31_QA!y`1A2h*49D6@$g}BJZes7%UrSXhnW-02q{Rrt^Z>d2`Em
z2|CXQugp8fgs<lamNMk#zE1;w=lC_kJb_!@3Q?7dBUS-*qTzltruOxvnOP-u`8*+A
z_j2aX1QvQ7{bk>avz;?{y5?gA%wN`41M;e~`n<Fl@;{rgITqTU3MAyj%YK@gZ*s*;
zc0s`MG!GmT48JUhIYlD${bbQOji|u-zJ;v^xDGu4FKzQFUgA($uQc=5py*2$WJ^-v
zXHT3I%$4)Dm&ELcEt|DMg-lp;^5o{w!Y}DS3#6Bln%a%P$u%93+%%Tft=#dCga`=H
z|3?xZzTs~fkbI)tH|OkZVRp%VII%16yw5od&JonSqLNf)@!-&_l{|Z*;hC-QKazWc
zR3S+bEJAhdUW@*>Kh?>s|IFo3jxnSIvhQZhl`yI#-&fKDOMZ`1oLe&v`T#9vV0@Bm
zmpSbT#8p`J&@}b8EW9RDTZfcI8)pm--`=UChkRWR5`U%mA)cY#lf2VcjNDA9oi-ZP
zk9)D~L;lo|oJ1F;wNTKz=KS!bE5!T1aU2t)c&JmhdY|v798Mp^OYTo3;`0^r^x?-H
zGoDQMq<2z>Y=FhlXY8)6PcYnyO+>aIfUjKKr?SFynfn{H)*TnEo^XY|X@=d@V2(XH
zlqs9L|3fMJ11>ZAkQLGk1Iv5gA2luNal|N+`#vkiKfI#@vQry;!{OXyHGO9%wUM1}
zm*^ah?HCLyywJkSc8|FCQQ@!~o{h@+KZRuoP<9Fmy5oU}D$)%^%=8^^Tk$+V%a#C%
z(5W9;IgYD+(q2(|nV;Pb@bO{+(0cx?vv9wSVpu2e{S{NgphXEgiW^>h*3|tW_cY0G
z_4v(EPC4;|{orSlL|FG`?Ua=G&>I7H9OEmY^N@G;cQHLBV(eemj8fX6lk@>kjl6${
z{MJ;A=aV6;V7Ka_t!id~2<_eRh^MR#iv(2d^WA|ma#+{)#}(L-vY-tN=)Mr$FixQl
zU$g~yzWcIj2Wp3YoL938Dw`P_AaeXWvhqr{6V1N`ndz+U;ig~bNpp=enya46{(LrW
z)T?v;-)Nr8Yh+GQ|L%b&A;8SGPKT1g8Fh|5h8tW6l!LMBo;DjC$yOMj(?OyPuIPrs
zz0K#7iWfU)*sN2P>uywjZv3q4ePdD89$^oH@Yz5xu*dv0w*ohDcf5q?f=uXsW12i%
z=!<l{FO=VLw9c%fGE(Em3i;D=Zyes&b~kwX?pjC8;uq75^$jVQx{0%M#zh;&I6)RM
zoOpX`f9J9x77N?rVpiBvrhNvZvNNaS_8_F@Q)Ay_jZ7G3(@owBbq5#jq`GS>mI3jn
z4;hqofc*XDtv`qOB4c~zC+)R>08fRZR=)vLQ}O#9!@Y^BjalO#aIScf>K#QwDxm|K
z>wMb{B|JBwomZ;c@#aRGv-mTGTw6eb#pqBExA_Kv4(J&d2blK`IyVpx`BHz*rCRq9
z%F1MVtX^1RRBJRGjW4iCQq!<ERV|uQSpP>tEIr`0D@^3CtDXOHCjKJ{p+A-^()Q_o
zeg5uXrb^$SF9T5!-&&@>x*w(Qwx?l}^w=$=TaS#Awb%7qKHpNe26yFHFCgVw8PbH&
zdZjcA(nJdFqCuz{dw#O2yyF)oh<h3Z>x{V?X-5Yd&j^^!PM?Gu<J|uYR{u+9?wi|L
zzG|js1hcv9n>^8b2-Rb8t!pKds&En*O^|)>IDUVK0okCuV(N7+qUiNk?22ghP>1<s
z-do)lVx<}*=%uO_PrF|2W*Qc9PNTB9lW!jHpB-o$aHK*iiTKR6@%#ib!xIGsV#?X3
zdfv=b)A4l)XWpmTy_jrex~IOoY3a=`v{$o!OEh}Xv0{TEx!l9oP178#&v*Q|7lPzr
zJ1?UlV8PlBqFUh$v`+i1=JpDwArzzrXnTnab8Fot5N&GJyhkX9e<VwKK;DyP<{ie%
zDwR7rp#z6#odYLI3V!#na;H~avU>e`76be~!!N6+`uy!HlBxU?TVBw3g{VS0OSHua
z0Q1sVkJz$LUjRw8npz+7X^5%3%6(1B&?Ie%vxUW1efbItvq_d8M1xdUS0-eYop)|h
z#;a-PpOxvCR><7HyZt453>bWEJNqcP)@t11zB+Q$g*A`QsAI@=;XyA~m8{*|DgPsL
z1xAax6?V3CnVP???u`zXIFNp_zhBO|NUhI{UlC$DE^?WN?s?nk)xeP%4VtFws;*(w
zQM;rZhs|GaOwG!xY%UByl+pB21$2DFgwUN8K+HLMeI0+c*!m=3T>9I<IG^Z;P3TXS
zb6v-gM6Y*3%*7SW4c6oX59IQNHdfN?2x))j#rSq{9E5c6^!u;}*bnBF!8`-9fCOXR
zBIV<g4qJy(?w(mj2@^=FyxI$RDKwR*X{X)oc;J08J$3v?Z%NLH{eJnh+C0h@t%poR
z$x!n{V^Jp4RQ*nr@Fl#3T`}=LeCA2t!#>New;UmAQ|1R20GIK*M+tLf9VY|xsx?U;
z(R^ebyM4=dt0c;XYx(lvXRe<(Lzaqd045(Cg$Wl6c!zSNRZe!xYjLMe$&q&=9DEAB
z|44+z-C)0R4WPZ^UeE5bTvtXy%<~)`!Bx<zUinaF#h^*`qA2Y!;N`wC$IY_pJR?<p
z0WhWfK24g2bB>GZ)uS{D&6S$JatqaVg&-OP@R1Ae`UMDxhnaz^!qq8czj-^s@hsAL
zPJcm4#Sxp1!*2#Eyi;m$(^v#;lB9cd;PY{>X*D6V+~Dca&VU<Eu;}6Pw_fbc>ijZ}
z5MtC(t7;hXT`Ue54nv_crGWJ*7N*mc`)Q7IT$)7Xz!*j>$|O>%4SV;nD_sKwIULF<
z7RmrVetfQlMd;`=a><4Q)!gf(#Y3A`zFs}i(kPjx9Z{MGWBT3}TA6EWrq`s`O1brC
zMux;2a&1a*?zG4VfGTT|W*~#S-0hQ#HQzc<<A}Ra(f6%)T3vXXGbw8w8b;VqW4pwo
z<u~d&E?5Z|tbNvQ*kGQxe!zm8xGwpNi&p7_od3JqOzq0~eD%p5JI562&?HIBkyk<7
zvqN`mWaz~gTaF1zNmXGF*<S>v`1O!#QI{uhJ|Bl15rR|~*-XpOPI|N&v6U4Ho~ccc
zrsSaixMN*!8Bmm}zu`rhprr{<#7UPt3P^=PQ!I|SritreM*>b(@gdZk<k;tReTl0L
z-t~l4J^7=1D?B()Yal^a`^t|`EMbwBbhdVxx@Dke6l|SH4}6c-VXJ9HJGU^Z>Nw=2
zE9=wPYE25g*5Iijz$ri8R*G<KmFw+5wl&y~lmjX2(WmH&?NPm$#svY-UkEgX3Gg>A
z!o#kq4|v+_I4%~lr0)3xisZUHjA};YcxTcvlk4^BpOolXF4TpLuKr58lKQ;zGlMaz
z`9Ed-gRUl-k_lzW4@5W$->EciH>!m_pJl#}o5E74;9Xs-w$@*zJ-tcq)lDB^9%Myt
z>BGA{vtfMm;=q|2Ft7G_y$b9jb=kBsAVFuM_XkkmXpZ7>&k9celLLExU#y1CJtF)w
z&rwFAI;YV)Zo>I!cQ?(qx`y3Ywqi`ShW8v?Ow`0&bNgN~lO0`%_+PvH_*3-~)!;j1
znSHdo$*H9rb*+2-{tdw*^w@_xm$(N;ta-`8HZ{#v59)q8ifZe{uFjX2zA<qfOGh05
zcf8=A#6Wzr((Qvib9ksZll9=I7j%QO_NFGJ>KVefGyfOx_JVA_^;xSH1Q<DZ!7&q8
zvhqiHvEi?@M)uGf`^T?YOGnKEWmwLC168El?Vi)Kt5O{|vnVoMixiP!IfJH*?+NX0
z!S<w{Kz}EOlD9D=L%wQ`Ct)b-U*03e3oGv9xZZBtGwlJYZ*d0L-P}P~j4ftisM-RU
zo-^)gbx`30bPl)>X!XHR2onN;yIfQRF7Ca%nAaG&k78vbH<H@;>ePC6i+%1-C?U*F
zBXt79P(rRKXGIWTeD}p#1-kDW@J#5^!i3~v9u)tR^frAy_NjNL6=-I2?}a7TgRl5j
z9#z*ady5|2$`mO%XCTG?cNO6;X5YW|qDRUHDN=<I!<$~_k+B)GfzqOgxsfxApUVE@
z!b1KC0{V7LLkn4nj61u<WE?%Q!~V#nDh~icDLOawM8`4R7DX#Mo&N4w9Y1T0RXjc>
z-}6XKE_|>PPdptI#EcNGSF`*AwXS)H$d{!X_R{l<XkX%FC#<7`lZcVd*h6uW(plS;
zO2!j0GF&}hoo<Ud`AWU5^D}<9^WU2gmkMiu#b0_Ne)^^8x^M@nUG&%VdqP#i_g3D8
z{cv4pGZpVLAdSXlgaY{QNIh>bX7Fi`&q%#Dp$_GSJYdccP_Ik;iWmfz#Ez?(IoR51
z6fP68YN<=#TruYgF&K&8_m{Hpsk{tMw@=&dHFNL(Q|Od{ph#?ba^4FAE2KwT^6Wrs
z0iv5Cqz0}HmJr6y$TLxlngBS%So0HeKX<G<Kkz&Adv&8;#b29k!j+Lak$awr+*yP9
zU0!T0DziV5(Kpj0{&0N;u~o+=K2~OWFLsuEO{`DA*2PxmV_;?I2uq!EDXn^$ndsih
zK<7jZ6;JQMg`EE<Ed3v;nGae_YfGbEw7mA^oQC}bgg-NJ%?C*@`3^}1Fj2i^CwTd}
z)n&loz8mnxR%Sc2!mbT;?V7$G+b%Ja8#*N1`PbCzr7JA`B6Z+`;<f5RKaq#yu^AxT
zb=2~47S^73a!7QQ&$DO*rD;sBLOqi@pmB*!q-U>mm)d${ETr^Xv~&QuO-d}?=j&q0
zBpTbLc434+tbMI>muqPyEzrz`RLXcR-TKl=Jla~o9q<KJ1^z?mGiQNkjGppab32A&
zdCRpcA;MStsbG#&BtZxe`-1?j|3`8N#0QLlVtxLR0LQV?+M~s;lI(#MPVo6?fZAP+
zm^hubhzl9ZW409>%5##Z?qE(#*trg!-s?#PBpf|*^!zWq^?I*UZ-i?t9fW}zYw(yy
zHDPc_?da7suEx;u$kE*?xwEc0)At0>9ZJ)=DG4=KkKHI|K?Z|4DlJXD9zMA<pAJcb
zOGy_s(g)qT4E^xkp8ZI$HRZ}E%0_$h5xC%>tSeZaldCE^uxR?lAH_yT$y8`5Zb|}H
z7lK}<Pt+XqCc;Y?J)`-WOs#-yIh8F-Os_6WNnKaa2@boz_jeNc?Qnk_wur%iA47h>
zXcPVFFJj0aAs$(6UitT_vC<Mx50!Rqn<Z)PZyjC0=m&pd12&Lp)2~{&flyM}ByH3n
zKboR}t<R1-m)faez}7EkdGVXg{K+Mrl=mn?bzY8Zl?-S|Fpbuqpj#N10i{jf^6Cpl
z0p%@>^c&xd5@FNf|43pikMggb{*iQR9UJ~n^YHN>N%=+ay|dHBK;ekZfHxi8OE2$a
z3nke7ruvra;r~OQ$#FMJ%XE|biT->3TS*7OsS!wP1>P@GZub6joY9QP6ksnZ*)@Oz
z%WocCsrAvg*%p6kRZSZeLn_L(nd-fRrzM0YQjPtnl$c696&w01R4lMMwjq`$%9eYr
z-hDSUnX4j3o&NX7+MT1{j!0E+OrWGr1Cw!z7E<$s7OgYMKGld8EXi4#sX-ek>esiW
z?8Ey%^Kw@>)a1R>pYGt#am!V57F(g4e$^dIZ0CPxVvFbF+{Zu1*ALWH-b?Cz63ie`
z4yc$ryZa!M*q-c$GS>ZPOrc9aeovoXadVXXs=W*w6;R_)hG{yRFC=(4bTX1tpWsDb
zt#ehr9blSQmar|oC+AGVmk8juuHOwdqv}dEZWqnqnT|)F%lSz%MlD&@HsF-GMD^3S
z-4d()+rke{cTW2#9=en=GrV_V#zh;f-yF)jTX#!Mi$W8gQl<M2G^=1;WW;t&%A&-O
z8{>S|f~J)bywYt9rjpTO)BX@6rK##;5zQ2x1P8dDlw~e!nRCgs1g>fzS2<`pt854E
z^XpscurAGf^Gc0lIlrx9o5*yF!hvQZM^5kY0IqnGTo>h7PoNCEL-Vt6T*$1?$!h-L
zw-!c?@@SN3t`C5a-v^?@qMRCV=a)-_$}$sH?L4+Aly@iWZ6X9_8<a-70`BHoxk^CK
zNHr#poM0~7xaI4DNM()m0;TJ)k{no-nSqu>>T_6e8Gb+2@LQkSk72+D6TYzxE9uOL
z7R^l4uJIvnDm{pGNGP?+<I8Kb5W}8H%oHJNR5p$kc7y&pMf#C<#j3{)X_^>yNyWCw
zNC{2!dU_IMy=6fS4XxDaC_`5eA*AuU;U?2EKUmuhdF$|Yj+&G7?9F+RzH|DeONFSU
z(&Ra);>qi?+O8Se(OpiF^iAPWgM+!7X<nz5fmNg`Hs&&ss`accV`bLv>b}k$LF?ur
z9GSm&OI`*z;Q0$p1s!vt^x~Es`_IIn=ju9_M2n1mA=mB@7A@I*;H8<F*knpKpCwux
zkUIE^?^@Qg@^3Mo0C(inK#775>6>6k1gDqNeZ9m-%f*>nKUwpf$QdP>mii)I!&FI@
z2DULmoj9I^q3iah1T`F*5-rQ~JLTCZ6)#=nU{W6V_q^Jsf?7ioLEqv@9vP>eGsj%;
z)Jz>G&z)L29OKFXQ6S{U=ijJ3F_w6bxmqvx+YwmbnucfARw&~gxDvk`>GPkqw@5`e
zz(mhr_h+fK@hX)DR|8wNTgpI-CT=Rwt;9qMnbt9C+8aASv#|b1qWPhdgY0nGF9pRf
z83x*?=4@IpJ^@oW)|To^OYu(n@($8*XGEKCwW~{}QOu>nbqATM6F=&yNHXnoI^gE2
z2=}wqwo>$jI->tpxIXUsq*CxrrT5Ps)AgZtgYehdJZ)+!?ccA@tQm9XBWJ8W6f^FA
zVrbM3D6rM{^o&s2&3kEH%}cauWum{nd@@9nkDi|%J$EQ<VJ7qc#F_0wT+$_{Xv{r~
za+Vy1L5mc<XToIx@}CEk>?3o_l1ka<QC*|XX^sa`qeY&}l$R5yP){&N6SG|WQQ<%x
z%<wPBd&5PL+Sx2)39hlSQ6}Yit|ll#4GgfV;e(;H?P{0+l%<zzMKgmVc&yfCrkX79
zXI5Y9^Rl@#sP43)alXb?<wEB(U%A|nMf%2<JQajcF0+x{%<DHALt{X$YfB4OHA3IS
zu{q~e$MRWB-C$<Y+pG8{#OvK$LicDab+Euc5*?x|NB3HJGN`6@bgwP_z)iQ+^NQs*
zM}}4A_XCaVDpXjyXpl;hc{%?RjJ!|Bio3>QX=M%hS~aJ&uzfY%m)neuZaC`Cn*rRN
z$29&{=TiN^XAmL|x$SZ4IhV>l`g0I`qvT*He=O8oCGT@yrr5|((EMk{UyTC46@L#-
z$tS(Tdo7Fg(9Sk7w#%`2NGVRQql`#g_O_ureeg3z&H#H0veaNm%Kg8K>cbG~25k=D
zE=u^8y@!Ceee(jY_#s7`I;|=sahu5SFBz6M2U*cWG)%9F7b?82ylu!Pjk<lu=vi_r
z^}ApQu{45z$V_?Y`*F&6_xlg&b+Pg7r$Xp+!}m<y9mueI#Nlu7IF2gR#(w@Ro<Eh#
zX^NkT-GcE&>l6j$PGTZiE9qOBQSxO9xI*BQP^o0y;D03d6<+^D%tmwgM@GD>gZ};I
z<r>d;+7s>T5OURE9yeoW=Nb$V7`AwsO=LGzbG1Ppqz*ER>#ZmTG$f6=PqSE*F9q`W
z`@7kl6Zg)?WY%PhTLC_Qj#2(Ue2Jr_fqD#WbKSa(^b`}<yPb7%$(*8w_H@On=W*IH
zj8ck^Y2WUrR?(D+v3<UZuTZ4L%Rs(XLl{Uoq?g4g!gfOiLsCiX-dTrf(|@ik;yM5w
zIJHy6F+GZN9fGJxo%YA2K9#%xc>+E|LT`(@D4NOY!X%YjU05=5;XuKkh37Q{sI|Vv
zt`^GQq+m8l!_9mn(<B8$PiQiX?Bc5#j9HF}8qYOSoGF#7_wu$@gCiM{jtwPzo12p@
zHg%pF2O2Ly3{Nyks(2|YmZ6a?Y+A}948qP^6gA}!I^EnWgPat!6I}XE<m_|Cal>;&
zRC3N3V4Mi0GEkNA1{cS2PrKQGCt2{FKK%V-FgLC1$ea_#EU5MCjFpiSVF~2!Jw_ke
zk|WPn>7|#Sf9ihB2&=o1A<n23{hwp@uHJHdC|2sNg(PPZVM<!U_V8Tz_((F)a6H`L
zb<XbYa;ujv6t>itXZ|jfFQ(D8dr}M=qXti&ayVMej(8Dm^!?_1WLe;kB?`wItEu~v
z=fcpsco2*%D~;@QW)C-9xMA_46x?;#IqEygInX}I^4Ob@^R#oVRbSsS?zrF$Y)LM_
z%@4^FU4my>nN%Cm!PJ&JI2nw8gt1b|$RAFvJiTIz^(R|lriFA&Mj|Q-(ypT3*Lx+V
zlZpt4*RA}6_jBBMXr~79M7P!0`q%BdN!O;WS8L-A;{u<zWxsjN_LZjK19Ij<^olFq
z#;zkk0slzSmxnG=P4-02nN``{rHupY`#Cw|G;-mh6BmjOQu}Gnc38FOVbxItm3=W<
zXpW<ocdEoIU&FAo*ILKv;PGxE6`N2ZeLr7QV5M5@!YKS2O*;@JtX!9M3`_1v>BvDP
zm+8~W8?qjT&5y*^q%%xvL&~%Q?BHga(XCPr-Fmr#5BP<A|B>*E_a>ILRm$`EOH3XN
z*{bC3X42Io64g6jVzsiD&R#d9uT<${tn%{pX<83(_qY?B5_{NsqJh9xxn;o_fPQO?
zHLrhIl_{rdl!jb(x@th?VB4P~``<eBnLiJ4Qu#c^?+g(mPyVoT)VGMt&72PK29HU2
zek%Qz(}f>mn2-lK-!-wU?}~V}yP5DMSbbc!QZI43F<RDK3i6z&2PKCaV}@3b)+((%
zXU*BQnobA!EY`pWdgY*OI`wb1wSp7YaxPW&SKeAWN{(?aTl~bpwKlNkZ6PYCVl-*@
zPsnKfQa=(aqOZfW+f*Ie_zAe|AhE^JUqAO|Kf#i9wl_D_;sAc`>bOjC#~7i&pi=N(
zF5b0OzJ9ww^1Hjgf|UA1*-^8-ueml6arcrXZPTS5CsUx>NR1p%Q}4O(BC$8;`y|Pt
zCHK(bhiEk99HX(08CjcFygi_^968;kY93<NIxJ1iKYp?7vI#KHO9qTdI8?TwRM&>{
zwMYZOc&8KGgA&>2&G8SR(Y(sJ{E~cr)N^$CkqfVy2Dp~fH$d+GQll<ec&9;^^onMz
zcqPG7Tj{(9nKk8?DE)NcRUVA%LP-?5XjKB`%fd)!V@pv48992T^?OK%ZH@()6mLtl
zjt|@sxF@nIgZN}f%DeI>`MEfIwP1LQiPaUt95)rDW8P8>ahW@@e9#KDeSv-PC9}d~
zDN>H>&0kg1mPuQ<q{K`|Ub-hrA6uc@XCYQ%*XQ$IO*fD7$2neR**;CB>v7`r+@o=w
zl`tgX4;ukSKwYinP5|MaQBVW*w|5&4ql+tjfAgy&YQmtv3lN6W)h^vov6rcW=dn6d
z=?YfMp^=+S+R>a|dWNIJnjRmGl|+w=0Ox4j8DH>?)X6C2Cy4Hk5Rrp*ywoL?0w>^0
z1NTRb+HapWTAb%iZG9jp<>anM9l|<Qsftt?pCt-_VIN!FJ@<#3YrF~7-qsa*<%62#
zcf6j!N;qjn`F8Mdh7VB=Nz8SyoJe3$zJ6oCB`9J43SdY(v%QrknqcGD0I`Hpd1Zx}
z5z{6{$G=T6b6&0%=e#<u2z}#{y9k$96<nfeRRcgu;l)=WX~(Tt$mh*SRcErX!l#5k
zHt5jZG$Xx{R)b7vRt^~S!bfqg?5{pkeQ0h@<NIFB4-eLW^adm3C_+Kz{Re9c{Mvc0
zqyW2-A0UD3XwGfEMG)JT(FZh@KsYhVXyr~2_856A^K2ty`psP8W%W#<PMC|uZ-vbI
zasX)lXiNj7?9%+w{#QcMy?uAYU8RgRQ9V0sR<vo6V_!l(i3`v7j-gh5t1Nf3RG@ZT
zdfqtxVv(llxjtKsUJsU6#U-P#7VZ<}U`wJZ7S%m_u`-+G&mx~#$@z5rkkT><4wtpI
zW<bTs^&RYXGbq_AyQr=Zu%o?-C#0v+mo}4XpucJlqzO_4#r>W`SRO`F;17YB(binH
zwyxtlv00sDj8rZZth4>|A!F3nKtyvd26mLpQ_LsafYN9}MhiHUr804gMzAdV5<cR#
zl<wx1f^Cqc^Vb!F5jI~lD8--}=XlA)Foesiyq7#_N8Ml}nHubGkn1T+8Wl17k?be}
z`vRBnl=Wxu^3ps{nmop+N_M@_aqm=O{d<IQ9(ofnA5-aU^$8o3t>1*zzg$TNj}+jA
zlvq;k=4|peScsx^IMRJ;yf_)XfYYC|JQ$ijbme7g53LTc6MPSeX9Y4NQUhgD-lTU`
zwP~hK2h8(em@FCLH5alkW?B3qOR4pxwbfu%v^%d)GGwC~;g%Z`t<m!}S4dWq|9qv$
zWW%?3`W+JcrL->6?7&NnGo7F|JGD)$T#go<%W{mi#>|Pte+hq*OgdMMq%slC2`BcS
zh?3~2#&cQZi6>MRmb^ND>ETx<*r=>TKeB#?kKm+Q^jFRiDpZ&bMh6b%9lZh4o(#1I
z|6$!M-cqm}KC&dfg0dbQ2cOAj)~(CUKN*De$cT<4ZAT|Z^%iJUru^+meTl!HN!20t
zc3I?Lk9;Ld1~Z)-;`=6xZ2p_Tez>oA+(M^6hgx-AfT*pan8kE5w=cV^#hIMx+Yg)4
z*CKw?$QY`#*Wa#H&{}T08E4$_F0tH1GH&S2Y(${<px9Aq5xM-~#<72J8d=8dbN8<2
zb7vmH^wf0b{R1mM0;`6DG>-aFdM-lM(tB@PvfP`ssT>y1_^O@Z>f=lyl_T2xo!_IZ
z(U;3X4+a<pa2>jj@%u2v5&jl>*Si$@KJI5P)-yz>QUe>>BP=IxKR&m6v>*2!Z_;0N
z|L#WU5Y(LV-fpV>S3LCgAzZ?{W7r(lWEy&4IKpS}1P}^&&}&^~Jvil%VsW;b-2Tc4
z)^MxBj@;P%sryd2?YmZunCSn+5M-JrR;Dmb&8SaJ*?aqh=d;c0Wh`kSm?GJAjcCTp
z)wguMG|>}xopSnrOroYXqS1=h?i+pLgA5;(Uzh(m@;BA|awapVd#oq*XrG9j+2r#K
z;ZF9EKez}l_`3YlK-UWhJogB7(VU7u5y_FF_*!A}vObR%@NpTXsK(z@ASMAR>tPWW
z2+erVt8e`_<Z|hcl!x>Jv^FtL=BX6`Q~mO|e`?rb{6B<^VjX(Q$Up;(C$Ey6P;AFP
z>4*!Mpp~?jUumD83}x4izrDgcOzgL$(me3uKDTM$={5Oa;1%2HlTkc1IC9~0tv(tt
zOn6T^GC1<hIvW@{za`~xc`BoajAt1~(~%>|bdU#Z{UdA`|2T260LO?10r<fx^V_Cv
z-4;vwny>i=ZCt8@h-xLVcrfuvNouwlGjW_}FyPAR&28D*O|x_sMW(6Gte4nQXT$mD
zKXhI1eMD~atvXr1swUMv4xbDS7str(DdmkPn0Z3(nv8z=M<S2wHxZ8jt|)S{aizyP
zXXvX5jPsY~tN=_^{gD0jV<k2_SN{R{#2)*6hBa5A5MM+TXFSN*2Xv!g<>2t}5U*dn
zX8O{XYdJg1caYwM0OhVD1F=o{AY(Uxr$r6CSV(o{f7WQqZwN%;aK7dN6CN*n)2J~c
zPT$8{Jvn+7`+f6|HJkAB-Auh@0pk%#I(W&u46kD;KYqKxRHz9ev7jWGzKN3UqFx#D
zdQ8lRPG~v>17{PQU!FSY)sED|bicX(x=t+*#)*Z_T{2%Ez(P9#v!=evKmQ+K`bJAT
z)QT{>3|YE<(EECp0eD?-Ftj(AuNC+iR8t^MdEpaAr8>{Bo(ymsMGJkqi1ewJKhTNF
z`^u3_U#(Rj7Z);@zmw{u8WQH5vC{Ql#xECQHwQ`Uk>dmh%LnuL!xTi!d3B_4v;Bsg
z;M*{DP=n2Gg?*B&B#kro<aIm8pAck@1RftiqsHT&s`%q*`a>QyF{gqRoA}%pWy-7_
zSM}JH5UF`YcG*%Oi;g==<21dNTV?!WKW{f1VN!qZQsMg0%=Klrlwf?qB{9V@$6%QE
z$o61lb5zRm6>{k;_nao#`R>3`wZJauwYz2}0;r!{k&p>~oGL7aDZ!D#RjVB{RxEJl
zU#{KM9@XcoRh1-v<VKYii(|8WRnyEBM5~la%?&j2N`gs85ez_hQ=5yZBQnPpN2ijK
z{;W(YC}q;XiDIRo3LLKc7NU!KtW#tU+VS)Z|8vX+-ee37jNTgbj4hZzV^vGCejMA5
z<$vr`t!e&bbTQ)l!N5}zElJnzib*`bZ|M0HUPxK)+@waGbRi@JoF8D<=-QpyO+3&C
zB<fem!ZP);U2UqM9uU40@cR`t-_7mwUZFOUIjcWGb~Tm!m?X#12SuhBRG<L4)WyOw
zX4|g(Ks^Gis#^ADU7|ieRP{~G*q~jGE;qsAOxUqcY{J1)x9#!xAV{M0@pwiN@^F)X
z&?6}A=gec2dxAFHA1UYj#okQL_b!Y}PVE|9y0o8*&T-<OiHt8~(=2Xd4d|^e0x*$^
zCC>NzdLUV;fQHo~l2<(!#I@=H+TpXFo#@!^BGIqy=GJXDXJ7RwT$cg(PC1kSH<!@h
zI*{$@;)_ES2h7@P_G3UVhg%Es*1GInPIELA(%of~-4>@YN@_$&u&r6L1GCt!9u7|(
zQ6%xJd%VfA4=w7AF=V7iyS+Ks&D&E+uXPBPI<-guc9}@DG*V66<{JA>q(qwx@AB9_
zX0g+Dg<vy4-)PdTUzMWY%U=*XSQ;iA6W!S66#NKY3=5WUw&f|f=g_u(Vc$yN6M-8$
zq<pL8=#trqG?jKJ(lb;OO#8C92a-mJVxP8@%{AM@bvrl4(vw$gpZp_vAndk%X|=k@
z<X)E;&jqQ3O{~Y3<LU03h-%zbRjHZZRlOF=rIvNra6INW@h%$=$qQS5|65FD;?x!5
zU+toIe#?!PBK?r4=^YD^!Mqh6FFba$m@ZT!!u?9_(x${6d!WX55=%v0A0L?+h(S1>
zcwFJQU?p}YtF9kd(nUE5_^9<@EM52MeK1GNlK6xYzpIo_f3EL_PQH$CE;2y{`O`i2
zdg)x!uD%0xq*tp>AxFC@_gYljghtzOZ;D1*Zz;CT6-CoMGu@bLPMwuppkg8yG+iS7
zCjrvOmezX5<++ZZ*6mGw*nc6+;`+GX24=dQzxfym8x6Dt#@FJS6JNRJ5R~;nA4UzP
z6X4r8vrLD#dIhmOOs`)@rTsSGeO3LGbOp@m?%XYCg0bMO$|du9&}-Ue-Z~AyA_sJe
zl$4mrfRn-Zu|#G<3Yci~Ta<<TTmq?ZulDx}X-@tl(Tq`bSlW!u{Yp8fI^8kaZNKAO
zjbKfIzc6)f8wy~I{0-7HGI=i=z41yAmtp32_lhb9*sJy;uEy9H2z-?t+u?4ksQ|Zi
zK6KMI3^g={1cB}&XU%_quNJk?0ND-y@K|pBJ68#2GpMRD<K$tCP?=>XT59y87(k=E
z*}8#|GiURM`e|5+qR49Shk7a630`t^3{>$GHlC|a7n%;Xe&~#zc?h~4@T_Iodo2fx
z0^+g>k5>94|I<%F3F@k53Qu(4^+~)>U#IXCUom+m$uhrANNW{t)pSz9<qATlDmfW$
z)qV(v*vy<@?23DKXlIhY{r#jwA>k-r7WE^bGrys@8%ri}AniCHNNl}7o0cmR&4pX?
zR7o<QWNqjg8%t}>@G-J|10oK}gWs4<t}fu!Z2&;82O-;Kf62gqT=-XfqLn7j9ud0q
zPh14+{B1|avt?uU5k4piUq;PBW_HEf+L8u+P2t{P4(?pLN9komB`2pjC5o_^$e##i
z7I2u~lB7mCgLG|~#jbEKSo0yyCF~Yy;7lVp>{mn$Y?p?s!kJ%4G>VOM$!NdhyOig`
z=HMRm$2aRHs*FSdl$WaXhX+G>jp;4TAXQ?ReK<hN^ah`cY)cm<UwcMu(_i3~Hvp4G
z#K-;6a-Z&^eB;B7;Ybic;7a(K7%*7B{4@f&V&J!g3>o~!m^O(q-@7?8sp2edW|k)n
zl_^$Ktix0mQJi&b-39K1o`fAe%&d5?#to6tl~hUgaG8=1_4==Iw*NM}em%D-EqT2>
z$mi<_UqceMoyycBwb+l%;JhQEyKwKC;gE!Q^8HcWC0721L^@n|J2jl(%fEei{F<qp
z?!dAkW0m1`gxwl6STc1W=cQBKs{rzm)&>JryIJq9<qy9uQpotd;3!aqk3t2+lWEO7
zzG*eFic810Ss>}w^SdH;E@z(<)zE`)AK$gVvbF&|qPp!;_2sGQ$!W*=`}j8j@DeeE
zcmJtfiZJw!Re6A%AiNKy=!CoV4t@Laqpty0hp-%9nLZmPkkXmdUAoZW+w&aOC4v;+
z)x-0#2`d0j{7!-`W&suWI2NG5Gb!w&5EmJO1Y6Z!*tfBTk_zAs6RX)#w15jc01<f@
zvc*O;yu<{$Mr|DV#r1V$<Zl7)`ugKpCpMkw-fgcZ1=Zvtr2LW_*J+LIN*8YViAqVU
zM5k7&0NV@i2nT$H$06)gp7&8%+@&)=;FszS+-qVp%s{>M2#d=O%XvIa^WE^B{e@M#
zq;I>!bNbM)_uAA^<fQmCO!Rls_gkOXu5O?8#3$~2k7f#*s}OT7dwZ{?f^BhRQ|q9^
z!;#Yey-#@iP!^In7e*0qeSEUri;W*|m;w)L0_p#Zvt_33WQq>|`YZuF5g`Tqa&{se
z^EL<yRcu5esk`mA$DI>KwMZ*b_LRq}X+Bu9Rp3}7*=xCfB*SBwS$Q4KyBEIw_{3>x
zw>~bH5eCVkPQ2^EzE_ekT(2+jw|GQu;^zO6be`dCzF`~p@6f8!ma0{>_ExcXYlI-w
z-dpTYdsCysu7s#vdv9V#Q6pyU5yU3;-t^7;zTa{j`IP6#bKlo>Ugz~YSE8HIU!eK;
zu+OhC?<lpDQ}tZ(LCxoM9CFuWbJ<htF^K`IDIEgF8L^f3G|5<}P~+<~V@50icq#3N
zv;~Q{5>8|&rkgVs4CL(%-_TR)UB#zVbJd<eb@&^d)ZV_y(OSVGDTfL7R-8x#o{M6m
zS}+yAwbYYrMv+@%M)B%S(z|OZjja1XmckeX1KmS0SK|zW435BlM!-0qg+skrIhSyk
z{hNfX7vJ~k`;);1Zo|D2X7D!k6~9AVieEjj3mYU*u%%30UF<xpr+%ofSb(26KVMy!
z!q4IZ6@=_~#BFP`4o!GViDc#8D{3@m#A>823^Z)ac`yZch<FYSXW~*FQficqq0)ow
zz~JP^UoR7I<}VvKWi<W~s0360BT)QDP|&tTdF6ljkKlA;(f!8kfZ_ASLZ;Em-Qyyu
zpac!Cj{y_=I%&6N;|a{!xa#N{rMsek1Ostbczl6;@Ye9H;>EL`;LYGGX_?SF@&o+3
zOy?3x;2#0*;DTv6h{-rzQCWiG6@L?N&G1Rkx3Wz<>87SWNV#bBAAz)|H2G3fJwxK-
zK4kgj%+*JKxr1Q_t%FHQ(iHAsp2X{=E6M5%1>RUGmoIpm!Y9P%X;VXNz~Zg66gABD
zXSEs6_zq3LGX~YZp%)Fc^)+fVka7*|+)ChGa#?pqpUgi3R$!A`c&I>wH#jpG32{@4
zp6^?RWDD*UU-|y8X6P{LVGq+XPKN5HNl(l2tFr^X9ldWBzUj7YA&Y`Lsd0}JVpWYm
ziz)T*pX2wo&BU9<gz<46(0YV(W*_d#D?!n+;=CX3^ZN8|RsMU|_#bai%Qf}4>AfHC
z=M`wb8k8`b5(rENspt!;rSh1YdCo~+Gz#ME#<Y<Zcps6FvZP2<;#vxnubB`&o{`_@
zHMeved1R40?WZwLI<bM5t!;Ln#Bf4}WDv|QSN;nYrkLKg14Y%kn3o8>%zS%{%1tt&
zv_v>MOZU!&+J2_+$XB7WL^?hjuza`9sTOlPDC`y2)uF0Zke6g-iC?Ssx>bK9UYb)s
z5dQhQ$ht|51CPw-X#4rq^gt6=E0%k3X4p(Q7Z3*c5RYN;3{_4S;O7<GyI%7>&{W%{
zp00izvx9e1_tR~D&&8@1`BU46!L|QHpKU8oW22{-MgBUF0GO(8gHOLe1&D-kp9Nlc
z9U6PPxkMrDnW4G+3aJIudOvjCw@!qNv;Wvu2B+D))|0KQbdLt2EN8kdOIOFD%fjNq
zOyS1a`{<#W(iX~Y^cO$P>8+YNn^BJt;hE4|*IejmDxNKs@2*!oT^nijoEAENXpoy;
ztGz%XAH|uzv%$vlw>EN(MNdIvxv`_#hO>}~Pkq&U`Pyet9BE&!YQbt9Q<~##i)Hzy
zTK3DD-#UTEUcwyV*j~g3%eHrrOv}MXRM))LE_#cIkJhKpAxr_LOXk`e|7teb^F1Hr
z7T=klRMg@Cx!9Co9;9mmwss)wg{N;>S1>mi_GXCM&XWl0%Gu(iM|-s~qI+NWb)r1`
zjonj9iM{L<wMvv+&hXs}ccNi;UxKH6u?ERO)P6Onv@$*)z(`?G3y)Ri@d}T7xFx&U
z?vEmF{=Nn7_|`hH@^vW9a{g(s$XA9bwukxAM%jO?6;4g3Om41i0Es8|W8Qc7Ms#>h
z{cX+~ayZ@~%9tZqD_EynQespTUL@T)xOuGz1Z02}qjVIN!b`@D6HqSOap)p#H$8C7
zBTax|aVcG$1xqXkKA;KrHqBGP38M~e%Xbj<HG0g!1`O3=1wqS9jWvA*kIQ1q<ch_;
z=MPi#=$=}Eg=&6D+!WmbFLvV^*X6f);A7U|$w$q;19kLjvGIAKD;9=72gD?<EoY5%
zEqrEjwqhJ+jj>EAi<98lyRILZ$_XKC&>N~*Cni;h=6AJWf*l6c>=Y`r(=URYqb#}h
z{bbGax8;bLj~=IjTNNaF+i~<7ZOhEN26RBZ+y$x5ZYW<o=(bDV%z(L|o(;+1tUJg&
z1VcX42(zV~T^tQel*nh^|5=3%znS^KGM$(5K|0(o#Yu+qrE~BQ{YbO+nd?@-+m~Ci
z^54L8Jt$JLk#gfta#UN+g0xPD`*1gPn*};AV__<@{B6vG{iUccYe_-r(8s$H8<;1>
zHOfL<_q~>P^fLbrB{_0uq$iFEEo3cLH=;ZAOoCT^TC@cH;B{8DNv<o~VKGZmG>3Bk
zy{!J4S>QBg&-28wroLa8Xjgj-se_>sM1a14ss3wxDPXKAo2r~Vs&3cgI^8ypw+&uD
zqA{={(!?K9sByjMaagRl1dEHVR<YX{k>Q@{M1pp32@_RencZyqQ*Slj6~Weu|0t-f
ztN95Sovy;Ox6@LgIXOc!8b4fkitrxS6pw3u<s!SnXh$$p>y*=AZCUy|I4#A=yy;3R
zEHA!=f)~Ol?g-c26BVLrVbvDwQyU@wN5K135D*m!jv=mJlX&>;?ik!mv8q4d;Sif%
zFX6wyWguRVur?E%;4+DU;)RwR6F77ULH?bXhGFz5k^7QQP@6W8C!{X9j43;2C;!KN
ztCKcf4o#Mq*Hk53Hqa*D9%9yUCT)aT9v!1y65ddiW;r@4UU&#)%C(}dNl2ZZ*|GWL
z8B52M>)F9n7$QA5;oVizC*IA`vbs%u;ulCAplFug$A1#vR9_h^c@iix)<2o<;oCNj
zLNVal)Nb|a!z)B?W{#TDL8q+4MJx%8?)x~6o0^GJ+ima_+-m`CSUPy|PChdL&iNEi
z!tS;ev9bu90vuSBygg0^;^T#^F@I}`R|Jj->V8gZyiyUulVt(Fmac3}vZwj>lWQNK
zecGG;=Bgks)`%o*B@Xao*L|#jM{lx7H*F^aOnZeiJQ4fw$4rv*rQ~O+vw;>nFZVR2
z@r92J<&f?kMUDW54^sREm`6B;O$~hYN-o2^d@UB?2e#AbFncx`yAQs?pOe;2OmJIP
z3v+6==)uGpRAbV?+aWP);`4!8=Cxmsu!Nko(9lZj17$4XWsq#A^W4%UPb&6NyukW_
z^%L)Q#u7GLR`h=>#Ddc<!)d+x(162~&F6e+gW11Yk1k)AeUr$4%Ke&$Bz61qPBKLU
zMHrfVn40^$v`D-8{q4_RXPoXc;xnH^sHnx|yq#ds#kgR(Jo<!y3SPe@`X@0XBZ+5&
z`3@moe`=kc{yYpJm5*pWdZnunmZVoUhNBb<Y+h&EV&@K7#?Q62imZA0hhN`%bKkd3
zc1c?u2%Vx%_%P`5W2TUwU|_}4NgBjt{icy}6~J=fH258#@b|vO{6jo#Sk%B5@*sc`
z80|AdMQS{RPF9;gDi*NV{n~HgZv%bua2I|-4#_6&>m8auFrOzMx^p8ICW*Os2Ke=>
zdE@P`P6$L60LMk%J}qh`mylZwV)Vz2{Ug}0Rk|Wi0;<c)JiWIu-e$J}Il-O+B;=O-
zp0DKd{eMbymV%Cx%@?xOx4iGH-C^b&BRxfjbnH&`CqsZ!tLHVtGLi6Fuvmv_ZAM75
zm_q_4q3nE!7u;+fgw%W!!R1}K*m6aZkE8Rkij?De+>@{4VzRvrb13cxKzZNy<jZ@i
zxZ&J+mAU&9tS=c`N#4gI^lIyUT1@KfCfLH?=W556=@PeHc9ZB*fpm|mZf+w%N*0y1
ze-8IyQEGjfJ%aY}tWUjwQ_V?UXRq@nyS3%hVxq*HTZb+M_n$&mN7{yXJ_X*a*jvhv
zK|Qz#9kX+xYJ5+UY>d-P!sh|;*82N_6$0!{<%32oQAFF2e0gMvzZhFT9aT^MmMBrb
zYQzUC(OuLAYNiS1d;%??|1JAIq=$HjsE*+wwQ;E7kBpJYoM=5tIh9zM%pcV@UzDo2
zph>u7_2^P^5Jes?8zFHk)`q3rYogIq<&IY%ygA-a4nPd#&p{JKw-(3~6g{SMyLnJ$
zp=j*V9aqs`*v*<~kqbsbapIJY2JU9hgc-Zh%5L&IwCoes+Q*78m>x2d&Prsy7{xQz
z)of)HrCjZEOFN=-ypB9{Km2D_lw9WvOo_BX<Z;SD-#AkNIjkFUXXxZ6azp(hna^5`
zRFo#|g(^0z__UvRKK#2b6#8}V<WpuxXQAOkC8zAt4%4IpNi^{ah10S60d~M6ocZ&G
zr0t`Vo4^qpa-?tGL2FPXIUSGC*^pLrG4`BVQ%@1_F|<dc8AKjHrunx7))oD;e}Z~?
ztY&x{7H5-txN^pv5vI@+*x1~jdpm05R$a@@M)LO{3|3@r#f4!>R1-%=_(5+PbBjKz
zE%?zAIgF>otorQ2@MU>BK6>D5sw`mMF%=_lbVZoJ&^{9Fz~rgY*c=6o;Vg{jDV73c
zyXoG|G<!myOdX%UU><+7#ffDL=xpka?%N@#SEBXZ-JnzxM1b=Tc1D?DLM-P2UR`y1
zcqkY))Bt;cCd0_MW1fxbYYXqQ03ZaBq{ES&kxeru;x;Ty6YMaO{%QHaPhdj}?qAV(
zf9HQU6`~VPTtDc{QXc4i)VH4=h%U49b7$g;(y4K_7ooH(>aopcpb#GVg<y~0O}$mU
zj$ob;D3dGM0_zNlO->ln^A;BV8DU{~4E47-R;ZZ%Ike)~*l3WkD<BxbUKY@z>-dZi
zu7*o;ee0So@u>OsU@xM@dV}kpQ}<r+?x2nfVM%P{_il9!xaW{fLP>jCozgguPGTEM
zMCHM}<O}+Yp8yhB10zU3;$c4Sggjcy@G%y%ooIRhxp`eMhL2MOV+Gjz#3Bj<;vcso
z->K`6Y)A^Vp*3gZh`Lz#bc-=*qn<gECaZs>DAcYg=gvDO5(N+Jy3*(5=PRTEKJBI)
zKbeCY8JCl6G_VbR)mzp%A7`Wbw9kz8ACO5ozS?#yk)B3#NLpu?6=s3eHj3KC@jB8U
zmiuEA^p4TSWq-Sn^Dh;YcD=q-&6Vn`W)#i?HE$Ain$|62*gb5>tmIP`TV`Qqu+mis
zr7N8x$~JK`RXdVMBbUz;i&EpV0<HZ5bv+S07wg|M;L=R2Vm|1P3-H-E61Gdxbo=p$
zGI1;yBxX<9#KcuWD46<76P@#TPb=VfNVB#or=+X*ZDC?{VYf`+!aL7tGZt4~0y^a~
zz53$2hL#cz7b-HPJl*t?Cpt+m-TmKJ)81Bd?iwZ4YAL_q)bR~F!IGM;`Y(%C8$Hn4
z4xP@9FR@DeKH@+8Jek!u%@1P4v&3D;Jw`vxZlCw?PIh2kP>wn<mqyL;`%Z6jB=Fya
z)dmU~v1OL?$FW(_IuA%G5RZ{#0>~q491eOw3sWee{>Le6*^8zHU^9#Ru`@+h3SIWm
z1(Wyf^0C1IWQOR5+SBTM)gr;!Ej^3A8xIP~Ln}AN;Cco%j70DdD0i1EvPHnbsumG|
zpk1wbP3t=JB#LV8ytsbQJk^J}$hr_6K?yFuET9-EDb22%cY0SW!4#QaRas;?NSgC7
zp&|ygFjG;NnxI`I9e0B78lT|1SB6`>qE*ti9oHTF=pR8D!<KZ#QX9!Z8$QQxF5sDB
z!Qr<K{3lPL@u|YNwb(2F+nTP^yNZ&NL-i58%wff6rf%WRY=@awAs2dA!3V*o!z<XZ
z;M}&CnaeUEH*CvNK}k#h2&l!3oBk0zqt<5g@RFp)ovhB0rxdKLZqw$m)hrSS(Uvrf
zCZ$vC+G!0LxP}uwmC`!*J-gaaqy%1a7G(R~(np5T@$vZuZi^M>rvmX^xtiKDpkWTH
zcm=jGIqkJ631@6%3?T8|QRWnX%QNPrc2_T!^7>zKgvzhIV)_z)iQ5Znh@MRGtf$s}
zg($EA<CK)(3-fuM2KhK~8_W=C{o{o~CB~~CEnW3d?WNwb6eJeKDyjatJlElcPxL{J
zbOWQ`GGh8j`eyuoF<V`FH%nS4&ZPJQKX+<ylt>lMVV`J@3ZCk6<Zsl*iVel9&C+=4
z;eo&wL~3t$eU0jXrJHKj96C#qE?Axz(_pv3{^s}>6WD(Q`ni*W&{F>x=vBwtsy49{
zZ4++N!?=TMJ+X<N)O_a}c<wss2v}$Nu7PFMvS>stN%^7Fe8u)xnSv}AB7VP5_97-J
zK7&}B<(VvhGu}VZQxw)mNDm<<Ky5$bqOWKbZ--Fv)#XlpJ#Q6I%a7`Ko@O&78=_es
zZ3xz+k4+(yj^Xk$LhmQ&x_N|xS8>#Q8wp-~3)gGfvGFkyu`kAKzBuu1t_eYFJ)%vH
z6f)UEw_v~8G<rA;CAtieUO1qpoay&->)q7Ipq%9zn>_ZbPwuC5%oW%43{l(oY!cH1
zkCVY{$y@jRE7P<<s>6tzo?E?JS+gzE=2KI~%A}sVAyR0~y4_bjSUcIJdOnH4o82$f
zEP83LLw8a|n+0h9ZYHAmn>ybd`^HGV5b-Z(fUz3?weZ;VP`>aMAZaD`APQK<Y@)do
zQPe(k)jrc5v+dcY{_C^{HfYo~iqe|+r<0f3Mi`eRaUIoj7k|in*azs}#d)%9%|^5$
z(xLE4DdHXBzQQUyhNQt7MPxJ`^pj<bwKoAo`$s(=l6%GK8uToC9++v%f8}cKmh@Zx
z27WCze3GQ<v@(8STGeh_lN^-xBi}Vi?bABPPZ%ORazD9=FU)HtjUKd^lm-ahdHNkI
z2VnhF&1z=sX2d$%f-fV_etVF3e=3iXU-z)?k1wti`~6WDvr6%!JxsX$%gK8b;6liU
zSYl~L-)WHqHuE`jQeJsMHs-4x$;rSBtzW6k1HT~3yxhy8HbTEZcFh`BDW=`@FDd7L
z2)uunPQ}x@qaNxy9)I8LM?}>0A2QI$E{*0bxespn-F!kqnj`@U`CG7xBuN;4fKCYh
z%;(gPCS)evyKmDD6TTm25N`sjkJqSvZrC2*tg~eQg7<;b_g5DyB-Nxd=`T<hGlcJF
zN>>P<_5&f2BnFSI>Aqp!io9O;dXKsUSfGGjj-(e)w2j5-oXh8>6Y|amfn<L2Uf-62
zsim!`GWI<17UENCgH7DsGkm+%T#SDM!qRh2B3lPa_n&p%k<q3LtN$YiU+^?je|$oc
zuGCEne0wrD3iKk?MdJ<6WTx+&M&BNt4NT+V%@iZJV~GIT4GzD*vC{@?_!W)J-Hz6|
z<NhtPlkA#d-g>RVtK7zJtpwx)Go<D3$)yXL8im|0#$p+h0%K^Hg5KA*jKC*7ue`2J
zi2mq46gg`XD|nQpsm|j0mUGm8c@$ZZZ<X&}yCx@~{S($&ujMV6&a3K()YN9h<Z)h<
z)n9MOWOQcoO|N@0Us;g=F%?oJLOPcmJ(#`rWP4epNkzT0|9bUpp^@um+ebAK=#_HL
zoxsby&#NoD+5g>RHbNZH)qs8N#$xRHU875dh4=yw>v)n&YVDZ-s#%bxV4+5y#aZpm
zx8r_pvMMG?ZDB-#brtI!rt8`{QvZ#=q=`K(5rVGLLRUVB=F!wTEa;k8r}Hr;oyNv#
z2}3`%ru8SJGefj>MBqGXe|Ut~Zrvh}?9qBmert{-dF=torL4r!@S}I9J+hd2@ZEv{
zA8D7SmmJw!4*9GZXlvu3H_*6fwD*+o11nqUgrZXpFGHo4>HbE0_KTC=dYuewpN=)J
zN_*i$rh>xLjHsoJ8~40JkM!iA4S0pzir+loU0U*eza+R|tFOZ7%cQfrjhdXpc|NtQ
z&?AY=axc>+A9FnZ>Ex=CO7&p8Ex6n>?0~{R{3Wo9m{;0eGNogtt^FNdt{BzfCEi)1
zDlsK0SLbH^M^Il=JufDquK%*qjiqgk#8lbwRfQC}$%(qzVw049O(M+4A@xikVTs|D
zJ|cc4>34^?{wdWMs68zYSLYU4m5@*ov9awT-pa1sB#WzFNlQuGcu-w-pT4-;l-Ot|
z{g^{DheP4Pd)Q5nlDoF4eOGH%l#3k`HBMoM*7O<edU5T`!XT61B{PRNY2?GKP))B6
zRx}G$YyFiuf{p@Nn(s=Y%9-N5aFB>wY&dLYO-}!wWoCLFRnjZO;$*412F<FTGlI^3
z7^Kyt@lJY0{|=T1rt!7UQPyM}H~K7i822)y?9F94LJ6%=K*co_!IA}{k0H@pC%(P5
z(dwivoMov3^C;LM9DP;Ej5ruOxbr^noa~<s`FZO?M7ZCD<cWC(r`duQhN|+*lFf*`
zJ7Lc1EG)Wzdy2-HTqkI;ddHnRfXp55xIaSlsdV+NLy~tti$>2Mt>PFG#Yg89k8KMa
z{&t%fXwP@^rFQV;gStnUZHrH~CQupt-P6LU9M;Oq_60rEKg()B<j-ZNV+@ZVEI`A-
zhv6SRogcYiiFg2go&N}!hm5XriZdPwQ5uR+U?DoYOdoxEh-M$D$^7{7!jNUC+g%Ul
zkU>q3!{8ib#vZ+zh_+mHP55BJj=~F`8%~4bmw^313XO@}1V^vYN_c(ngB9)RJ{tI%
z=Yy};lGMxys;=4IAd81z^yT}crw*P>vRjdb!!&XQG%TxPeiqfyIDT2-J)-FcYzguH
zBQU`e*JGG-!rr&m_J(!a*B)@JJXHOz`(*)hc#hWm&5V5TIX3>kzEos2uV}tjHcyfI
zodI_i-rY|2e8Qv@y>!3sN3@UrU#O?odhsNQkUC5EdHk!quKh5BlI#+67L9tJe+yN8
znk1S1(l_?)2&Q_W(3qC{V0((YiEg(OWj)U*O|=-7JlQUM{dDi*dJ(~q64jSOlbV)_
zyb(Da?3x08LP?W+&>U9~A)QG~XR>dhB>h|GReR?P^5I-AOI8|IRR*t7qBzCQDGaLz
zHNU+7Z!h)ds1vN-);e4;#f!JE6xOMP@N&`S{TTn&Q>!g@2E@Vnqc0^ZVTMvV<K|fB
z8&yGNtO@rW_&>`Y@LGrwHJz;CcazK%{1!3Np%2KAWJZZwndy?#6!}JDLX%tFs?j=m
z-lAJ)epHsu!k8{l>G{#SosCnr$Rhm90t<h+Kz3h_y$`o7UBMW}v1xrD0CW6rp{r5l
zQ>`JlfPRSqmv<nIaG9Lvb;G>;tRFXSB5$|yIB43<TU&GJ8?Z=DH3eUeP%B6gYF_dR
z_Iz?O1Q^3@U}aL|*{%_8Mjn@3!&t$K&FryGMR0}6`23N}i7RyCkM1iCzW${0zsn7S
zKv7ql3vWpGRkVfw%ej{I*N!$YjVA846kLkwTAKfQCI(K#T=c79hzt<}0Joup51Pb-
zQo#U}ZUOKYv+FP}rK#j^!HRq^Mz-|(I}VnH{W$^BC+bu}uP~yU$$3e+cWihoeYtyz
zAAU^0LG+BgN_dC<Ld3anF0e5xnl$&MTW^W<HI}XM%^#B_cJ{%_`0a8AiqzW4YU<hE
zlkiYQaBI<!GtvWUxatw^6W#d80dp6yk`OOE`8<jpmtaM+Mr!N{qRp>-%bUaby+0$s
zYtWKwk<7=9lS!<PMi}1;Xek#?SW%hFSB=%*sZ$qT2*WpV1)4x4am}TTt}rMse(P$`
zPcv4w&B=Yipn*TOPDstr&=(ILpi$4!nVnexs3kMr1C`(nVNMlu%92OGc~PbD*E+O~
zb;LJxFHL3w#kP_hh0k0&A|%BX!Tb|E%#eNTL!Zj63ac?D_6HEE(TStwy<Mh?>l8Yw
z%I~L;+BV}!F}u`!Z6(||D5<!1|7yKiutM8d^Ymf<&_9A<Qej*|04RxMtS}Ct$Ph5<
zNn<3To$+G2VT=k!3B9(Vse|WQIiyliQo+)zeEm0o<+BpqLOHdJ$dlD8<g?ukuAEw3
zw^%c}sS$>2{VnF;Grk?$sVlyMpp;t>G&AZSL4?fs9DWkV@~B|%A3^()8TGCVW33!h
zbaG;~lc<}^J;&p2H#hWi<Zd_FJ)OmtjPO*-hbBY`qMsa&I<DBT?=X;1vAJ774Tzbz
zE?>n94L6@@T{qnsoZ+r;5lb=>q%|cOu|t5`GTwEV&%;{rL~%yz#Hz0k8GQyY&rx%W
zPs!`d=PQj!Bu2g%<gmM@EF-><?d``4hMwmjDbo;74+qzc81<0iZ&r|`zZskc>q6S2
zBo=Wm0RFT~AQ<b8qkvtANXMUKk0!U9d4ZGmT=Z+UgI0=$h#&z;a(R4l`Hx^lWY2~L
zXPWGyC>Fq=eN)7Tg0x2rZhBe`u$&D!1DH~jydwi^IC(p*gDxb<T@6gkKiM++upy2q
ze*l((K#azvBbC;qmtL<(pP4_CT+1>0?9<6i*$s3WvW_rA83~3weVq1v=xI!HhrFH|
z1Hf8(&*6o;#}_WCp3kLcrRJM1If>H(rmqM$xQVLXpoVOZ3<0-sqVsPgWylYm_UL#;
z&BRtu106e<1~USGJ|z8Q`Q)~e;7?2#&1>!-kgC6c-$?4m)*pOcr13FBTF?)ZWeA@5
zqMwk?B!vpgy99*PxQ79VSN@YPUUZ0AsA9|+-ReI&S-q{&NT~hZaKrFN<ZeFDc=jYv
zal~L!=Ig1oUjex=D#fI-rZkus_m6<VP-VwvYP5XXdiz-?>Xl?y5?(`JaF$VjI1#@<
zM;fPEc#<7hQyi@uZ!nID<Py;)#2^Xg?^)}rk;L;+`liFg`dmZYe*_w{u+T*+rWWDn
zz|Zd6I|SB*qq2!t<M{Lr#gHCX)<I<(M1d-Pvgz~&H;Q_NS65Nz7*S<6#Eil4&4yV0
zH1!H0u%=tLmp@Bsnr7r{niKjOaNi6zII?tQJL2Z~C9Fn|^RDYgkhC=Z1Sm0q@f{^j
z%m<lT`(=K{_2v|A39}5MF2_zNa)0W&-rO;zUU+0N{8X0CLOCCtN)h2|IQ3dz+m38|
zjJk)?K0g0P@N})Ka}S6dvY<Mp?$jsAJH!Ci`blO2v7c!EPCef5*MmFqnNKNzij=Hb
zF_Li?kcUx^R|rvw^6{)^-dGHCs#hdt693-CM=|r^#WrKwemTKJ-zO2{-X4j#h|5e|
zipqbl{d2yWeT;^%ejF+O(Kb=q5s>&t8!>iI<I?7*QL~PEP24F2uHHiaXjHwhG{nn$
zP<eT_l-j=Fs|i_ny~Dn3oI{6r@m&zu)t>E6iE-P?UdK#Xcu-L);bMxiDvC~XR8Re)
zuz*^)8c*BVvB~n2Kg8~b+aNUbF5ZoU`ZkKsMLVj(>{;e%h&UK{bte~ik9-XHdtRaw
zWg|S!mD{AJEqC!lbTFH~qLTfDI(Upj+*3J3w5wZv*GB4i0JJ6R%9F+mEGEVRaBI(y
zYNNe?v|8W?nmeH5l~aOfu^;=^sW=`k;_N4csqskW5dSUGgIU~v_Q$XC#Pe2p$J@O$
z(B4$R5RG@1w_Mn07~r(psAv4*GclohUS7!z!*ttrySCeRuBpwPXg6p1ey#Ds_wrK1
zw~G@r<!DbpnNn)j5V)j|6~_AUGa20)YU6ZJOj+$mqhMjUIQ-JiwZuCv-^zgMS`VJ2
zwUlG&=km3D_;#J4;8j&6NGii6;+s|h7cOe-$(j2%A{Lh%Kt~0eP+B%9`ti59^LN^)
z5m3+5jP(2j0<5j$8M<KQhS=9WqWm4CNV#GVDGbARs%z<RLT;tE!I!26UYxJ<veJ@Y
z^{6^WNjLt?t;aLeT%m@RtUr5MfI|y2O}=IKqJ2LKw#WA!K~D!Z+*nu=zA%Zl>^A$~
z0>;|;cwojrc83>wRoLw~iYjnER`GoTA)|DbFIJ%5!*(B<&82$~8D2*3+F4~_KZ3F{
zF7nsd@8&vC<`jwk8Jnb~v6l}v>ijKSalbaz$|y_?msop8(mhIg$|8)xU|vhTx*fpq
zY+5!KNY(J_hWP7-$Mp+6{3LjH*)X_Y!)buV797&21CDWLgM1njfKSkCwb3m)wQLl7
zf%#CC=^SkwMqA#d-HIs&Iqg<%0N9P~J=8eI;zFS<DQ-im9_Ln1=31-Vz#rYf)`C@@
zdVXg4q=i*pG20dt3q!#x6r?JI8K3^<7Xf=mI_Fwa)Zfn<F(gxWYshlDK=#9R+CpKD
z%<rNoX7cxtohNDOrm8A3VZ#cWiDqojnuHu63~DH7Mb?{oaMs1C+g+*2KM^qT45i`y
zx!Fn;%99gdMQauKvy4esxh%kZ);S`75{$D{e*9=1gHLiip3ckVw$o2@0YaIp&%ni(
zFVga8b=Ka2p-~-nih5J7^iOF@4a{C^6j~1PIb7SMdWMdHQ@GPVYkgyd&E${ucJmfL
z3WTw^!hPP&cHuKIfgRJW`*BoFJ~m17T~8rJ`5ICsPOn}GE06t3rUrh4;-eVP2TkW`
zA`#r}FKtvb$Wq0wtg*U)L=V%=r!*vGiv!JfKgK!*T{Jt4Ok54h#%w0I0%P!$F$vm-
z7U+p=l!_IZOMwc@a!p_y%}zr%Ic&g_)&-$yEGu?aM#tFiwRt~Kf<g7wJ7X;$#jA=t
z{DFK>e_(bMfe-qS>HbG>dH>MtRDDUBcqRDRW`N$G0GXu<W$rft5=?8S*1c9@xRbv}
zzQfOoPODE^A)D+wtE=P1aYxO}af2i5ML}hk!&xlOu@S<!r>5zQe?<o8uPvCz+DDz^
zNBN1Qi=P*^VAGgaPi&+aEqS6XO~UoM9VSvI2dT%?H4xYI?d>uCpU_=a5AEYVCS*I9
z+$EdZO*Bk}Ddw3-$@8p??zaiW(SSAwF}?VMu&&cI7zt{cX&tjMpmnYiaTvp;fCB#%
zZQ`iE^uaW+yDK!f^jGg{2Vbq^%a`Nw(`W=#5;dI4O28M3w{B|HKk|C4#-pBTTgyvI
z&`(!fDG1+|e%IEUsQ2?KowJ_gVgq$l;j^C{#Ipq@Y&2JN%YK6LAle14UKr$M3;U6w
zaE{ZPXKIF7MbDJa?k<>j#`J5o6YdvT@zM;k;I~daM*gyTlvDWoWoUI<2vIr}iv)Me
z?9Q{X&f^9ZOlOMo!t4@;Kej5!reIL^xfvv^2yyWr36{njwR5*MTzu9kQ4ZfNGrn)a
zjVC5-iiB)tHLcaNG#V7G%VPGcl0Q}Vi}636djph5>Y*T`2-Anc3&bTq6x}uIZ6AUJ
z?Tuo50sRG{Ti_n6Os6SYeWBMmLSwkHs~C)L|Ck!sprJga-F1Q;0OLoT@9&0rZ2!g+
zuqwlsxiQSUsPmJ-*=-2lp4S8qXdF?b`1!ou&@uLFxnS)8m(+?8|B;LtwS|AS3z7Lk
zR?HJ`)VNK34ahl)<&`)@&b!6FpJP$80Ag27CmxqyPvfq$I_iz+ieio<IFO)fN1@q#
zAXQth6vU1_{2NOQUm{}TzlKUD`ZQM{NqC!h^{SY7NLSUaz0j`BIj-lD6jaZX%FVAu
z>a6--#i-DHp#6c1#Lcx)x1n>h(*Z@+{85xOi>~?A4m3v|;YNtUI~SRLHs-CE!;YL%
zzu=z<6Y;>Q?iaz+JN4>WxTvCfjo%kt{)u#mpj|rC=oK-V>^}=Vf_aXQ%)kq3ur0Z)
zzn43jvgu4(el538&u$sAu3(M!3?jE?w{rgocH2}}Gg10xm$z}r9DSO$9DsyGy~N=%
z&#=pc+zmDk38x5URD*|QGUeBgsSA~mYRKGc+RbF{600Y>FuW2H&=tWa;hG3%S=mnZ
z%X!m00nrXm9H{pZx@1{6@F#&ars_dV;GS2L;Iaq+CYU<a#ljqex=gEn*vYOzHobv_
zWIYt^`Ordm6MLD7)@XM5LUQkXw(9R@dYl-ezko|XEr@V;!J=O<O1`Yw71sW6w_E4E
z?LE@D4gPEc-EIZ>!!{&1qS?ah`|F2IteRJAfgzN$2UQGIpcDN^TNW2w)hs*>olhe^
ztXDVvety#A@=iu|knf&6iZ&a+Cv5hO|H^*#ds2tT{Br)YF?si-4wk8pw*m|WNAfbn
z<E`-nZQz+~`fu8NYLKpYGMp!=`CW3-E69(y@N0|2kcIom3;ti^PZ6D$YaS#gq?mP&
z|6ng;;J;zJ-4ycF?}Q<%9~=cPnZ^#{3u~@zOaAKD<bLup-VLoTaiaQfwd0Gp(1Pw;
z&qzY&Pf#*N`0^E=!m3RdYkcg7%Jcsy(1Y@Adt1Zfiuj!2+ipz)|2<Oq_wV9#=vRUn
z5tE}QgpI~v+IvFlO580xlFua|pKEN4et*Hh-}LRo_yC>?<%kKbSwBP<MqZo4UG6LT
z1TbhCp_z8^D9*nwR;FqP7i!VL{3YAzQ#lW-B@_$BqHwrB*ESbGae4ho=iYKR)bEib
z<ogL4o`yIDTJK(R&a=TcFLpy7<UNk!_*20PLGyeameD(W7LE(P)WqwhVJNqxipOU!
zef=_{@g_&Khu*w<G^)%a=Klh^<GMUwvgID&S7OHfU^{V=O|VoqINNrbk=t~Eyo>)L
zaAW@m?QZ;igUnE&H8~)f=GVvGFs%^8pP8%WM>|=6<@EZ!?6DYBpop~o&ep_ib<@YK
zZmTDeds4TRrUitv^xk0;BRBFEOl5Q`$Y0a=m1yyvNhoIYZgFCbekktGNx|3{HDw*p
z$bTbyg$*I(vbdXlop2Tx95CP2f(W+yrnPt#2rB=IF9M(jgIAx%h?I*2eZ@1s_SP@0
zdH=D#Yu@0vdfCtwmc*Si%$quvV8z}aYkHc-P{1qrxBuB3I^;RqgJ!3@Y2c*)=5S+u
zNEsJn?oM5u#jYui&OYBc@09FqZR0{na5>L`>B?(0afoO&B)CGT5#_ksr%>9OL??MF
zc0}ufB!&GJ;@dd38FAHj>mrUFvUkq?I&VZ?F#c0q!>8c1L$f^@zh=r*&L0}*Dj@PQ
zJvG=lG9;fzAraG-na>MuvFn4jB;%CNBVR|X41t@FDnv8C_}Ld%%jSkGF+#(zel|sA
zjVO9rO!sAxTeBIVF(O&9h_T!69|1e`YRR*Vx-Va4OWOE7CZZ)hZHhzW4%})6OV(5n
z($A(1<hNJhx;)?nMAh=+l<a9V*NmwVqcKQ#=frq1PbPOwd~A506{rcA4OfojPU%f}
z-%ucmv$9@HDR1i)59Z*J3q&p-lTQ6E@EUR5QqQLx_aP`(7fOv@d$eQWg`@v|_r`c-
zatyda7c0^hu>Z`U9!vutdZwsYHnxR#%L-3a5S<K-B$)i208HnmuHV-^>uS<j<%1)^
z9?MdOaOT}Ev6LoVkdR7FVohQ%m@azC!v*0}lf#=|7+xn*mljx)uKCDtbBXT@?vye5
zy5+UcfHJ4ra$$m&zGSOf)EFY4slerOh?neSLPd2T4?Simw#(t?jdeT>gIXjwR86Na
zGEtcNOJAz-B;#`f$+;#9*SA{(T~DwSN~0j<p9G*r98Xni<=QM-i0#+o=0Da9ejD_{
zpuQu6G01(5c5mM(&=??f`#-WhYdy^^EQVUsNL*)ANM}5lXBt62oxe>D<Y6-fi>NZ0
zb>Xg-s}Dy?k(<9Ps9;nTI0Lrmd$~J5-~S+!Vrury07vKM*ll*ZRdd^)KJx1%nTswe
z_#Ax(5-@TMT9_$z1$Ct^&QE@?OYk457BQXJVCm&VitDgk`ZwF;?pkHlVrt}nBr&Ma
zo$a4?=<bk}WM_?=_pA+gx_mIYUh!nJELNVGDqtTj9I~UVM|qj%aefFd-I`;2^B@^w
z3lie4hUwcuTz|5TAN~Y`$Vzg-vLfv0z1ymM4#uqf`b1*e$O_D(rB{Z`UvNb;fmQYL
z;|PPo2uA!2H^6x-K3KDc=I5@eZ0pe{(5$#H+-f}B?-xJssi}M7&fwnAQxA0Eh*8~;
zo2&QUBmKvvep4kMX^TJsiwi_6+#f0@8tW`89d_}FLEOb>Dz`PpTA-0+))*@Ef<|kz
z_Z5C3V64>@;b+BR!XoINP@Q4~+(Sfxx)$wD1K)K{3vHOob2E+&@r*EMS9SxlS9<m6
z`OUmQAhx3yQ<t25i-#Ld3!*nM$77r^dXK#4mxe$s1}2qh-wzvhuh(%P^Jehd==aty
z|1^a7UoUANdAOUrxpXtH_{dWk7d?;3wS}p*B-n=)bK*GX2AnHZsO<bK6FH%yVr`DV
zab&#Ok<z}d&%jy$wI-ehd{PH2AEL+OwYt^XRT8SQSH{NjwKGlem#cS(e!FW#xzK2I
zz>NP|czpuriUn2sm@D7!i~U%w>ZX}3T+rK#Z=C@QW{ZA!z3N-X=~_j}Xq={^dhr>A
z{^j|*L@{nsws7C|fZrJjlP+xxs-sB^_J!A<J~Ar6884T5=qTKM1iQ&<X?)AMxa-Vd
zkzjG&K+%)*7r0_?`UQ8%GJ`_p11&AfIRDY}K}Wiz#un;JJ0MvWzJtvN$u#&QD+zx^
zXE(+2ln5vrCrZ&ajn0gO5+dqL1@Y2=W-%GdABZAeNq5o&r`U66MM+Jo)Rm@hWBHoz
zdxuQC1gU^{MK<<R7ZfxHfE}?fnvT{@4+P;lHFEjAf9J^-eG9}@sEyeBlK<i!z$^_h
zdf!-N%T=sf0eBa{*PFO8!AusrQQ!7ysy>(|IQjKIg8g@U8dc?dzt#9<(E`OAZDWFY
z&e>|Fv+$dsCy4q2f^5bJ6xZw>*fd2U+KNAmr8Qq!h!*10^I<HW7EvTP8Gd^V=0u}}
zAmfC9?^`93HslT+fc!rqW0YE6yWS^55u7#I`EY)jBtNE)KVo>P)mQO(oRDq+Zv4uP
zsWhv(1=CzaaYDZ7z%)KZrlRb7h;nz;Oyr5SWiCpy?C9Mm*R(&vgMd52OlV-kKcf9U
zHB_A{o=GR?VyfIU>~f&Lle~tpZ2t(hZ4NS7Ld+$OO~+07ohz#yQexHOW0Sm+T>~q^
z9(jg-(cy+u{n6Je36yVC=@QY!cROIMW<!a0IE;qcgjwo#YJbVg)E%XhWhDrsTkNnz
zvdL16@FDyK(?5b~M*WuM45>!JMj^X~j#(@akFWwQXdoxMSXh`$(|5$WSW}Mp@lH1d
zKfieT3`|NG^2t~ci8A{vq%@3Wo>&YcSjKgyQP;Jz1$0ufE89AoFCBI4<xr4;_4+Xi
zj2V3+N;ayE5y!(xdbLzrCEPJ0eFq~bk&%z$PRQ8AiKf)$DY%BFy10Z~K|W@V&)xgg
z=4KKEFL&Ah!n*72BGjhfrTVboufWlN1QHNFc&(0L{G`;2Y_~#hlftX@CCV-JNE>Vk
z1CVe0lup5B_)%AY`fZP9*JYr9qN?xM{(ffg*vV8LjUxV0pQ=}6Ud4^e7#faVC?*Hi
zc*Ld2dH0B#z#id}FP;b7RzkVIhJq=u56WKkw&MS%16n?^=#`jbKA$(QX|-Re@J^Z+
z8@^QD_YN;V&{j^;3nTcrA}|lr1)rn&SPKz*t;Zs2$-<K%&Fy0rEwH+|fws?V*atBt
z4>D2IRiqR2?@XWOSt;o3BVsh3mN<6hfP7i8$oB)g$s<m$2U-x7#DPbqAlU8eR04mA
zb)(W3WyizO{c@G?=`Qoan9I`MkqXq)_v%QRd)lp4P%;+Ov{e@A>%mdZ8eYtK`^^iH
zB{Wwr8OqJ7>|T^F;{9JtvgJ(0epDbei`NP<E=4I-?J}cO_Sz!wRfyY2Dq%@<DrdD}
zTe&S^gi^u7HC>_;^?qFc&rGMz^1fslCcLEmZU5&AnWC77xL)VgT08%AFvbIuZ&nWJ
z{0+Mibg>uxL=g0{VmJx+72j7Qx$JF>ANjFd_<F^E!>vXH(VJZ=3j9vZnHjn8?a94o
zTQT;HiW&Oy#CBf57?_Mw9XxWi{1H%eaeJ^lsDU!RAnyAqIB;?mXM5T^_`2!pDo}{2
zxhm}zxLjHruu(9+Mk&e5J=Fe93y*tx;#8QKBy(e2lt>-E{q3aVr$((xIv?)KWB&Nw
z)ju!FUvqo~xwJX47tRpzi9Nq?y!Q-g%S`R{!G${cxn62-%xA(2id5F$eb<YEW-2+n
zdCEqL;#WaQl2sZ~pThlIe+_F}QI*o<WL2e93oSl%32=H+GxXk(zxvl@d>r8ZJQ~g0
zwHOiL()j^fLWx7U2elQ@RnGJUN1pu0M1;Q5br05}_%+gP7Uh|G87SJbjW2^IK%1m=
z>VKrUifZs2bf+~wvvI_9hOe@gvzs1POvImH12989z0}G0>H>XvUvn|}5#|-}?&piL
zeT2fBjr}Cq6UTRb58B(Pot!v&Nz@45EXW6qd=G9cYNV7s&IGGmd3YsY!yGeJCypjW
zPlG0+SNgO`@&W<or?kIx3`MUOmy-+x68;g~<A!)Bk?1xDswSQQkg>OX-!IC;b9ns1
zpz|TZEf!mSFA8h)Va4scxFFsf`ufhl2|0LN%61P?+J&pPZBUZ-wnDTmwZd|$@6ce^
zGM}w{_GmeJTYHZT$8p<Y1<J1A9cI&8dZdVc-2H>^rgVu<nNKw>*k5v94L+8TBic*p
z73)59DJx_)OlX*365j$ZX$8$n1~P2_^oTI7#77<_WNO5VB3_AfJ1@f=-&a<e^+o@t
zm}oIO#&wf4vv6;|H4)wC$q8-^x=itmJ9<%{OOyA|H@p1qq9xa3R*i)_hxViwTynW?
z^{qeWg$9C;nLw-D5Ymu#8$A}Uve)jH#qlRj%izV8J64EVZ_mWl4x#*yppkSk+Ns5!
zTY)@Kf#!P>mEN6DHG?X}ycDsrC_7_H{&<6@H(t^os5~fplIMWN_3^`Ba5+Hg)^krR
zYSVRPGN%iUudE7lg106qX>!y6E2^W(98={V<e2b9bhM7HMnV-HAo=<9jd<kD7_H>R
zK%UEivpUsub@&@RvH&noX_jvfD%+IaaGD2LL&msVSJ0vM(!etI?aRdoj~V&$T<hkN
z`s2R+r(JA_wjJGSfYt2RQ#=@*Q?yF^L{-`W#-@!be{$C#yoVS#HZ+uyVu;u}$zv9x
zXfc8Jr(10GqZ{felHenxMH6#X^LSg%_@1Bx_DtNKToqzyA}fQmzp%QMj1d3wE)!&A
z>Q1I6R-K!~lqR0CytCsiK|L0g=OaHhlT^T)4wN{h)WqLWkzV`21zmx~CEhr7-$b)y
z&V5n8jMUMY!7H_koPqmq_|QBu0bTY;aPg7N(-KXn(M-?zK(})Dh==Dr?jOP|0ZYa@
ztXAs(ncWfWgLiZt>298u;T9paR&FuHJl3P|&X=2S>Nzw9M<E^~kp&mTA!Q%pxPr?K
zRPJGHqRF0!!_9YLwS!#M7Su|9_}((jWf&ufB2>-`?&G9{Fa=?#zn%lN`W0j~9M<Y-
zX|Yq>QJOA~Gn4N1ZzOUI&g+q><)w#%$w5$Z&(;C@J5Y%?Sq{05^49p%@yyqhgR87_
z5DsNS^gXZAp;!IRJ1iPjtmel+mB);?ZG_?~C90ij+Z*gC2~xGa*mU0fuuORud#Ck;
z%+-?i&2Jh%n7pe^FDH3a_(zJ>?@1?<u@v)32FE%^eP5-|C;%~Sd!Jsd*L^n7?)WZI
z=h)!YxkT%$FZjLNv@p%9eS;{$bhE)B+U27b23n2S9Q;QxZb$PWWN<c>RhLKlge<n9
zKfn9?ezBwr+^nJJN&JRiP=)S5rm)_an9!dLi0Vrc-i?Q!%;A96zKkaks;8{C+m{gM
z!RF0@Cip1reQoO;T6AVVeQI^pUaa)^7gTBCj**6A6>288%3>z#W3-3zvM(mBGiK4g
zR5E3{Ygq&T&DWBrmU7^#GKrnD^FhHe3Pd~TaR=PZAhN(aG#WY`U7K5vu@>eUvM&kx
z3%%`7P6nhebY!lXU*&jBl137fSx#{@#%exjfisWD>&DF9R>#s0YQm8A4kn4HA?Nhu
zdPN64Eix?qr-B|<xC&#Z9@s+Sj!`nO601PbRl-Rp{lh*1#B9`(USJOQ<=i;RaaCHF
zPr|_vP?F$JZ*U`|YZ+E4Ymq{1d5qZ(1&I1`w?<a`$*=bXz_o^TmDW=fc5sbzp2P>y
zvsSrtsLPhRAG*b|DYfCLakS9$J`P38+wv1_{SLN%7~1g42C(4au$A{g6tJZspTk-3
zO+^2ki5J8dn&Hv$w%PPj7CEROJ(AnOR-$|nb?d(lvnQD^V?@&^(@g8k-mF_KVvIPs
zTEKS&GOF74)*R>yG8au|PyD^Lq_d=5mI7jdn1MNsurHypnwNs`<_8HMwgmCglh@UT
z@w*NW5t_-#E*Hu){--RmC17(LM@o|6Jc$`K^CY<-;!!>g=s%<Sr9SUh4mczBBz`9I
zWBTjh09g4zQ$nHurJDx@PkMv;cFKT1g9C_;?=P7wk)=ypTHkyw4;!NTg<1|3-``zR
zcTZO@v(TJkmP)#0Y1jON*s>y<zCiQ&Ue59oEHliDKao|1YX7;|D*o5z9!Zw$v7ZjP
z{;!0-Z#raut?4xGCt(+D;<hEvr6)<9)I#lo;C@~P-kQf#@{v#WhT@UOzqUP=#z}KE
zz+FB8qRKq5C`C>xq}E0~_T4-U$YdH2`<HEt26b5sO;L$L#%6@_FfpEVmk;y}k(B7)
z(B@Z%xa!2N;``Fk5th{IKO6+I<@NmzEzt<4Sy<r@eM+nd1x2bjdvP;+al?Gq2H|uM
zY`U8t+T_)0L@fbj0@sa<QWQ2$?A8n2z8i|*e)=d7j1xXgcgb~_WiQ4DAf8IDdEuO_
zg54D+i)ctmdysroH5w&3AGY36NFi?za}NebBawQFz7FD6x-^X&LZ${P2I9)BzZ@eS
zks$G0sF4qk807I330<Wgoj_^u4T3p;h`yM;!uc}q%O89~gmL^>L0+gD0eTlhSTrjk
zX+bCciBOJj+bozmKiyzNI590qrr1Mq$-?r^1LNEs7@Tt=s(8v;RD&P=9?D$BpaY*3
z<ediv&_4?}(^<Xow`RXyorssl!TG`flE=wDjFdxGE%96~RFc&s7cP&&<mV4x(;h*>
zw0Ui_*)pDT+kNX=T=Qpr#kvkbg;*zTI~xiniv$PIA`Ks6V~%-{#LU?A8fN7<UXm0g
z>etN{)(1*vN9|m1n^s;G))Mb`gS>>~n^zBn=}(iLsJwssJuIu=cya`(_E~vtS4b-+
zkYN|IevMzFm#;qjqJ_?OA)Gf4Z43CAi~$_TSI@1m(y|ORJ592SV(AOf-*91!+ABjZ
z_1^|q_OPPt2GA$kbY}y?=rDZTO9e#xiC6n-G5YrC?<S3e`3=c0@11qe8W9W6#2Fsg
zB|4>pu(2a_VDdfXiokS<by>@*j{H3It32h<!y1>NiRAKLkv;Qi06wE@a8>hRMLB@X
zUHGNHrMB{6Mnfsx^BJ9(Sh>J$CjEz<vX1Q=3P8R4+{Xdd$Y>X==+*Eq+t}k_8>e^9
z-=|IDk?l=yjPcuNmmcr;ED-VyPBCAy2}|y=oc{*UT7Ix-PbN1xd-o=X5QypiP$`$z
z&&d*4-9eL6pqL99&w!r~Wn|{3A~}Tp-XCaW#Mo_0`1IIl=s`5%0gwrtM}I=LzYU9M
zj}y8(nBj2|>CfxNKC_RAcoAFb8y8*RtksDC{B-@`5yxp0Y~2lu6>F@Yc|GCnJs<*q
zTQBta{z-52RJ1V1Bgvn_^3AScZ+c0FjNTLvw7fvg7*GdTeh8tZC3Cq~=`!@{bGI%K
z9bd!r!GBUtamSjm+l4M~LpqNqr=YF{uqaX*+AhPRb<zQr<l7_F`*H)3kTu6K2AR<V
zlr?=VlW5d0S2GfjV;sy(&9;|U)+?`CYIr%;eifeNwd5H__`4$L7250MR4XQj+e)H~
zuO5Omr_-%HI}Kh;(_KRoL0(UUXU&G^PUKdasw#d~^GGMpO6QIiz+I?p3-Ts6PjeU$
zW4amcwSh|gKx^D$(aFmAOZK=m&bbtgV7T%ygWeKWUGqU8Uxq+Z5e}*-*V7$K>)0x;
z0MgnW`lLeidM_`@x<8LX+!c|rCE=RlNXg7Gk*^#2@syZ+0dK|O<8NUsmmZh(6YQ2?
z=;+x+I;2cbQF?e-NTH#R7K)Bzip$$SSd@iZ@D`J2WS@`gre6vG@}_UgM?S?=GyUg0
zE7np}hj2%jko!USGTHmbf%{01bc{q($t8Xe<%|1Mz1P?y>5i~(x?~t6Gs+5Rxn50Q
zI)Rj{eGgHq1M^pq1iHZnd1w-W>_5?d9<upgq;IP~nQYq2Y@Ca?C&m|6MK~_stXrpf
zLGbW1GU#8Yiwz;}mgGqMpp0fqVa28odjkMfIMA!9MJsFQ$1aoJF07Mg^rgs2r~D5^
z=i$%R--h8XR8^JQtF67&-rA}agbHHsQ6Z69v1zF)qAC$;Rcr4pHl?*IEwLgcYVW<*
z@6G!sB<Fn2dCv2_@9WYWE*PeVzq+lLR%$}pKOJrk6W0f@zE2r2l7L)r;`2^j`uLpp
z73=aR=Y{@q8`j|c?l)@+i;y46$7$x2j)G$8>}}H76c~T&xvA8JY5o*)+H&AVf8+Vy
zGF*Su6MkbVP@k$aR|qHdc{C4ePaCwEb-HKGFB!>8J-sRhX^6VmPEJo@D_9FPi0AN#
ziI9sn5oxaz17`@UjxCx!TAbZKROZ`y>F(UK_)565A+nILRFMcQc%fEYf&&p!3dKcO
zjDArSOq#SIt}Nqhv_$;|S#udFy~rUgtjCraeY*cntfRvKuN@i(C|2cVoLhBQ4nBkA
zBMvq;PP-d>kL6QE<<ixxk0nEnq{@bqYu)(<jN>0As;4$3r-$Q($7<4~E;cutA7TM^
z2@0NQmn?ZH*)wPBi2%{x^a=HD$v{5ddaw6m?~>7VddFfI)*jGl?V53bep4DOznf7!
z#Nv?Wj|Qe?l>CoD!9H%%2;YKhDt!^r#84a4EDqR;#Xw!Go;~)|ZQGG~{MQ<kLnL0+
zX>(;D-rSMP=b+1U7}v{c&RH9X(zU{ajMe0NApicQp%%8@EB9;o{EcXrYJV?L7aVLL
zSj%4Pb-ACDk@6*a-tEr^sVetcWwBblFN0)~sBG*ZF{~;vg<6%&ND;I6@Gzu*-1Av(
zpb&DUK|f+?QnqwqOzZq*4>b)XA68;zs4_*k@SRlhpk@9a*zXIsh#MoTR3<t8w%@5T
zj=F-~f0;n!6BPyzX@Iu&5lZo%Txhhl*a$oHQ0^fVXZM^)_>iT<qWBgOGR}ffZQ`|l
z&iYOpe?EuV$g*X#1?vD$;7k~A*E(LIbRIeJHL`9t)Mc)wcPmJEwAaW0&sTEh8lB?0
znWO8GotMA6+_pHF8)Dh7>DT#z1goC0<o|b<%J4(a*qO`fVgf=chHvfI_Vnr`qU!TI
zHcQP6V!ODpibv0~pVoEFz<KhPqWcHA&9?Cq33&cDNQL_ZjHY7V@yg79;xz+58UlcW
z<;<cl{<}6jEZ}>;@Kwv<uW=vxqm~>Oln6ZsX*2y80gpF_ZnBh0q)d4n*^{JGZ%Uv`
z_;2eh`HEid7h;XrnXz?z*IpSPv~2e+G>%a*9z{Yb@#1`OX2k87=8tiT?d1K3fs5w1
z2DxrQXk+~rD^4?vvDhr+GhYF5$38HRJA;zKerfY)=p0QTlOV|mCeZ?ww$hQXefo^A
z@e7rmsd_$r9TUh23d19>CK(MWB##WkxysxKkuS?(h5Ak`m_Q2NM(d3*aaKLz#JztU
zep(^F{e4=*le}xun3sGh*YK&cShYY{B=6<;heMY~;8M~(X(JS6dS6W}Kfct!6Bc>i
zR3y-mJv(M$<4kGb()-5lYRNMyEDm?%l^5F!tR_Y{vvx^<6GnQS)Ia~QEQGBiO(D&M
zqHb}qc!3fIfZgcyBB%4$ffB7A1K?StJ>~g4c*jtF=WYQIX>L#^8N)4H>Xs2K2V+XQ
z1G6Qs%;NMsU)U{u{AAfZbL4fNmWrs<+>~Sf{_5Jx4=q7zwVr?B33D6HN;*Ld6aF@r
z?M-^r>8P3xF@z>!w<EJuXwa08m+zfp{mg&YNX#>YqUIJ6sen~&1jid+^pLXb>SD5A
z?n5hKJ9*CWcm<v8BKAcNk*&%M91v`YU3FL%>Yq}jOdgOU<dv-q7{u~w?980<W2qHl
zsuNmTXhKIqoNb;ADPAnBR_GM@!)wvD?D{qHV=3R76`sr;MEjUSe-UnRG(EK41J`&F
z`|(S9pqB@m$34@f`~o9dSx#x8ZM;J&N+k%0Cogvy9}}H^H9>8+gwW8fB3dYYXAL%m
zN8<bQM!E=-$u2quy$z(*G~*%mx$RdP3^%Md;9=E+P9uT$vXZ9C!ZGRjLGoBXqC>Q~
zc;?b^t#=%PfG-Hni{YvdZ#jN)k;PMuP-E-SbY^&YZg-51hh_a0C#vMqQ(M)kMfGsJ
za@R@K0wn%mIEkfC;>u+SNrfJTNq#%4G0**RReZR9sj!+&rd5BqiZ}=@2BhOaW-z<c
zqc2Y)B+$O0UvNIDG1AkKdE7e4EbwhEtZh{*y(n3vB5%Z3QC<bVRh<)7)9gYMT|-bm
z%^Zeef@}tNAjYEBi+*&s$ORFZtm`6e`@(iWK3Ymzf_rMOLK?!FpJs56E0UPjL0J;3
z>4|&DcYT43bg542-~byrKWnSHK!G)*+lo4?H$+MkYG~01GPl3nt=~rNb)v)X+EKeG
zw96=<q)&fg|KyneNMo>b?=jrdR{wk-@S4Y^m18!J$RBb%c!Lfpt1ayU%uo9yFd>-0
zo+}ixpqmQq8srkAm?Iz~CpbcuymmZd%S6eC&xQnhO+(hNj=v{vJP+Bnc>0zfGCk<R
zlGbru+kKfXk|449*^`GQNIvPtk}iATz)I9DGe@tR$%Z}CJEt_yHv=sx0)3VHx%(P!
z9}(4(#<E_vpU+4BxyMeqF&6R<0d_h5KJlno!JBt*w`DcN`%8p98)cBnt1rm(%68hB
zK0?Z~#<D0sz2nZ=Pp^q@(vSPzet+BE^U8%2p7J4u1m$=%RRc}vxy9haOUL@;EBOJW
zSxFWRd2qkB^wT}my#1D_7C+^0LEp0>R`ASf7>&{+WRcnP-GC{MhwVXv>k1#zynZX!
zOt&24Xg_%c{6v0T(&&UP->5MVqYZJn9-H^pZqoVztT&S_#qb#v*M~DF+!VVlr2*|D
zm+T*PD1LGvaNLl80frac_$0aJ^a$(V_IoNRmd&5v*p`U+T`Xv9Oc>LNpLZNx(=blX
z5oY}Z4@$;(OZ;~&8y6P?2H8IG(xH0_C*2dN8SoILeOXYCbaidb{JaMVP1EPh|2iJe
z!};Z_XXv!g?GYPexd5Ivl|LJ^Gn4hbbVp+1`c?Afl|S04Rb<mWlGL6UrH+3|Q_uAz
zasvgegte4pqf=t_q{M@WF(QkgPd;;FE8ApH5&bwuOTzIqHEL+t$j}EKTdu4ESfdAN
zVc=2NfIKYZ@Kr{++~;0k04RH}wtp?y6a6(A+~1D5#gh~!aQ@#loOr`#pwh07x#S~Z
zw!4YuL0(UnPWYKunl^9ouTVZ-bvRd&oiPTG&XLi{^Y@tB3$d)pu-umo@3@p*o$Se$
z$d*efOt%>eF*Qn4M5D#q`S%3%o<2%$3;kOg@6hM5)!1-RCMXJKNJjs`S-I#vXIPo(
zlraX1Y%GEa@A;CpTC(>1+i?l?z<1i91MS>L73bUpWpDE;WQClL?tYoMAt`rW1ZbX=
z;9cNz*rVxS*|Fpqngv*7LL^!70LIrG$c7ff^-h0y#4P^9pr(^={pSkZe3+n?-kCSA
zb*@$IeGkMqa*T*pO1gc9H=M~h9-0c20lJhV^FMxx+1`vC=qwmT(yYGUT}bXIptY&}
zQ}2;~vGYP?%|^iq9!i(#)i9ag@2xD230^GI7$A=}XePZ87M-n#)u*!$R}y<8m`*xk
z$e5$6$CB??{o1*&Sd}{@ZR+7Zg}y!Bhe0}1uI+$|--B0j_zM~G1?&Cq+WnJ=&UOKN
z`W-I151HW)i);6l$&8hDzZgZ+TtZR1{vBKd?Jk{J!EL*3L86-Y)Txk!5$}^g=F03>
z$dPz!xAHMHWW}oN>(X?X`VOSD|96qJ|AZI(xa;KK9W3r4{Rq=Ky-wrP#bx2=R-TT0
z2hEE5g&x#8DkESYAl4U@tr48G)-9RpLHBS_s0AUF^5AAcb;8e!ad?|sMo1tvK+yI6
zr}a2mknQ%tFOlIdc2jOHE3SFhB!lNWQE}z*b9>R4=$e|63r8mB-j5=&@w^3wv60#2
z5&>HU@K37Hcnsn4w6laLn=_7p=m~zcE%w6tip!QGy;KAZDqdz8kkP+qL&eThZjveV
z0W1Y7yKN6Ap5*NT{=4QA?9ctJU=v=iG~6?8F?SKOvr2MZ&KTVE=t{Bp8y)*##5Te}
z+$`g$!=E*^JWyhA(q_;M7zG{)l<oUnbg)oQQNI4)wV=jP%u(0oyf&>ATBu&UXlXYU
zC>__9o{xV&@_g=8VW8GIwiUWPvTml~q-vkJdAD_;@sQ^LN?Q6jF*&ryZG`H}KIziS
zib{kEJJ!O)Q@`nyy5zN-{-B=F<^ueyaTD(@d`t3d?iZ}huYcq83@@7N^X0Sxyjv@K
zB>f4e^Xp*D^dmWrolDLC^`qSr9sB&z1PIMsW#$ikX;;>L1kOOPM9LW;zB#v_ZsD10
z9>t#f`r}w<DhEwGVu+QYqMJuwPIQVN*es%uzglz@hp3pea;kN=B;Pt+jV+wcj8nhe
zlB=Nu`eSX3bWGbHgJtZdD5I8iVF*&kE3Rg({#7O}e{cD=b@*~x_X7Ej9kpbQ5@s})
zLX>%%5r!6L9l12~DK`o8(yO45kjsy8BgQ%}a`S?tnud;h(LvhWB3)831==~Dx*Gy;
zv_-J8ueYDyFg|g26>k+kyDW`Lun<JJ&>0z}SEn)^3`t54H<#5ar={o6m*a6D)8coY
z0jTdu!eqV?-*uo1I<8;el*`zB_;r9i)bz`c=@;-|(crUmlk}(h1L*-ZA>Ye^slXYX
z)xiAGq%sBV347G=nPn=cwai1`W7RYPQ9**q+aZ_(z6q|i&1I|j*Nl!O(#SpVpF*yN
zm;eO2<C_jFOm<!U=_*7@IlQrRzJQrsw`JnaqZIN8*(5R6;5$uoiX-T=7GdA@`vt1h
zjK8sk2lknGDfa)(%JV=8WhM#s_mYsNp3mk7-hKi;%7?=1=UL}Q*v++``Imi6_tojx
z?=BnJ7;8Wks6-Ra=Y#TS7<&wOt`)uSk4qF!kgFaRab-5lH)&W$_3(X+Z5E;UDJYeA
z&1J-LLk5vxj1I4WzW72YDWk2gLPA_wQKDuuCj-hgiwVOc_DTE#W#?`Z2T}<t+QGrD
z9)0Cg%d)5vg;d>dNF&{7XJ8^rfvM}-yM?zUzrMC;mfPNSR2;D4_iVF=PT{)w-8H0~
zR|6J9Ttlm#qT<WNt@~|v^#;E5$c~z+X3SNmc!+k!>TAHXC6ddgPRX8zbpRtq2kU!i
z+sY6z$!zj*xMWHH_r)s_{i$BVp>I4%A9pXx=i3lc^McR`#;L@8BDXs2szb}m{3ece
zRj0OQp>~PxqzA8EROGO7NhLXnFI6foRKujO9hB`fvZz>k`AsZa3XpG3BIm_@;cKC6
z^)TbGK>=8plfdt|&9sg!7~lB4_~rz2MSREGygfn*59lmN6bo6V%t?-)c>DnuSDraO
zW?WRrqGODzNzP%i{ul7hOWKpl;?UZuOF0L2znipySXf!-uCIGO_C2U9!0ujk1+Mp{
zgm>FTW<x4=Wu4{LYL@bSWz;ZU$Hebss0dr^nsv%oncKhH7I+x^E;!=y^^|15V0OT8
z%<6N^E67#-6*;yzs{XD2sf+i+i>u`XL$%t{+WD~<{B?&VU8*)W#Ks5Pb(%*yJ+3Xz
zkzS3{+c68WMiW>@0z)MooG9+_Qu6ka%R5X<B}wK_I3PJT@W($40=St5&vVDLH68V1
ze?6Hh-%PV~i*>oI6<1+;(^fQa!5oD>HFrU__+o(xK@{$u((gZ@+gu0~6Zrdgf0ZHs
zY^Pba+%ON9<XU+fL|nzXmhf`>icb!UPMv-a*9~t`GDj(}+TIr?TiOTS8md*53TT}6
zMV-w~rvFub_^Gy?M)HwPuzFJ{^Ns1FTB=#{MTvP{iv?Jw&|obXAc)rn!;Oo-TG4l}
zM`A1$5V{F7zG6WC*eCX^s<ZcByQ$ww4dBfAI-e;9r8@3xy~erQ!^PZr_D@sa+=Abl
z5m}6Rt3Ya}&?PR}zon55`p*3^xw4QW<W+&>l)shO&`p`Ni#H3;zI}@FAw4quXd4)c
z%i_#Pi>)wb-q;EA?fXuMq(pvZz;0p=1W06?QB}=du_O`iw=&*6ESjwJl9_27#2Yky
zpB3eAvuwV}SbxiaiB7T=34ZU1xBK_;x9)aNFrgvW@(pLWg!}cb>18_m&NObTw*6m}
z%Qq;^dYH7W1qOP)8{qoB`26`3wl6vRXF+q?#qCE3Nv2BP^tpe}Et(+enn$f~L~)v)
z6;E7P=__$J5<dO0P<r4PfO+F`Gn(hACwJKZ(n&>VKifQxs8VRI-K6lG!K~D9Yx(qS
z-hb;(AFYsm&5?Rj%Egm{opHU@LyIA2_QY4e8}Ne52){$q5Z1$2%=*TOOY)nR>8mnw
zoP06G&1dgz9=9!nr>!@-OG;$x4!I0&T|a}TMfWZUS9-Adh4U75D}&G%a>R?PMs0?a
zq!D3#>a2e|zVSYp35JVy2s;!+$?Pkn$!AJO_rURX)w>tr-7^7k#(prqFSk!(e@@Bd
zZxV@0O=R#ISjsf;(pFb^)H-lClV3iombwm~lVEULL*%^W9(hHcX5&mqPCu&bLw}8*
z9I21A0kk^2Kh!Z*1LgD{cXyi=2=Cf$-XaL|7qkZKBO#9f`l3|25UVt6xbf!ZJM@Yq
zYELt7>xo}yy`5UM-O9YPiW#czjUI>`vq#@S{h(Gs>=^u$OnERN+7NCOnschcB>Jb=
zhX|11H#ft|mfp0$4&EN(EgMl6g)3!tyAK$uW%jt_F<5jTRh=zE<&ww<^d=3lG@2r%
z{mdqE85`u48D-k+O?hv$^8Lw>)?ZMqU*42O)!I#Q9|)iK`9DrkEgQ3{Xke1~!qj6@
z56BQ%kMDGdZD7HYfK_Fow~|w6AAIgn=m*$WKuRs#3WzBh=C|Ujd5)$oODV`;l>vGH
zKao;8l4-3}e4b4dT<4}vkdK>0OY6}UD|#DFvoCFZZq5ULLMD77{3@uQ1$n!erqv)t
z^g$ms(=E*;7o+E9Qg`oKK5xVs|5E`oZ>u?sqJOAmahIKS-ph+O9GVGuw>j@Vf*8XH
z1Dy2uTtiZcIKPs?VI8v&8IE|K7r>vJAKB^!F3XnbUDtF!ZYN?L`}up_JXZN$f9|pJ
zsw64~$97LC{U(aO;z*v)3+q_s?@@HAf01MSsoB-e6UtGqYtlITRISfCsW;?oA%(hr
za`ZhHV7r%U%^7fRoSc1t?D&hX=8_D>@wN|<VXExL+L~JvLzIxkDdu9zR6Y8C7r7)F
z;6hip9<#T$@qJ<1ed@+(Cau;310esJ`B2Cb+Q(5~rbqfTS3A3dEmMrWG=$G=hN|S5
zD%gYU{zP>={lx?|H@!oPZ9iXV%`t9!f~+qwV1u=6<DSC*#_6-kDCt@88Dp1`pvEf=
z5e(w)g1@@Ul%3vwBD-mV*uPZPABZjUReCa>w!i+ZWdTxWbb#{k4319i_pZ0PHuAo}
z8cKQ!txUkk2P^9fd03&V9W1j<D{L(GASNoo#S$O)_FieVYnSW+Z9cCRpYI<J`K%00
z-AZFgzG@8neY4P@N#bp)(HBv)$Oem_`)YjYNmgpTE8j?-uyr*WULdSRx^RrCMWAzs
zM#t)w@3j3mTNbX6ezN*CR&7+id?Lb^ax-R-kpugNtL33h@}|Q`B3HVp6q+wY;e0Mb
z534X}dz0g7w58X3YHz;H;fN&Op{!{nXc7LLC0+Z}C!xp0n5=Cc%<}*M=ABzT7&l-c
zi@nUfmHlDrX;7C|*!_dXGG2O%It15sJVkP^Zv*yDMr~k_%L<=q;bYs=ze<;TKkXSK
zReB7QqVmFF@DOsw5{<YwJ<Bzo|IjF*;}|nsgsoLYhlo-Uo)&nb4AV-zgZf8U9xF#x
zh>wj+>6IKYAs_g>>o+Z@hWD<gB{G)Fn2yto4yecW+{-9*&U6J&oU&xz8;J+ds+eu3
zpGWvu{7OpEJQ!Rt4~6sxGGW44lxnm-CM!KnL&Xgp87HjpGCy%)phmRq8;&##Hv@SN
zZS`(pXIdy`>+sUkT3S4=;JbBw3RBg9EKa!O3ocpBmg<OJ4s8=yy^co0#s@j0r_f?E
zGAt{KOLh&TLhP8@vv{RR4C+r(&<0J#bhHaMcZs$#X`j9H7&<xbZjyz#rC;{=6??Z-
z?;rLQJMc8B#S+5=8B`Q6c#TbWOnL6eXKaU5n1v9UAXaj^k2KqCOOLx%+n*nYSc@>o
zLY!afKUC!RdRvLgVb*lgBA>^;ko!3TD`DdG;9Bp+^HmZjOTFIR=g~fBXkg15PtdWH
zmA7)KMAXb%FlR8|diD3}icoafB#TDn)SD9Q>4PSH5R1A)3P~=Sci8{tW-Hc@Z2Yy_
zeJ?%1R2p%ad(<y^12bDx8^^wwzrP(nC1M73eF$JfY=h?qhQt=7CkuAfY=qk<ty)pl
zuWr-3M2lxnJb)Cda=RH>f&M1!P}sdjboa*f*D0rJfBFl=uP63tjTvkGFzXMvel^)T
zJd%NMlVi;bB4sZp+r8GyaMg$K0S(Ad!8i?!+D1_-HzfOG_|<8J7$`uu*0fDoPp@U5
zQ~<RnrCDn-Rjp5aYwSrmCd%UhKV1Zr%A=JvsPE|3Z<@#906P87T#KcEZVq@FE}^Ji
zmlr=eB~h@o55FBDR`8+O$+v2*-Est%=ggDepx(Vy{4LJ?E4$_x>7aA^m5yoAx-OsX
z(Qz3_@8cgG4Fz7)^~nUqj(1)nS$Rg?{)RCCUdiL&4CwB|)87HZ!GQC{^M^MtBQ9SY
zgz+Ej1f^Wj@2B-$2_F8=%YLwX?=u%9KpMCq6b=mqbK+eM{JW?2YWZr)V=~pRTZ+)a
zKQjLuv~GG&WB~3o-*7p4Qt=a0_=M;ZyBTOFZ6t>A^g2;+PZDvn#kw=ERQQA&Y{axF
zrRqjnXa3854(Hk#e01EaRjX3&a7FcO+WWm-EHqX>tLaB|5Dk&Tk8<XMEeqoy+2&Zo
zi`c7pqezmTaZgBtf79igfbGa?DFm85A+$u;RAQgQm85fA1uDo#!YvZQ9~A8!wSP^g
zQ|q-ju*@zs5XG6;a%sw_Gw-BH5bqgXuT_Di!N2DjvOR#$)9~26UXiEc90yLKe}__2
zz+?JssnSU_{|8y-xR6+HJ67{Ai>_Qh`jI0~bZouB|6>+^YZGI9GYEB0ad7s=I#Q&k
z2=n7bRKpLf)cG>p5KOKZj-TML67RQZ*eNQc@6c5|b9Q`kzm?*PeeBA}ArFkC+TKub
zwctot4sRNKKF~*M2U_L^bbhi&-d1mvrZ{JX#&*`+TQb#dy{)=fRckp<X-)N;m5HGt
z>i+pG`Ilxq3TllTSk)_lr5})*<5bkxOV#Wtd%v8rD_&??em5ixjwcFDQH!enDpk$s
z@56kT_ww=j&QPm%=Xuz!>?`R$Nf8;x(noZmLp3z2i$RatcD(wC^G^3}(F8u|0|Vb?
zboCLaYZ%|o^K{X?c695zc|)EL+W)Xo`9W-jK-cknPguuDa<)z>azLGJf=zdI+Ma}a
zdBEkk!N1jMEBW5`%hxHpsP!|q?631F4Ds<b-%dhv^e;z_OC*uM!bJMR^%J}Na@R5s
zO}?95?%U^5>kWksQo+xSqT3>bfytX^%SQM}!xY1S{xU9g8$S9Ny(i}894zSxT2sHZ
zQApP}$=iYUdF>v1Xk)*7b2U09bC21$%g64ME|#f*{F-^Wwfz#6R#MKxfeM}xq5mZ9
z<+!2_Iol3(W7~72*1kp-d-=>Q?76n5J?h4G@zkhnlMaW3y0O5*JSV7|zueZT=jvzg
zeN-G=e4$0w|F&|fr?hdyCnQSgr;BJkoM}HxLfC!8zjDJ^TxD>=#u)KRAr(%^H7I11
z@+-NyPTxm0oCNhNzRO=m*(b^+nit<4aw?7WMD5)BynBy>u-MrHO3@Hj+%tRTafcnE
zAOkD;x;7IRhXiS;wXn*?eS;Gd1&W4(V~l{pf)a|jez}IPY+XhLJloZs%dDNY1s-h;
zweF;sDqHH|X(>A^OXO$aK#~M-qBLSZw6R1#*P~=37MvxM<fG7bw4X7X@9wEM930&C
zDE2|TXfIpxw!njUNUfM*Iq;dgDlFqzc4TtbEc<WHmh{_Eu(kceWj0*nQi5&eYi*fe
zA4;L3K!qzy!-7uvthUGTY_bOvMv|GP-U#FI>qY&@U~g;EkIPz#ksY97nT&1$-y=x<
zl(kZcVHwx$(rC^6P?_VUdEj%X0=NEF?{;iZI`N1I|lbAu@tTKFvdoqA}elVKTg
z^JGf8h=tk_aJ0W<{&#zT;cB^zhj}?lr}(DvDDh=m!xf>_(k;7*%<UhR1QBULOqQ&Q
zIc9a<>jzCpKJD{{6h$JKdAc}a{rpSFs%<AQDJT~g#10Qp>bPBmKP?R;9`-L<y$E=(
zTsm=^B{-m)-;_oFU>L{UoB(M+lnRt?$?-E0M$A|@-r6sni1?A|S$AK3wOo~iJ?4=n
z!3!YTsSiNI?EC_i^%DW@b`y23Ds_TDRDD~+E06v`bd7h}*VKh?btNL?tyAM3K^q%a
zm$+ElkzF|vn&Q)?hq3%7)SuEqYn=^U`icj2)~z}}TDu<=GWL|DFwrM$U2tE|R!H?d
zF{nqXI|E-Q%c&FD5aK{SICp%yCSq9md`-u78><QecM8h?eGN}z@ksf!*>N&EVx%J1
zGgR=BfJ_KAdC$#e(j5OAt#jF4W>(C>Rcn}6F2;;`B$gz7kV@>pLX<C}#U*~5D(9VP
znu>P*$l?VJ62{RltP;#?m*MSx=A3bc<yp6r%C)BnZg%kgbpGhaY~VFl*d}esJ=Ea1
zR>|S@OX>N*YR~Uo6<vw{6p6BUf^2N2^Jr|&`g#N$%2&mi(>y#N_2DIZP~}sD{d$XM
z69$HP^s(OX4kZ2^xL@eVy*b@!6rTOmpjbzIp;Z1pYsPTs?L{%?TF=XmZ~S?UxrK=$
zo=~pBzH()cgp|BW3EGOTP0T1CUtXq}4s;Y^b-NxGTVfI%`$!&A+8bA5%`MumyOosQ
zx8DO4P)h9oRAB!_%f4Nb)=JO}!dP9d@nQJE1?g68ij>EJP!iVqNMs#%ssIOjT_8V*
zk2k7Kr$R?i9iJ>LW&I59xwK~pzxEKvCY9*(17dl;ttLw0hY&rv{83UM%{An2djmRZ
zQ@4|%i!3w2fVauH0bdC-P1d`SAH50585Rt;99!#JJ-`#^Zs;Ky#D;t3U_c4Er(S-e
z$M|bAn6GKBu_wbaP)@XSc#Uuh#Qr6+_RMHiUgr_<r!z-mBCz6SO9=(;)EZJC+-aGp
zOlpdGiF$!~M8+yA4#f>Fzw2p6-$<y3#d#?)PlyQfUPz5re!O>@S%_rtG|T-);UqOF
z)Lr)nKS$Fzpb{}pICq)L$^}?eCxc{rvbj18{6ATLhoA<Oi?&*_$@Fkhd7}Ovuh7QO
z8Rr5@pW_ZJi<hS-)r)RDkr5Nuo*<OIp_gPzj@_k5h&gTnlXVl9SBo|uLS0&F0bs0p
z;((mjIigIpR@L5?QCmzFpTBp~NBm*H%LMQ^SIYNv1vq9g^3~X?bCJs-BfIqTBBnqo
zIgiE!mys#?^>fC%6?3p>fj;^lwFi)C+PAzwl8LJcV0XI<E+@)A!sXl0{C~*SBxR(f
zb~s99GjQ5vd=2N5>9%s~UxPDpQCK16p;+u)(6d(AlYUo@`1}+#^$elr0(`G)XS<|E
z{th@liLpMvm&seU25Z;XCoDgIIAj*N=;wg6LchQ$L|FmKG53XpC@Z!_J!|~|*czu0
zyoSUf7NP~F5F?DpS>LELJMPy~sMTkavPu9nq$X~s>$7qMi}5N>__e<t2x6QW;-S`=
z?#WV?O-(U%<`&ojDV)CoSPz%tb$}i3?d_pIaI~Tk{6ASuU8r27o2TTHE7H8<x%kYs
zW)$5wP#Y`KW5yh%6`DgLgmJ0v+C15_X4F9!ufPRYZFqLUCE&HB^>_?E*l7GvHZfzD
zE6D#0wFX#@-$2N0@})FCO}jK7HCk)446Y4=pL|q+H7P3^w}knSKSl%RpR`h>d6FLS
zxlUPHkGK`n{aV^v_z|}f?qVX-ug3Y;LMTHCc1Pf1G07d{DLviFMiE52<L|1eZo^^Z
zhFW3V#5xUIijqiF@JF4m+tbX@&HEV3uOZ_E5d$o&%H?e+Th_gP*S~`=c8tP(z+6$A
z?XC-!so}4KzTa<~VE;~W@=IQuJT<d<(lD^JTa;n`NKC5TkHEZ%JXdsm-xt&egdF!b
zUz7h;y&|qamZfpkSZ)8tikb1)(8AcaL&e|Ux1)Vjl`>|TB9svkfA?Dzsr~qU*U<dg
zEbV02YeG16*q+?iz6azMvOLRn(t{Qpbpqv#H1wrg_-*y3PgMw#s2D*qwD`7TB!7qt
zfht^v=iV*D7Y^H>=nR7ebmwDk561I_NZ<&owSMf_d2y6f_i4`%Rr_=8TJnnTrkl^B
zeBO37s{V!b3uWbhy~zFGFBn%=_kHnKS<R1v2>s0*)DHc!fcgsM`}N-w-cpP+KWpgG
zO$hnSMp4F^uW7Y6fAT;nrH{-6YOP1fr1f+~4s)h(p-geRNkW7#b(>boWeMXd^xyk?
zi~o%NRoYpmJ_BzFuy5?+q~q6wf6D-&ed3Lld`7CSZV5)%>kdjc-NN>6*!jvoYk$J)
zqTLs3J^xr?g65{;eOH8bpZi}IWr6K$RX=Ipa?tQPX!pHMMb>#FPG78sgP5d$8WGjc
z4kZQ6*XGg6p<8)Q3~e(`0h?)dm24md=JGoC3mdP`o3)OC;iUt*OP4wdZ^+z8P2^Bc
zGh|vSwQTy(^|y~L=D%zHa;{dMVMFM4$Frdqw=>i8sD55($bX8*PM^<SLd2qc{LXkH
z=!L!~H>1onD~Un_h>>9x?>PbkUN{M<HPy3C3>`WzPxji|eDtAfo{hUiUlq~ibCtp{
zc%pGgi}9zE3)n`P<$}k`uk`O=K|qz=rnJ`C9|Q9XT=(|SJRrzs{85AM2yvlCe>B`#
z$+R;3o;P6b2VSrA)=KO{JxzJ7OCMl_dB0q@wf3iXe)?YULCXS^0ZGJe5k;;{uhFPd
zzaP5^%YUF3pttuLjyB7q*W0|A=dT?y7%uPaWe_xf=T9QY6Bd8ka(*cz<^Bs+Aw-e}
z$tF|SX3WiPUwzLR2{q`EUPC1zES&A8w3AEes(2;xe~SGjuBSD3O#%ucgIymd*{zeC
zl=H?d6w#Z1^nF3^4113|`wMl)uo&qqD$tyFTM@a=N0Z3m>J!MHRD7uH6*n8=lnJWq
zkv9K_c{u_=dvzhmGRx+wMHVsDjH0f}m(Xim9mi?!=7oiL@92XhtPpE5qaoxJKEwzj
zqlXc=^;Pe|v%uR6B*qchMNwR&$;Y%8g*>^3k~q(JPc#MRd<2CCUq!IX+ZTm!e7mLL
z#Hm1IGoOo1{srgSvGm;BNsecq<~ygYdQGa=XNv;MU<iL>mcqv`Up|qJfAvqaICl2B
zJ${N~jFf8#pNN{`LrtA3`+>rA@<XEWo9$#SnaUZ+Z@967x^5=`d9M(Vi4BbP50}Zy
za$Q+v9eMN(FEo=f73zC5Bui?FmL&_*jX+DBUa;TD0YfF`;@vCvf4T#j(D+{7FObC%
zrG8_a$69i39g%bv-o#g#l;G}fV-og!{-|elSA*21ukEBKSgsoT<8#8KYN1OhWArrC
z@+xRZs90}Aw6Q_*l5MrjU&`65Z)S3Dh<#z5opmiTy(#=S8ST<Ije3&(QoJs}&PII9
zIQg%*)EmFKi$R5{_ztBn;0?$oHE$TdmV3?7X2xIy58iNCIr90}l4f6#6$UtOK62AZ
z9rt&xoG;0zE=s=`+0vK4yo$kYG%5St3e&~Mg?c4u?V5g!)>fD-p*rgCFEdUqp2?0n
zdU!B+mH6Ma%8s?=IeVrP_!-B}m+bDtcAEVCZ~<HU4>4pq0SDd(4bIgG{uSKbP`DzR
zLWj93y{bKkSRuFn#|F@T#qy+$)`wXa@E!YZvIvtDdjX%dM_$ObbIf<#TmSv9UGN)U
zdp1~Hmnf94?7eXoVYD9n)MTZ1{2;3bKn~;!zL#3l<~2R`JX_ZCfv!wNfpza*@_%}2
zuaKUgTHSEYC<v@HzVrdjZZg`o>mJIu3f}^qkee}6vU8ATb57Js(*2n)Tn#-0YYXLc
z*QZxQGRt8&88sgoZ`^lBzkxxL;cu)yn{0ZDlbN(<&!OwmB3brRgxAD*KK?8+_MS*K
zc`yLv6N(Do`FvV8e~sETA}S8Zu5uE4S%#(cFfStvO2&pqxG5j=7{?SN5ZjhYwlWF=
zBrzuLY_Yr&P$JY>^vT{P7qy=3nF|GMC`?AKf27tEV{*e5Aa;0j3R^Z}$t}7OA|f>w
z;4GdVQNa*e6KN47tB{h6sl&I=s8yr$A?b0RLQV9AF0L)syBF0!wne*Dc6-clNyecB
zNpCRHQw&sgWY;)o;K-2xK}gRpI1d=MNXfj^cC0PpMaAI-*}>xmIEFRl?cn2Xt-R?w
zfPe1FE+%-hNo%VipN>Wn3;0aBAtW;6i^^OMwhoTILKmJxC72PCZ4Twi6ac)!AYe+p
zZy(bn#w4~U3ltl2{@w~BkteVfvkW}N3}BHI>Xec2Eo5Aq&?4s1+q+5<m>R~`*cCl{
z%wJM&hJ_UVkqdIdNpn0)U3l?Irn@JOy)$3=<A;)&*}cqhh-i!K9KP2{Ph6Y9xWI^I
zGm(sdN_Exlw`JU7s$Yz$wFq>?m6Xn8hx&}mg4g-iP>|W(*)D}3rsbSBab4H|V&-%Z
zN51!iLS9Jq4u|)4S_%I#zq5u9^IRTG^ynM1;T3q_%aHr-fUs-2DC6h=NdO0{&`xb)
zyw4_8X?SR<@Gh-b0%*de%z-@*VUy3$9e^zqX`A@d<$Gqcg|c^b8oU#p+zd!>7l|c9
ziv)6J@@;@I9eA-^y=|eNez^}k|81tj<j?`(L<z0nc*7pONMS9uzq30NP!n%?OeuHX
zZ-ZFSYZX|s)yK(oLG?o0_&QiBGc3)wqb>*ErL=xpdm07^4nzL%waoRBr~?n{-8*~-
zQGI|S?;8`Dqj|5$qL)E#g`{M^)a8?q*^HF;_@mlqXvQe_-3)+a<7joPdzC+gP(HP^
zTNhh;ES`KYPGG9%Sp1S!S9&AYc#?lGD6dM&^1)*FZGvd|@ujpKm1wZ2;c;oWdZFT6
z6-@O*O^re=aMKNBT9hI+?CpPxjTSm?>%gAVka`{*P}$~$F3aKt5=A{kqR`Nhu?$Nc
z-L<&C&IK(2zgF4fHks~Z3DG^~EJ>P{<nOh~f6nU%+ngt{>kFDOfj^dcISlk=jw$6M
zvv_qEPu?vL)4j8QdH3jV2-`t>#_-E{wB@V$|E|qRmM^bG3_kw{-=9BH_sg#SQl7gn
zMAn$gtI+&f_$+MmZ-+zBY&-g3AabC!zbq;Z&ME^o$X4QaVlgL_X_t|tEd&-^^WbUI
zzVbgl3RBkD9Y@zOa-DYb9T{L~xBovR(v~7w%h4?)swi7F=qCAeHJa(iySO3$>gYck
z(Vcn<P><LvOQ`Hal1u>kbrI#KITu@X{2F?lT=Z~Ch$y&u^4P3lGYa~4nF`uWqpIi-
zMWSTZ?~st~raJfzqEOg>vqI)E8lz0xY8ct%-6ghV5I>#=bVfO1?@c#!t%TnU9Xb`w
zeK`-M4)}#Rpv_E0<08v42Cd{-mT3fjP5&c&Y7Y3d5$pXYkGu1;>(?`v>%i?8XNLkh
z*Jtenixshbbiaig_XkPn2TWN-Piy3+e^4rlwu&mp=YGJM3srcGDNs-Vaj@u-HX^a(
zfA-2$*6Z#dj+z&a`QV-QRl2SE+SKJ9c^?K9|C*a<{%?NNln}DacIKfuLPl(4p7|G@
zFMl!iCPbkRaltoSJrq!kE8@}|(u5$XDjP8ln)cUKWb`;Y6^`-IQyZ~_A1zTluOHdq
zzBBm*YTWByLTLFdb&`;)bS59Wl~o8lr<?~OZb!UC#*LzJhc|<zE@YIN%ka>OU)D%_
z+R&Mlqi)RXTr>~|U!5=zN}XK}(LG;!`1K;~YdBq5#N6dhI5I9aoTVay;&MOZz^(a?
zl2QJc(&`}9YQJ0D2h9cFr>L)0UEdXacwr=(M?~`AC<~2w*0R9NJO^gI_RodQlH&%g
zw#FwH1?@hu%bzava(Wpx<YvOtW%at;-+PFB3ir?S)I%AsUcHt9b-(<#@e`zJDULOV
z?{=#rn-ryzb95lO**+V@&>snPlX6Cm#l~#Po*uL1f33`%sbz~X)Le!|*h-dVGNb=#
zzv2!cPv(8>*`hbs+w%XerO@sHb?)+HSjx^l0lo04Ir1D@N?D8go9`ndFu!U(*a*L<
z6;@JB_wVJ98gY`cACw@GWV6E<QnisVzxcxXeX{I3RTN@zSwZ;l+!_NrBs;W$bw$7b
z*5;WAQI^J&`@By`JV6-ny;q=H@VzK_#HAZd3r|SXM;ZKC_rpwxNV+u*1*7!kJs@^>
z4;^X6{O^{uq#oc9=()F8RNTY!98meKyf^0CLBZ&v44_#V=i(Tj`QTWiEu1s?FQk>T
zNq)-WVV2@S#gqFX$Dxf1z{%2Sw0b4T`wcF?<PSlTBmwKMY3Z^chpqUPgdU)+`w(S}
zf9LQ{B{X1&zJ=+ct#<NMYlJFqs{S{Kz*I!_$Rdy6VlGZWSRIg7n16A>Ibrsb3?BSp
zPh^o0xkzCE!d=Uh9dEG5(N6C?t1wKsjgwq<w`XbxY6rZz)E;{(u%;PrJz{G;Nra+-
z(;OC<?|=`~0vg~$EXYv0kaV6XXgZwt@X1K`%p&}>M|kc>D(A@!VQ+o_V1`ryp8D;A
z3SX{Vy8Bz_E1u#|xaP}b(b9ss#|Ufs{Ob&sw_+Z9fEI&%u?oG89mmXXJW|@Ade?2V
zKAkIj{Gfmupzj8G@b7JhlLxW9{T)9+p)>vq-5zryzd`^dGwH>?rC`xNSJ>jv<){cK
z#nP58_S82al3-wG)b62PY64EsVLa4nSb9JPfS6K1+esB6lKtaj)`3(8d8^3)&J4@#
zG+&ePj_-S08~(Hd^Z_D6Mcn{DV{gMm)x_hsTb9#PwTZ=i2fnCH6{YIu)TX&@YQKhr
zXFngr-Yy%GA`!z+0VQ54G<$p|_#yx-6c%4tE3%qyc&zh2TPYd6F+~<F`-YpkN}A~8
z1#&g@NOxxVf5S7VN9v3Bk?w&`WTzG+iqf$a52_FQ1(OG-1Fv|kG!ZvDs1FC49G}El
zY80!TdC$W++)V1f4FVsHYqL9puZX)rvgkVFt`Y?U`K{cwt$BXf_}h_TvAjX;j0mXD
zxP}yv$kxIla-?LCj?)tD^o^x`ELS<SEQGDK7V4k9OuUr(?^@Z@^@y_<C)wRs!dD+I
zj0*pR%M_3U425GK+p|q2^-{b<jXZt18*(37h79jN9g7$aPrQ6JVwXLxy*gDUN1i74
zA0}K$og|(`?N%mS`0gggrT)AKnBxk%F#74Ax2>K>L|rx2vzE+-T+->_&*0kXiFqZ$
zpbOsKJ6?P!iQi1|Wa==g^r4MxZIaN~Y4d(0P-jXSqH;s}{it$lOZ%k!%*$edjNIX!
zIU8%FwcbpriZ-w(RohW)9JiaP56)*VsBO(z@Hc!q;u|NGvYKBVwlB=Liki7|K+hv?
zTWvVw4Qp~psn~NFUq>^XRuaXfe1P6sS9+hAX@0A74n-twrFDg|b>CO>T7<rOYO*!4
zzu+yh<KDC34Ejs=G;{#7Fqf}2_$MW)@hn>~lOb#+W#_(LY^e*v&NnxoBSb#E^+?(2
zD!d?B1nqJ313l!<<pXmcqb1L!Dif@N-43q%=Uk4g!Jnfu;SH<t@e#gEs-`UzJQ;Y{
zdHd_RY0`$7*OB{A(A_trk0-6AIhKRYCHBb}NYFh(-4{0Om^EvUwP_@o5=-^mE0)pt
z8n?#LV6}U>Yz^{9n+cXsg!k7jRTOjaTjb89is^Ph!Nr)0T^@{8zAh#D%GvTlz&t>H
zLlC)}cGc9b;OD2;hV@5USaO*Sge-BH)rwE1FH}401(WAZp9;mA2Bv6NztyI)P=$jb
zf%MXT0Cq=rAkSm-=<YZ_hc<fSThuCmzP!BOlNaK`mp6T#?!Rj^SaBPuv#GzO6t$Vz
z^mY&iu)?$b%p+P^nC4i%Th0$+#<Z%T8LOXI0dWYNXj0~q$o@4UN8-r&(Tl3v&`sG|
z4Y}Nn*LP={mi5Y{*3zm7kOno&iQ3Yz;qt8@>I7a}_q^)cR(tAd)p6Z50JDT7z|6Kv
zgh9A=R)S}=p;dPHWLd7U$Nt+ygTMTbmChXBB2dnhOL=h}+>`Zj+-Ok)Mzknp)=8&N
zB3S-Y`Yr0ssz9F!P5fk=-6xB?34^d!8_ePc7@@zbCcRuU=;|v0apXvbi)ucl7tsFD
z@F1YKU39cZJJ!-)*~gOK+m;07EdSHt6(_dZe2hINK^bs&(1@RD(jwF_D$ni`uc&_6
zeH-Y+J>flt!BS-%Z)Y>cmnw5(P&h@kMGW1Q;Y6g<v}!tSaD*uLAr2Ut{wph|FX@It
z+sDh=&|2FEbu~_)#GPbVM}UZ_hm@UOLSGerJ}j#yC{S*?EJYX30$3_VVpc#yxzPWv
z9X)eppWK+kK+?_eB2o^<T;?6^pj5Gpg%%OxI<zUYa<4~r?1{M+pWJONH0}#sh4`{Y
z2G*{;kK!luNl&Su-sT6dvG-Xjob&JAOSN+G7ANQ?*$AT{Dk@rF{bC*Orxy?I7}eJ4
z@%)UJvg^J&%|j25+}VGS$LAO9vyL-o&EQ&;=m}P%(?5I#8d2?7vp1v5ISUF#&7>}z
z`ew)_V{HGP4V^sA&@+S$81bntx;hZk_-G<th|uh4Y^P2Bfsv7f^r=|K_iv%CxC2wY
zkB9Ld0!a!X5i&9XUlEZcHc<!QmPteC1G#4%CreOSfD&!O?1JmJadTF2st4+&q=TLf
z4`juOIGW$9$QL|LldB%GB=$V4xN`yEbCKrNxf9+b|7lhd=TN1$-9zAeKC|Ps^Na)?
z{KSF8?s;n3wm8(7KMy$QZ2ut1Lm{$%iy(3GNlBH5#-pQ`rkb*4N#CV1^b5_iOTGT|
zJQi}qyw9=%TQ>NEHu|WOTp#%xwlBLfK2#5jpP`QOPeQ?op}2!*{hb{*FqBKr9glxa
z@qcO_+%W}`*w~-7pD4_}4RM&HWR^u%X0jTSkDO^}mUaja&StOf9;0b~v$7LJqpSbe
z<3)8phgAh#3;f}_L)Z4%_<<<mu5OZ3EK#)Vn@(lnm>`Gx=?e0cHwR($MY`h_dnpfv
z^doj30fn$yv4p0!*(u6<ntKK%NmPV>{LSB^C%Qwflx;1qsLvE~AlzPshGA--Cpw_}
z0}^xsR!V)p%950xIMkD*e=_e?)7C(NA+v-Vo7+)-+Sr|q>!A)mQ!_uvh`rV8`c>6H
z$$KyF>t+)&Oo#RkjB9@EUBahOrBx(eS=iF@o4hTWqwR0m_Y7FOxr@RZ3sheb^X{-^
z3=K2fphGqSnyw8<)yd}#&x|k_d}sfEnxx-D@@B!_;KzZ(*&WTN*GB@Ak-ZnKU~;SI
z;^}|axGUG(9^y}2O*Px8O1bHO{daA!pL)QaX5#5Z`lU?9K|wQ>i+=uS;?lcBteY`v
z>JI+{R4vOh^3d6S!?UjlV-us@NBfCVGkm^K+V-&#E?D!QlJ9DhhGFjTl=N(aE=gX-
zER(ugIF)4^>4~Ern)1dzS@T<FadrlFPtON$IV^K!5`7-89k_x%78RR;`4O!vOQrp-
zaxd<cT}$~WGGdw5ufL~1m6SX2j>qVk9@bDtN}#eorTpqPnWBQ`dSx^rz9Hrdc{yDo
zX(<?(G?h(37J1<!ipFd7zlYA0#Ya*I91xaTT>E>qLTzsfH|JBt^WefyQ`bzupn_Gu
z7wUDUr#T<@qyrE&c9W&r=us}^JS+L^q2BZ|@(k^St?FXn8}Kos$80t@{_+MmSIuj`
zDCUdS7TFB_LKrE7LD&S$nCuJ~<D@}|Uu%liB}WJ{frZPl`wl>~8rd9-E~F8S3J`uC
zHsVlK)4SO7B45rHi}&w=lny!kgoPM4N18f6@!I~J;ZVJm3lJ?O2W>nKX}w@}HjLI_
zaqNr+yY`j|82i+KVWBdYwlVLICZO!5NgIVF(&{P~POVuiH7f3*Pnh5QxK*bFBMQ(N
z71bc;AC-RHoU1lP@nvij9pKW9Ad=U-x8p-!r^(kf%TgH<`N-1M!PO}6^v&X@-e#l8
zixt}4XB~R|+Qq2+&3#fQLxNT;o2F26D{-(ICJYk!JYKPB7G7av@<TdX(7;Ku;9$rk
zto`$Ah$y1UTwhs$eszs9tKw+(*WynSKO@HZ{PRuI&#TrWnO`^qPYJ5}{K{^y8`}ZR
zrvrb<80kQsulM#InM-asZ?!s%n)HfQ=dsd{h047;Swg`dO)Ja4ZIUS9;0J}cc`0k+
z^-o`1OLjO^hBl1A$Pi<gu@*modz@O7w=STxPG&5w0I?W8qUL)L<t^53uB_5QA&)kd
zqJ}=^Ju&y(aeFXph*rg?>-B)WrlHH!g>C{_%bLAKZF<$9EHXsSRsL$8ZZtW5{NVBh
zoT_0$7&u6CdcuU+ao5wt++iMG-Xzc4{_T2k!EMs=31i;v5cyTsmQT_LdCC~CqZvXz
zkSGkWqZ;OV{PdT_zCCozwvASW;4tvaOkb)e+@N70QaPcda;%t$GqXj$-mwQ6tf|KA
zLdarXlxdCghD)^*M6bcA%CrWiGW@mr(D0Fzed*;BV>8_$fj#%fdBoAL-^klQ7ASb7
zt8Ym&vVjQYx0;g6LIQoKG^64|7DnKYZN!gcRt~xoG|SK7kv;{0Jp|#&s0V=znP^vq
ziv`J6=A5}DzsnUZ2ftqO!^!{_-_+Iu+B&prfu))#go{dPTP2YqCML>wJZ=TZCmX%v
zo5fIUArz=4PrWL?adbtPma&^S%E|GUmmkTPT_g3?HOu|=lhVz}u-}XWtUubUOj<oa
zkN#q;k};GgzvW%FxntqHxWWjmaqnz7j5A!|ae+epbYbZ7+9g)}A1A@OD84xwZckJ+
zbC_Q(oJqP~ArqXSs~Fa@Y*oe!*r;}5rWu;p|0UEEPPLWP212@yQu)o_LUr-gzM#s}
zO2#H(hh`uRkX%p9vUYNXGHFme2_|DGN_$`#FKCdgt9g&5IvK`MmgT&EL$s*_wincA
zc9}~N_gfEqc=-hKI1qN9=G3{;Y-aRQCZ}sqLC|AD$3my585RvamZ2l=Iaz{-G&QK}
z$E5~a`dfSurVI+y3I%4*TaY=6E-2JRMxk#})-!!FZ2G(4Kbjl7OzZ^s(Eu?}zK!5&
zer|x!Nl)2w@tJ}y=i^I2?L?&l%n0M^@QgRB4$#beV&!x|w&4$`HcP8f3cC$U-&q>_
zIRc8Wv!OLEkZrNDgPqb?>E^slejd*y>5F9xlFf)Km6pQX&6eGbGF8p8cEt#k&h%1i
z6)2(VVv1#TCwsHj<(MEdT~dBYJkp2ysc$ZKTS0PmG|mAYa2wy_A7u5UJQj4&w^~g`
z6oQ520VcmU?k7hV!$J`cL+ym{Nc)9~d|HueiEZcz_6eftu0>Z;{v?V7%@Rynv>$q3
zk<3$*WH<eHARRB8TrRukexai~=G@`he->{FoNRLmdoqb=AW}ThGgy|$o0otX@}?p4
z#w<s6!>*}4V>8?KlHXvgQDwI^3T`IJd1#Mx`v2_KY?}$Ir3SO9-efIvHcFK<>dzPS
zdm?+?#qMT0QKU83oDn?iu4C*khHteHFs0?rQpuQrS~a+j<z!&=1W>jm*%iWm(gTCC
zTEJ;?V+h`#cfmqm`pTa`QZV?EP>c+?ZdIEK9n#tu6KzaownAW9cg-^9(`DBTEUee+
z|Fxf33vLeAWm@vhREC0Z!0stn%1fqaV_EIlHtB^mcE&GPaT3$O1#+QdNF+qi49G&6
z2l=e*x%yrtt)%s>loJPdr2Xn7Z=GGXHH9G=TeJobCnrffdH+YzS;jT_zHQu}N+^QT
zC7^W2Xb_PaY?L6~F`9vNg9xa=MvP7sMt2RVAvGH58Zf#M>E{3JdAYau?$33f*Kr=l
z@ufBqeE)5MRoN5#&yAv-wiM3Fg)b$>gD2*G@u$%HaEb(p1=(Mg>&#j0c~Zv>`%0Y;
zP32fsu>Rg<*9B9Bi0Z_u8qKX;j#L$IlP!wBt-3;mxGS<0z1+VQy1tkh+`Z2~&AY3)
zpTWLCwX~!BF5hynd1big-BOe^to$=je}%pHohWbi78pL)`J0yHDajg)sT@xEchl}C
z3D)l!o6vg77Ut<vdQuIRsD>*D`v9=CmNEMZM+N77H}FfC&E?p_!0so7IwDagvs}1`
zRX^b1%d8_afA=#<ETB8O;viM9$1w-SKPY~XzVssBFpZSLSEH!S!Vu5bxL*0`?GXC1
zP6e`Q5spCNbNk?M^nz6s!O^EW&_PBS>5Im>-t~l!Qo)(_UIG!FM4r_?diEW%zx=|9
z_!vlAtfIew2tJ>89zIq=U!{cu2O)EBR;ndW`e#QaGk${c<~A~X7MIc9^v9CMxYw8N
zuUTDr0A>vnRzD#?tU?1eqxMONZ(p=oy#|YPD&C1VgM0q@wfpR#inXyZrGC^b4AMc#
z!SymvD_%pUCL{Q*=-3k;J{j|EgL6#x2=>?Zf6blUA0ZCGgQAcs*UFf3?6isknsnWf
zVxY6U;-$=Y(kG$X>e&U`Mfs%67DcKh<C_uzqJOx-vQP8zi>Z;>M@eGP>QEBpn}99_
z>rgX^f#EVN6KPV(vM`@`Y(_1qk#KcKGn*9;MO*a?y)~|o2RUv?^PIL5^-09<V+W<x
z_TxBjiw4&w+UMVPj!Hx=%V^#tj_xX~s_tceh)=AC0$uD>EMd_ZsRoeg4P_hSHwkew
z;e=Ia3GV4-J-FLD@U$$Oyy@=<!gm<J?HEf*hSS?`TY~WlTZQ&41w>=|*#-X=sb{tE
zTVZlXfBcVJt{VsWE58X|2g#M@uf&@y`Rop{uXje|GX*_85ZgRS8P(6q<L3~gBCH3$
znObIczJqix*ua8DHnGIL#Sd?w+LzYs-<uq*-+O1U?U3r52Krf6s>AK@VF^Oc!J;B?
z&vIM)@R3fKPmI=I4#f`~5F7O(2BNgRd;g)bq(X{c^@%TTyNHc3Q)uiK*eR_<qPVv4
zyM}`bQyBmo$J}*jKxL4jI(gnUe|Tdv9@j~GC-bPpl{GT;YUA2!Edpwx=wZ#+B;nr9
zxO!aK{xMd!b)8V5=&O!u61|6Vveh6g5Rs@`lhSf7<hmsef0)hDmL8cmY0Qpdyw5oS
zp$=AK_FTK@eRxIqHwXH~dnMF)XTgsoDBD^k!!*XB+xcQOqmS)`^mbX<lY;Q>vH>Ui
zlVv->-U3b$_hZzq$bR=-Wu7-n>f<d_V^w#_G_HX$-gak>4?4dmYL*W9bU;EISvs+1
zw}~d#uHV&P`yK|A#(2lM<;TUKHl&m}%$q9eC<?b1^pRK@esRahgOuD#F9EQ2M`?>o
zKE8r|;(Jfng#4$ls<4T6d^8e2iat$S4f`BSwECU&h$TAsC5N`<%FjvWB8}_rQjAlv
zI8!9DYCisu<#zM`!L_&e<EWfv*Zs72_4_Hy9d{@moz2p<HF~RF8Ojh$K4r`zNTl(f
zb$e+e+HCC<4O`7TGb#v>AL@SV#7-M%OCQ=kD91-<g>8xXFRf&%rXFwoekoP>Z?*}I
zQfr<TmUGf9IU9+2OJ`mhL?JJ#^~sdLJ$T@}h@$Ywu2Eov58*U{e`Xkw^HSv+n&PZv
z2TCi&^~>|&H`5vg&Y8Tn0^*Dly9iI(jG#Jo9`W|a-|mf{`u}Yz#QKf?YPf(o{_nxZ
zo%#kdGCQmxxxQJD($Bm$ez%I2fy<2+dncjKp-w+S?Njewg|<(Sz5Rb(mg=ZIcWC4Z
zg>2!$W?n&nWdD=^b8hs>LwDWNv>?;d5p=TNVndR}cY3jLKC^3&gnoY0jn+eMoSu_B
z>mNNuL(h2H#u@H1gYa>u|8(dElv96Ky*!M7%jP+2zLe?MeS(fX4A}CMehHZmU@HNH
zC?2r8T9SD=Ii1}r0#=V56Is2l*JuNt;J6rWJbi)<WSGold7*9uP@^iBqI@Bm7UHfV
zrv<^b8ZQ?<HCqtp3%259<>MmMUxx*hpz)uB<A>^n3-wOEZ#Gl7EHtEtpP^>$)CW8M
zS~II3izK-l$-oLGi^sF9kPARvolb~q=}3NP&diAS2gLAmUoN8<*q{W%!J4>#GYR^D
ziL9y!^!o`84_fvIVAB>H@p=kHhyt*;3nyca24<(7J&`vk##T+Z^L_TM$npso-PL<-
zu(Wbxy@Saa%k(sYbVnj7>?ps&%uu9acRfIzr?_z)qYtiAf}Jvr*<eP=u))FDF_{p^
zcDXoF;pKv-ejG-V!!^rA13B;UvOwpziOakr!*!w!Pl@A6&Z|;`UarQCLA5-UE!Y9o
zR<hF;_b=~<`K7C)Z$fIcYFKp*^Ghi8;*vKQR($JZZ2Eor8I9{0NBt^a{%Jb1*%exM
zHr+Akh{fg#8=u!&axo)c6YC{T2^qHFq^}RAXiK;;Qgd5YU$Rb%dCy1Qy8{CnMjQFy
zucc1<asl8`U2rRP*+%ciZrQv7b}tijKoaAg0x|kcoJNOf*OpG97ty?{f;84l>SCGR
zG$FV5{0`K9^+R@?u@iz!?YT1Wz74a`{dD^7ZwEqEa>qe+{;?Z0GJPuaPKi4ttu}Rb
z%hO=t;-v-riK35awv(}y$eP+bp;{X4VNm#iYVcfgd*bQ%!DU_xAAQQGa34P368lvU
z>D|&PQxk>JX?hLyN*{*wSkO1Sx*-dCeH+W2@}XrvpM=_1Wy-FBwG+7O_wt>-{G4Dj
zL`~c0Ob>hgWG`b^Gi(SO5z%$x)E_+7Q%f4_@bxXPII8Kvj_w){7U?#+mu0jXw-l!1
zW3NA*OB{y_0u*XdY$3AW7)fv>hKDcJMy<P90+VVooqQ>b`>r2-m?UWgENPV;bZ{9W
zS^jFt{cS7~A6ew|8t`Yi`4$xos!QW;l}z$8whu(Q{b+33y*Col{TJ)P%`wx2)1ory
zd1-H1!gVE{SwhZS^*-juYv{*S<aD}x^M0>^IVi5|P-~9!Mn<3C6{IL4U=UR$o$1a*
zU*$(nmi5r>kAs+WnQf^8@+lev;;P6ps7*AN(~+*0kk55a$upka-GRp+1g%b}IW3AZ
zjtj}=0!pAilRfop2Bzm{Ekx>&Ce8az1Inf1hxR;NsrF)HiUaDVo*7x-YO!hZ^6|&W
zH%~A;{(a@^+lo53H6}L}JcV%1J<bTu10pP1amSHWPtkNdYi-W*^QyYur(!tXu>3{J
zd0K^o+D<uKSOKE{l|DNQ5v-G^d<r+*3#iXYk?jvH?gdpJpl^J;om_oPQpG;6CFy;l
z*EZY?-b^KL>0zB^$$B3FiW)26$U93k3VeqXe>`9J=6!dHMT!Mwa8+H=x>_V$x7r^q
zVr0N7&Z#pOdO3w(H3OQh3n-UbZ8dW!gH%l>R5e_eZt2YLdX+Qg+1VEgYDVy3sD@<k
zm@#jI>#||DwQLFRF9%sA(!uhez1Kkrb<JTKr*}eyZ6NIS)&CygSp_EO*d1)_g*wUo
z{uQ`lYs}%~-?a^)1wNL`YRr^9GU}RhnAJTda8ceb{DG($a7%I7+08$H9LD<80N+Q|
z<y6O=Qc-LC8!=6${Zk<b8Z}2G`(3k+H=W>675Cq)EM1Kekzck9>c7Lm2#9;3{eA+V
zA8W)3RX`xAtdCO8^}sVY&fg?kd=GBynTkG(OJ~(Q0i#;8YR2B2M7`j$IW7cF@KyT`
z9VBtUW~>LoS&i@j529e7I(^&YEmIZzO>Mt86|fSUF*wGh-1nVoL)*@vzbNKo|BrTQ
zM71=Q$k360&`2_gkG=8|nx*guOY`r+VFux|vh0f1SIVf%p(Ra60qOo1vdg~*!ztFC
z2v&TxQaggz|CSICY?`M$%#gIrb04u}rwJI^=NH0}xGK1yiNoDtBi-4KaLix3FI~8Y
z1X$MBD*cI74<+p0_Prd0kS)Ai`=`8{FozOx6YP<ZW5U;KWywl!!zLT+@|<!+Y#bgE
z;aEAbt%@*C2)rHk80(dqk=g<6swOyy8bzvig&f<*nOlC{85!@DRv(WAtj0@zSh-GE
z5NFmn85~E5YBKxbq}%_q@BEa~DjoqMWPIU*p?Q|J7Xff(WOMFVA17!nS0XgfJVcua
zyLqwGzmQJ-CQRvE`Io05k9+UzXZM2MdJ7--Wo2VxJ15^vCLgan<D~Sb&Gt=}@Y+9d
zUyl@qtBTF@YTFNGgpv-j?)>p8+QO-YOnA(h)eUPLlB4^p<ggrok7^$tm0-NZ**9&k
zmg--VXPEss$Kn~kf@^1=Fsm6Os{=9Tktjt>>OlF1AS_5c`+Ht)c=w&Vma>7#yL)mz
zIkGGaY`_44A6+^a2Tv$+_;jx~k*oFnLAHAM<tvjoU9{m^sNAucsUg0l>6XFeP52JR
z>%(y`L;^?WT-7I$#51WGm_^uaz9fCoQQX7cxX%1hUH8-phRBIgz4iEP=vV6mY7*YT
zunW*??mFTPI!R;5CVnPr^5T&<3~~O4Ya-MKzBFa>YgcoG1&~j3?J!8*2L1*6=~RPn
zt(DZsY1$&Gkj+EZj4FQH$sX|&g5}K{uG=q7tHZ7X_dX1B<)B)#EfVpjleH&`fO}qq
zD@)gTr|x^dv9iWbtU!~tw~4E=CP7EZ$(b}w97bXhzT{$K9S!FmLxzGwjlIO5O&riu
ziWPT9ZRc8<wneI%I!Oc0_W>#H>r>J~1ff>R&?@YRj&n7IE}Aouo&ex1n_d=EALrqY
zvf*#j{q;ysVg@!5YtWj*_tpqQQZ!T2HZ)Lhz8~NTZ3uro;a<X{tof_rL_dauMM|Og
zyS4^@ZD%9PNiZ^}iW923FsDqXEQ5YC+axG}N|)xk?Z>+WW4Whhr*TSrtp!QCP#qbF
zo+CWka)xRlr3Tu#|4^oloQi1bt_~^|vv8P@qn4R}JX`CxbV4>YmMi4g^q&W<gNNqA
zxEt$tYKXdy__ym8WxQX^Xnt&}A1HGlCQFuCh^VYQOruv#OUhjpX06QN5&YYqky+$8
z@AD;wLqbG?t+$}=Foh%k2@f04zC<QO@VEQ5y<T7v-$r@lGRdnSzGQMS!(tnA#bPLY
z!*OKMZeR-SWISv^PGT{chC#KIK?%GF?F~NTKJxtn>bF&(@rcYXMo_Q(Tf2}6qPm;0
zM*u}(tE@)HmYE&=w`?A<NgH_-B5@U~^1!w_3h!DQXp!Zg+2g;8Ijs@6k$}5_BoPmU
zKV40cRwvf{F1yPcVIKhUew<qY!@j5<v~(8B-&%(_<44i|j>6D<yqUzP0?;!Rv1C6>
z7d$~^=l4L~NQk0}<5bn2O2EZ1pY{T+_fq7HR4plB$||R}GwgyY$$R)|Sq^mb>Z51i
zPh=ehve&=Ic4saa=RlB&vY#)^D%;O^k%kuz#!F5QwA|+VxbELwA<h7?ugl~|yhx9{
zZzMU~@?6-N3-#wDH5SqRy8gT$qbc99ic?-K-&SjK|8v1Z$`e`l`LrXLi$6}W@pLno
zpZgC0Tp$OV#a&2ePs$*v3z793;!3x;vE4up`Vm&6*yiN|*98=48a&D9fx@^*uiEfx
zt8?JHC|*}B37riMPOjBxr*`kcGDPhA?%p;5{ne|I1pb-Gu@Frb$m9@m5n@B?+L72a
zb~ovfKRPozdQT*MTh9L|`Q|Jidf2t$+w-cUs7>*6d7)0GTSm$wrLqZI&Q2ZQ!iob}
zE(On<`2mr)>C+YYG)qE<u<Q?dLmWZ8qF)V<&rs1l_pX)bgRC;CVC7*MLcSovtHG%f
zh{;BzM{={rZAp+b`ma|UGnxC-4ZNS?RDQet)NmCKtElRh`TjMOTrm1=n)ap-*OaC(
zeCnpsCKPE!zNYsl=MuM!C#GJ~uCY*Ga;=8Yb*;XoEk13Iw6nXa%bs)?;pv{Q$mPrc
zJ}PVYJ~S+65OZ0c#2J=~$w_Jd8RL@g@y|O=_ZjiXt57$I`>Pa&l)^WLidulmLNuS<
zExrJ;;43D29B6Q08|1g}a^54549v$7-1i%ztTX!8BYQPD3OJ8a!3fmafP{8VN8l9<
zh)_b&a0_J0EDM%^qaITwe<2v+mNi+ch(=`IERnDETIJY%RWGB_L6<Zft%(nabjk$j
zdK@!Z)W3D8H1oX}>Ya>~<u(nZ#|v@1zLM8c7|=~ZYk6eg?Q1?)W$e9|`(*q+DSIa6
zs(t-w>q5xC&6KB0|LvVgMLHb>#gR-pNh<G7gf(effR?!It3SA~`Hjj`N=GIoV0)~v
zxWti!fe^hjAIY%D`zy0d&+SP|8Vv)!%JqROSGhW~=tU~BZl{1EBG&-kIwynmOoPl5
zETcBQfnlv&gmvP@az%TgCI{k%DNA#wfFsE8J6%tGNhWft_F^20F0G<hxe1^6O*!@c
zflFOxxhHVZn&~x#u(+xg`oh=}9(CNgJw~UN{54ZvL%?9W77f0g;?(OY#S>3k=QY`j
zr@U=i(+10$)|ih$$F9G?uijWPdYtz!ZZPN!KNhFzFFnGcv@#`j%2WnJ;IT+qn;gfI
zOl_UGBtuzA)%z3YBouEJjgU4(5-Q5GWYifOX-n5*a~Fn|?Jq2^1ICg`G)7?gN4Y2R
zfvF9Aey3g6a8*VI_whSBT$t<jubNw`LA^g}0l3G>s&a94MnK@^#c%s$XNh4;yHF!;
zQn{NIlqIpa&Q6lHSC*HHlznO(jH1n8N7C~qz2^)V$>$^sTE`OjI?t$#?W*;jtzpF$
z-#AXSiF%Lke)r0@h~C~MUs9FZ@As$5-R~$=;>tOl!nuz7_ZLCOJU$OjJ%BISiNQ<F
zGhdhFgT<b6p<RG^se{c)K0UZ34sUOGvrOQylAVVe4{{_yb9?Uv8|S-*VXIsxMxE9i
znfC_5kqz$FzVziNKk%vlTlsPynfX@>);p3lK}~0ELfWS*swanM?&V>IXJid8g>G|Q
zM!pV{p#3{OLD3L^((wAWbNJEcDflUE<Aflrdj8j8x_GT`%fL>`!*&fJl=kCF>n;3Q
zw76bVYh9LmNgcKC+Y!!Z>XAfMj&sd@@#XT7?{hjy4S2|9=OKae;<*MhHN!0<K{7-z
ztE2Q43ia}vn&v&NxYp-#JS*$?#;zyo$tW3Q(|@*{Wc-ss*3D|39s?^&GA1qjsr(V3
z^fh-tM+<v&QlhK$?7+B<GjClk^XD$tv<`q_cJ32CBGSy!km~*DpX<V-lFM|xts28t
zi9%Xr*BU$`dCV}`&{+@7L?J~<<!q_S@H~sp?qy^GDKAbrd}Zy;OgcD}<#_Nu&4aHr
z28wE%f_7^JsU&6@IseHv=`(PI{xaW=I2<8Ol9YA~R{|U>FPSLPK;|T<+hy%Fe)#@&
z?)MxWD?j=~^ioP5L3-&W|M;l^30tkf(S!cPwxzjLg{4E14kHUa8qG?kZ+sEAFXYXJ
zYh+IEUw@-q8}W%RnS}E&^&_a1NBWEMO$pkn1Tn=zSPIF542XnpAHCd(KomVbunLSo
zT@qVcxx$4#0iL_3fg##MAJ#>)@5F^yGuC%++1J{&Cqu6~Lt&|(i<f&C5>n7t9fdgn
zg7QHUj+0Nu)en8FhS}*T2+HdX9CAm1N`O9^Uqdag3`dM#b@n{WpnP8Qi4jejScQA|
zypXe)Hu!1hF$vKF34FBAgZ*0C^Iu<G)l@taDD+pAu1ihLThUUe?O9LY1l4WyQ61^B
zeDM5rwBXl-|2<gBxei0ipD^oqQm+0uv3EIJ?M3iyowlg6Oow|aoivACrQai2Y#qO6
z5QRAGeU-Z66P1dpl{X6zVpW=qy;a>yW0!h2BDWselqU7V^-c>Xq$Bf!e6D5WK-w&r
zE{>XxI@t}QRe?9yO>6S?HKeT5i%yW(p%%WcMR9KD6c!i$X9ChC4x`M}JRa4pO3Xzk
zfWDNN|2tX}8sBnOVDbAsSR|IFtLXd~ZgON?DOk!u)N8qr-s$0H-u2$5N>{m}NM(e{
zH>q$uvbgonj_E&VzBuwyxAC2QiIS|3`DT=ZkP!-*NL%K7=ANMSHqON<y5s;6HZj$o
zFXotCkJ5DzKI?-O!}L+k9OIA{DD&CZohDu2Zr-m_I&TNTg2AE&Q-^~s&R=FG_U0VN
zUc088o{9w??N!nrm#gl3ob1?sdc~I&K6jqSe@->oKn0m&;wNmcI7+Zy`h@mR(HjwO
z^dSuKKh1S;?l`Imz0_faW^~jJD5<EF`Y`NgW~6_q)%t{4OzBQ<$*Y12R4WgU7*v51
z4>n?Vo??<@Tx_@HpyfX$dwO!eak`qLtza4Ii#eQ!VJ_2sRP|O$lB@Lfb7u=^7DbK2
zbAQqeM<(-jW?hE}!<>%tsr#QrfTZUbYQi+zYS->0oQfXj&jDc3W{@vq*Y>+2`oI9s
zKvFP<AHk>j=8IF10fBO3g(q?OKw&}C+F?4#U*e~~`s)ey1vQL?>Q3|KxV8|G<11*r
z7&c|gpto4jvdN(Scpf+=C&NKrNrF)y>2ClfL{Anw5hWgnX`blKtV7weGknU51Po@~
zv0q_K0?#VJb1x$6cVJd3jvxHQF$2gH{hUB~66ry`Qnp)wZ+j&)P&ZXf>kTvc_FHD*
z@GGC3xDjV&m<8Qbi;$^WTF2|<;2Q@!;>EEB27ifS!8}<$@L#D#O1Lk1WwN*04?iI4
zB)w7_LG%}!XOZ4d92nAav*hgr-CxqfTRvK^y>^}AH3OkkZ5MxpN+d>t7gAX5yQb)W
zH*a3O$rzp(aNutWtI0@COs@L$Hr=DOE-$B-^V?~jCm7%tFP?M(>5;$}WB&P_n2C2b
zH2v>EY<C?mPrg}lrHII_f&hDEZrDYPp{BQWIXhS+ikSN5qQ8EBcicn;Apxf~E#08T
zhk0ci>oSl=F?W*BE1yan`wmR>H|JJn5Pgk)Y2hR&74~O=xBi)?%LDSl6M#|KZ$Saa
zZSVAgohS1>!(2TkqHdYd#S_PvzN|2pj{0M`ih01J_jO6iQdQEGl9A+a^*rK!5a$dQ
zmXhyOUmcaKuBME$!yZN%EcL5csDFDP`sL=j#lj&C++Vf(L-1`Gg@vE$h>%`HYszG_
zlpK&?pGXR*sT+GG;xq_fiS)(<Qn24hzO#o+n8m?mTzBXw4Mg6hm1+m?>~x(m$q_Mz
z)g0LBL{rU9==+@AdCk!oqUgltOBJK|^E_jjs-dl}28PQsfuJr2yk2~W5gV@q*Dgi+
zK)H7w{w*G;i=bLdNJZqubtx<y#0((a#0_kmSVhDxTycx778UE*Zv0=?AUlT|4h+5O
znoQbIr3V+ZX6G2vskO~G#9xi@=1j+8d+PA@h(+y_-{ZpyLS-!RL*!~z$5{bUVGZf(
zF)Lff8cl7CfLOp=`L$5_BO%yje0SK3%l4c2=)FO+YeY(2n$Jc5%zSlwPHY5t{$6yL
zUc5yoyYb9Dvj+*)*Ci^_bwimney1-n(aI=pow*b=2a)|6c;iGi0+wh$e)|6eH8uBQ
zna|t4!&RQ>sGDQ-GuConJJzKR^OGEiN&L&2cx}&by&#vllT5d?1ZS|Gon7H;ZHHR=
zGQ$ui(L!0uMDVfBbz`hqt*umJq`sQB4~IiLn!L0gDoz`fq>v6~cazkVt#sB`ENM~9
z*NWx9;XlS>+~Y4OT4=^%^P%FYF7GBFzRO!XnTR|ne;Po^#!xbNr6L2Zm##Lkrf*o2
zwYtAx34dn)CH55Fa@oV71j2F`ZgdEm$h%4uC$+x*(kA%72bGsDL4uZ(aEBo_T9Baf
z7Q^`jaN^f~Z65pjg8Sj=K!IKhAf?JN4xKd#GkT=bf`3_`W=f2C7?4yLL(ksW^~i)!
z8r7UV&p#1s*Z4Kx^t5?4@ig*FL60q7iLO*mD-}*^!pIoF8^^~?Hu9AV8ohjv3PuaD
zwFMr;r3d|MoE7E}>c4P>N6rU8kxsGQk#hbC&WSAf!JbKoj@u%`-~3@dYo(N<L9sL{
z4eu%KFl_M5`UiLJV(*?OJ<%bHhqS<rxPe5vK~3>LGaR~O$;eaP$a1C;kLLaH0a|~4
zKcR#3&IbF~WEoGfy^FfoUJ0<C=>xUCmFAb!;wlVtBDX{E=!4z6x&l&pPPw-UF>raU
zOO8IbBUNqC-n-?sVE@Jl@M=l4AE&r^#g?0=I}lAMoSau4aASye603I-r#;}|Hu~#V
zuB*+SC7Ay@GU55ePSnu=vv{&PbEnsJyX%Mcj$c=HFP^ws-h*%qwNB`fpB#23%J{_<
zL%p<KBJg;!kL+}_tW@Y&q?Y7Q!(rUq7(a}aAtNw>^k~pQeTt^LG+gziTLu?aMk$5K
zuY$cz&-~(ifDh%ZE^+D|PJ2?>!wfL+1&#Uyem`#`s-h>-)k^=+)F@vV6q@<6pnr!&
z^v-3keIqodxZS(D{cmtZ-9jj_Aga8ao9S{*)R$Jd>ZI#fmS`{aSh)(uXb9=>?XuEr
zY>99I7SlRQ3^k9LQy%)8FNeFw&85_Gl&%2|NRZkSBmmMisH+B^xoeQB;?H3rSjX2N
zOsCv3xDoMNkQDW9Fqnv#6|9OhOAX|R_&FLDUFB+~pAvv~zowgqHhHKhG`NP;l=9Y@
z?`P1!*xIGDj$c=z=Li@s(3dDuf|cX{dSN+F_&*5?B;m4YfN(r8$KG`N8NQ^Bn$kVk
z5D)3|w>PD7QQW&1ZlN%Am4Ee8@XEC~L!FS*kOQLG_{q4|Mm^+&pFF0O@>u)7wxguZ
z2gqxCeFo*e*x8Sj196@^-nf;HUOHBe(U|>0>mF}Yj%``w*Mp1|#Ov)L{CN3oi@Qj~
zORO)mB8fEN?Z;QXHGSwuIaNf7r5yjPy%jLp?9l8!itB*B=(#@43||8~EXk`k%L-g1
zPiqZs?1~Vj1pSvnze7$0#~xbH`m-96J|#ZmB3$Dg<`MrnhATQEPaWqOsX3#ZD`2a*
z^c_jFal0ax0szi`hqp@`GyG$NDB!XgUwuG5CcPNw1F*{ENNJRGhnxUnf`Ar(ggz`!
zgl6B51Z-^^*x1Lv`6<Q2Jsq$?%M|r;VdTZARsX}gYHRBRdf_pGX}g*qrhsk7Ea7Ua
zUk~in3{j8i;KtcR$`$yG3cZA~>$f=sU|LCeNhJaT{7srdj;}B*|N0+CFhou8m<FD}
z_*y=Q8w35xczp0Z;MzsP7h&=vaY8apGpV~QbGV1SG}dlrfQ?S9UrW{>%Ra3udeFQr
z&Q`V=+4T1EgA4bf{?Dv&tDaq_`Ami_u_lgiP;2{a^>dAahI79+b~kh}&@q0iz6dUD
zj?G`l!#eQwJg<a|T6aDauXA@6UP=B)5u3Q=l{08a__8G#bO%V*--xX*iR3C#qw41i
zY|Nej*{gI+AQTlhc`>?Jhws5N{8odWhW=sa!Zit6JAbWT_8<jT=Dc<XRFIn5oP$_m
zK{|WcLK=$NinPML+j3|Rqh=$V2KkLO@4(#)rQY>~bBT#U4VEuD{-H7M3=!cB!({fI
z_L1svX@4xmyf<~UZ?DE_y(TJnA%FDtcGE-?s}PlX&ZI_OVJ<ake&4c2>`{L%;zog_
zxC}+q?R1k}=YzLE7yzCu*AbuUo;GIgxf64cy`D-5+@ty&_IxziF66CK^l87MWK^Qx
zv*&%6R<PM6eAQP#4^ZeCRRF&?SEsiL*Qn&IyX!!tiG8NN<?^4--{OunDY<3CLP`_+
z1yn=9EK0?h*M4suSF1ylT@WJl4%mq7w<OE`k4p0eOmwSoJb@mFf!k0`N#1(-^Tk)}
z3@NS#?ITwifx-yi47{6*FpOaI6X3Cn&%JSRC!p$|1MI_S*U!-s(AW2<;={~4?uyLf
zHPmyZm3#{Fs6&@7%bOvLhygAF6oyAxjlG9iPsNgDYjTuraZvYiOZAkm4s}5mBHW6z
zaKStM$j>^^#mszSgFRks-PsXFN3#W3l^PApvhn(5Fe#4(TxM31qXesyDmNx(7cdiJ
z>6L~d_(7||P$w{%LLk58AdPUf%%&}FPV{T3OdcRw;m-tdc@iq+Wpg<P4U;T~5AVTX
zcANHxSPvkV(6L8ys)?=KG<M^Dt}_!n_61=O=dqKRBm;@?s|lMCYk)K8^^We0>ZfLJ
z5H@l}@Fg#S;|BxdQdzSrHYs{r6f1z~8X0AI*XHOi&f;8<#r=|ZiFxE;S{(P9(5nKs
zIG+b<!w9+dTqp1n{CK$Le-HL^OI^O?%Kbwur+tP`iA07^@DhukiSnGoIC^G9U?If2
zen%)Le&Ztg^-0ovLo4(gtf&6Hw{p?sMA5`M!JE*gCh-_SflAePzbIHM{t`grd3{Ge
z>Lm;~_k+J(SC+QbiE9(2{bVC;PB6vZwilYs*JBJrA42@BTU*r=2Gr@tMl=VN3K`fY
zIUPyRJSXXzGc?{Pd<rXx{p+B)0tTm!NjvAv9}J0F$e?HwkZ?`s9yiMOaZbdpT$88o
z&hzHp_>YX925hW`7nXFNcb_8b^PEU?MMu-9lD*;WZg8&%9&ZG{uZ`@TU(W{nP(veY
zpy8xu(6LDH`Q5QcOz-lzXXzLa&VcxqQiCJ%>!I{bp`0SIW=IC#0b-r^^A=LU*pgl7
z0fXOwvC&*Jh+O)2#H*B4il^;|DaGZ8EQ0`u9EeMojVCbY-0W!4DPz5`)~zs20~rWM
zNqKo_${gGCj(G!?IM=waWTu*_jL_OA9mYU~oykh_Sh4daNXdY(R{pN9yY}<!#iDQR
zvlIpHmvFiUfOwuNj|<waG96jDgrNvW?X{qx_7zqOKUOQ4*he5=Op2xV@oqOReti48
zvlM8I;l&i~agTG=*+|Cv1b?fTz3w#rJ#>C@crI#|5B%SQQUv`|CXo7+)~z9mX5)**
z`r(x=_q904?S58JQXPM*@@6Hc7Pr9K8Y7(t5NZkJWmQ$Mr0wnG4~25lI-YQFW(Ddf
zwS>NklM!;p8GV&dI@~)kp%&6-1%CKI<lqqZRO@q-3_p`~(TVRHxr+{43Ej#ln6a5N
zu(W)^4mfp?9;*qZP5BtQ1}cdAZ94haPqe_%j*(Y+cK#qBc^V_ar*;1+#e7N2LZNan
zE-A2yac+&zojA$ESgaYcs$Ntr>3dxVYAF?GLFDkeYz`b+3DKc)-`k&B34t19``1O*
zX#d({Ej?{tjDPT?!oe~PsQvBKr(Wc!tAnSVd2@cE>cOv0i_7Y|l3bqQXw}*Zc6f(N
zHEk+dl$&eoy-#nd%J`CgFT!j&$#q+l2Xa2^hws3Ybf2KMP@0^>gVJI)BYHpJv8cJR
zmaxY&c9{Ofzp-Iht}?N#Fk*^PqCYbu+>}niG~ldc*JZS*fa#@7o0U53Ocxtas{O7m
ze8Q!I!5x|~xLkVb*qv1D{0kR>dx?aW8VmAA%XrTm_aN623#bvRAaU>k?^VZmhe6rH
z$mPcs4ESYNBigAj16gMSe4S_UwLu+4HPKNvLNdNnGlg-|-+G9MqbCs>X2wdl@-u+r
z5_W<0=(5%J5ih~rFBP}39!r`Bc;8MXv>tP)s&=hu(h!Qh?66e|$h$}?b?ZA=>H6nn
zJq|Rs+*+=b@(3Igm{I9btIJYu<O?}5a)aMe&1YAb9>NRi@_a6Zkd6)dC+5cpPk+e<
z$KqO^TAiv1{x?kHbybRw7ysBCl}Pq5>1FFIU*6XTwgtv)ha)ZSEoa^~77^@pDBp+O
zmz{k-`L4T;o{GABGqZI*>>WI5-zvL4PpDDe_Rp@?Leh)$N#(ICW1j9smvx>2s`Yz=
zsaUk{7GQo~DVXO6heqcdn_b8gp_ZgB!v794{EQz$4x^E9tf&pq1Up^@+n!k-DqsI1
z$5Y>8{|hA^@pC-VbZakjH6b_|+5lOEz=Qld)?G8)F7o*V5I0ucUG8YIT;ds3^s{LF
zjyEsd>>w_!S)>`k9-l`*iH8V9xKeNMvoX{&Ms|vKUOdK<5Nyh_Mv(*IZ8UGX4#OX5
zV;zdE5xX?t6287rk0j$&A47FWtv@>%L?L+n{y)get``>196#@IDXag%*Bd`1f-%<S
z`2~dhd!>0OkjSsvD2-q>r<0wCh{4Zt@o9kNa-HFA_4GnA@jTaP@4&CFU9k|2e6~bU
zc?6^(!uN6Gd0)7!RUd#&z|)?A;R#2GC+Tvs=PT_7d)ElA$akB*3O)hkg10B(6qopt
z8u4`r;XSSA1z3zuR<x&V*Q(Wlkb2JAE>|6BZoC)mI?ok~p^5&9?*F8d{Vl5uU=3^;
znj(f`lwIDeD`114&7m_=HGOG~ZvUewp?>wzNiH-0>xUH?&m}(E^O5JHvD5$Mx)pVu
zS-UX4$>_95&V#II(+}^<NOG(|T!w1$LNnfcW0X!M{frByBa)JQ_fpCgwvi8dIIUtG
zwDbTv^XeJ*SL{PFz$?2oHj`(chZFG74+9{66HF-pd0ep$?DKVnGqOrN4tp1OM3<$}
z@@8urQtN}x!1L*J8*>j&dYZfJb1IL)d#;7Ia_wm6e}e+9b%m-{VFJ4D=}!1#QP_HM
z>y|LH_`~onnxwygwx2;0vv2h-#9ZLXj?l$0HYD<8W<7s;RgoP<W<FpQ+rq&Wtdmct
z=#dLo82H}<8r=(Cc5IiJ%^x=;it&`5Gxw>*bPkgZMO?;yT9Gc9rnuhA1Kly<a3-{k
z-(l#@omUaB)2YC*EnnPzZpgcPSIh~Oove1AR%+gwgx+F{-@S25L*nM&vG?nr0yD&@
zV+}Ix_$B9g!=q2kFPAQcY%FV1pz}b6a6Skwlm$=ucHPlzJ(b9Xa|0VvjV<PqOSTL6
z^yEa9C4H9uR<S6_s7vfdPZaP|(*wkTY!=_S@WF%NO@xuXSxQzxYxz8IBX_|j4Vc-i
zMmfdK>>oFEHf$QixLjO95#`)=K6ST{oa7cWfkz!MoDw{znZGO-Tnl}=<6-N;fgXn+
zn717_NxA?7WCw+xc@uu$I>iVj^%r7;zo6Izl$AoK4l;_&U`+-Tgd$P=QKXHy)BeJL
zr%tFPwM?*#9dE#7i~X*S|Fiqe(LZDK=6WV`8v0WR{Z#TVrQszk9z}rZP~ZpOex?^3
z6S1E8p|i+g<T{S+2O^eJVEj4NRX<n3Kd;+I^oj71ybosLc-NnqG*_~jZmm%{OR_ms
z9R>Qh1eZRY(;D1ukc7KQr|IxHr#ueTm-)38CRLz)?9>DV*Q#^!6sNt6n=%H2`5Xjw
zaH^T+#&{0Y<mcNX<Bn^1naLB-VwgPpKR%}2#dH4=vo!Wpej!p$aE~{T#-ghcM^&L?
z>5BY5IN?HpH%blyat7C5B7ze|dHGXyoIkDK&wdam0nIw5?wgp(!LqY_6w|cLt^R05
zY>L*SWqon9+JtT!AjXYu(7gF{<;@zSvEl~aww(_op%6t3-rjj=mXJ6#@=8>^u#&l0
zC>^@6H>BmI0qDc}aJVqK!pa+h1J<<{r+s!CbNK!uS{6<u);sqp>wwJl$7HSl1n>TV
z%4<A>!`{m8J9QOmwz-ErTEQ3rt^_h*tI8a_=<_D%v823^y-w_eOW=R|_Ig452MmGM
zc@Azf#wt9#+!clP`Fq-$@kO@xUjALyoYQ0pJH7r0Kh5522dPF(hHPxj*DjyHiC?B_
zZc{;9YF$|`#U1kV|JCf|CT8}pp#t-FFw(9&X6B;Bj+*cFE%WSiw1KQknv{j$H`H}n
zJ5i{rTP6#A>M0nThK(BdFAhg{8TRLs)|zI@tH|ywUET@QljHe(9`Zur$y#inh1A_Y
zr~xS!;;5qfr%pfF!Xw{ciQdat5g?8zvpBk5EuD)Rh@;%4Ow>sKea7YYCbulO-s?=<
zJo~iU*wjy1i|@voW=iHaYQB~W`H=I^-q}!*puxH9#JxYS@Fe?r-ACMB78dD@+9u1b
zh!KPD;*EN&72|e0@f)%Bi^B`NSm<-EZ?I2s-y)s%_uJ5O^(A&uH?`0QX0%fa2M3=U
zzW_9w8DiN$KN~#<gDj*Mq6AG%c;XN#0!q;MLIY6$)V8-GHAmcqooCRd0X$@$t>8f{
zU=dd|Be5~s=&9$;R>{IYxg-Dmq@S0W>>$;9jY%XjTJHQ1NBy^POuw9x8}<k}us{E+
z|8CActpO|6FtAkS?jkiTv(5I^bsNLqKsh&G=CBtLtW%kRe4MWtq3%C#ELK(TcsizH
z9@$9MV#^C6p;sAK%(I4$`DhSR7a*d8yxL(Yh2qX*4yxW1ubZfG?AuKsx*ZG~j|u#B
zJ65K>Iir5N0x+3S`vzaSo9SAaSl>rd^4#9c4r@YnsLk;#pNaSYhjy;}yq>ES`Ckci
zAs=O?zh3(pHAS2o(6Pb*UX;+KpwLRbyQ@t@_<kW+a7#x$KkhLZ7&8sv;ou3oG;BwS
zrNSku0X|2(lO(Zp)eoDL%Vgh*_$<NAYm)OTO;N|s>3a0^PkF}d$Z?ZQS9c=XbVN{n
zY8nxzolK<ra{s&m94OsF!NaL+Y_KFmb!az`=cod<H9W-8q_~b7Qk?Fv(KT23zLl$W
z@3)x#@(h_s_V)V>y*{Z0y+zL3+`Rbst)x~7=ga)Z>NAsT{f6;;zboc`6#l+yHPIZc
z&(-?!hxIuA7IGX_<ZJV;D(>>ARj055;@z35%jo*dMe12A(#)6t4784KF|wKuk{JI<
z>Mi!V-DSA9uS|(TxY|<R-8l6c&x@S%h)dT)#*3W~%fBxFW>SH<aGJfg)Bk+qtp^gs
zPItwEhc7g&CxqH=zN1~r8Af8b8)9%lSYDL3#ae}k<I<jABMvR}&&OEL_~Cnh)8@7+
zB422bQLnq6TYD}qkBJPj^lQa*D9cLGBs5KT?coJ&+Cgr6!RMs)v6Har(8l`@JEgLz
zw0CpIe*23Kzwov@$>!yE4Z+K^l>3cI_SG-5H!ak1mP7>z`yqb=200Y|uK0>kxWh6D
z`%Sj8F)DmqToINDmQ2-GfW-*3gX=>JsV;x96w2#$36h|dNEXD(L~>)PNKCEs$XKKm
zl%-_^%Kin`*swlmJ#r}lY!B>OE}>}g|NZ&)Fo9_Fw-Ss$JQ$+jhq2#Q@Ws6nHUFn)
z`PgF1wY-^e^Ix^UNYamL(>iF~V=xXaX~Y1?4*TMe$ALwe{?2l(JS9#M{-a2{#)^ll
zL*|-1$LNP<O*!hmKJUAK#83S-^do=-tv{FNjs?s;!YI34<s=AY6X!JD)m*-6d<Zy*
zfY2IADw71b>T>wvSgnpS*sT63kD0FM*Rtxw4rwp}=7hk{T?b=WS-!@0XAgd(6#A;g
z@s&`O$Gb2L^U{P>%!kZo)8v)w=ATMZ0phn_LRPdHta^Y}gq}8<G=VyizMQw4>DLy~
zGO_MOAKp2~5B{qAam5AW1nP$7*N3tSZPr~ms9l$uiXnY!7TiuM3JvYs=>QxaRIpzn
z_AFA582Ln9F`$9pG`0C^EVL;JerpfwENCDvt5x3?sTRm>P~2vWyw~M@Q+FwSGLds5
zy_NQ3`g8o76{)wyg&rO*j%r?<u5+n`T@Y=if3^xC$D_+x9ZA;ViNE0~3Dyt?TUO2)
zHbY=ve=WWYup_~W)1)uD_0#`XIl~jOL?I+-1E5)P{5IfeT;xozc-Z?tW8B62+w-Y?
zoZS`dpMPDiAMgEyPE~vx;h8i#;Jc_}{KFFNjc~c3cODa1$InH_E%KF4a>80VDLbOU
zJ@Ge5<JsQ_gFEBBMGAPG@KE}M#1@i6;|XeLD)Tji9S@1$J9EFG5?_23!?!k&84~pE
zIn4n^P-O3F{SSMlITNQy{9tTDb5+aM<2`=1)O~isiEmG!>Ks^>njA$jep?MEB-0Hf
z=i-K<;Obayz*I}CSIkhZ#yOWxrHCs_M&lFZ1Y3Pl2rOhWWxJ_?*F@^tRZY!b`L^hC
z6{|IhoK}zOw(1%8CBTl@N)gYvc5vo%116eT>8DH9YW!=XWg3hN{?mFy#;49-mcl1i
z)b^y+P21J9e$URxpE+|*MYSyeUquJ5dazSwN>?>3sLQXd<9m{!TSlP6`(a!LrI{V}
zusM_zAl6WP&t!U-87Cnid6N-hQ9CT34m~d-(Bd@rXy5v)vH9+7*Ww8govL2c%DAc`
z=zig1e$S9)^Qv@JH=f6nBZGB~JsgLws&umcK$Xd<oL``j+|=rm04t$>54NU_pO8K_
zR?9X^X-fbr3(EE#F-s1Q$Bl8g;S@2Z3f^V2D?^QQ$h&EKJm@{Sap830wBP!kR2KFj
z=n3W6>ty(0R;$OjNWbJKTjugVA1dW0;-Pf{VhIiGw?&D`D^GoZ+_Fbuc<$tC%5X}L
zdG^*MZ{NJ3(B_bk0U!IKI(;4Nx~?U1MBw=eY)i<*)8Yvl#1>ttK9$oi$+wf8oLzG_
zaG;;V1bW5iQr0pIRNLy|s1EU*C<{FEy|MqWpY(o!Z=*|RGW9!1BVMi=bo^UoS_g22
zm>gP}(|RNxY*GZv(9zro5c5mo^i9MD9;UpnFKne2OfFEi16JDAVaV4qc~oT7GQhG4
zoLRmLJs=aBGFYy!Nc!($JV{3yV}V;>2!JLrLcksqT;YOsf+&$fJfV_F*hdvl>sky$
zfhD3ib1JglXEvM@wy<dEmg_`bu0#{}PL)1yx%c|KzM}rj6gno-B*~Ll*f(H-mA3c@
zVjoD@aWKTiiSm{&aj?lmD{3s><EQc&K84`)souNa*&r{y_4z%#uZnW&HC<#q>R+0r
zk~l5<frt*YMOJlXYcfYsWP;Y^fwn7c0q2#su(~$>e0WCoUDsfM2kZPtS&qF1e@&4Q
z?~!vnqyzE-eZtbomb}WeV#I2c!3>%O^mrvXElltQ!ku%m$J8!?k%GDbdu@0anPN@9
zq0h?75L*QrUNcCyD}Sb!n?YOT5_F&5FTL{AO|XT07tFIG=BkgEf!2^vLtAd44U%Pp
zxQ}a82XQ9Ki>V@s?*A@MGRsF*ig&`~iN2Ri@AMWO|3GE+2_2k`Fe5AVGRmBgA5$xt
zB}yfwN4!&IMS7QiCO>>HhRGg6xU=<0<-dcq0pM~uu>ju`l&*0;(lPo`3B{rJ8qRfw
z>xN!;b+?}h>=cbWTqCK^S>CRODidlKgCWw^a*uS4P5<dYF)wN)lxpUVVg~Zn|My^A
za^uAg7Ik=Qlbs_ru;6)W%7gDY0cmFr6`v2D?k$YLz0$867l0QdCG6D_4diYeB^taN
zOLeq(55psdpt-1|)_}IE_{(8V6kIZsf=TV{i4iGuODvZsxiw<aP*0)3FDzGYB0pGv
zBL#&uL@M0fH`O<>NhI|Lb?|P`UTQ7PjqK~0zJnqAQ%v-|vec=>Qprp0y0Hc~?#ULU
z7$D5q;9BQkU`zRmSD{gJC5=#|R_~H<r=#a?$f-a{z@#D<GgRh&*?-Hhlu(;ppr#rc
zR%ugK$MZIZR4_fSB}5duj)lYW3q>B`vsPPO2(ZDSLRRC?hdMEj-M#%qbwV{~UVZ~U
zqHk_FLGdPM0nPMMenyB-ap(N~RX)?vqW-8glMB=m98yP<N^i+f`$PQ${hKgqy2|LQ
z7g3c+W~Hmm%72NCCUiB;dT82Ur&7m(C5?9^?kNkyvMn;4Rm3%=s52yL3l9j6IdH@V
z{^+A>*g72!U2Co&HdSTTnuD&7FVe^;74dB)Z(;P;5h&RRU8V1pd}8v6>`|oW{8$14
z)Enq6^|mj3_WZu7P39P6koru0MlsSIHI*XAaGS3p&#%eja(D<s9M-t>`!MIth?{1J
z<we~X5RxmvqAIQq5dNLB2UfqxVWzY}SXw{CJjH8|wC^)7`7_%i$m#VU-EXoylDwT-
zX`fc>6xkdX7>$mlA8s3l>6mS<MJ1}!bAK62V)`B`68d-e*idEq8p*?TIyB$RzLTX9
zy)(`jfSDdHFR%j&#xdXE4zk+$9aZY4B~9%NO7eBQPyIL;<2AnF{#|+j>ON}K@|#P7
zg`lWGp1E<dIs1KE28$6VPG7iwU16$*&B$Cz<C#c1@eYAyM90ycU&)k=;`WZ2jepRc
z=x*TqmB;a&VP$u=*X_mLcQu`FT22mgo~M$aZwzSC0ea^DTvNlb9v#ajT%wVms3eC@
zl?PL67zuFMT^5A>&E@E4h>L;YZR-6^IwQebtK$cKlC4>+?pV=KQehnYkQaYP`wqk_
z9Izb=rPds&JI{%qKg^=ng{qf<xY(ftP^s+39JvEbZ7`$Oz7b8Ys9a$D;H{UyxgHm+
zvRml!sDvUmW$rx{byCOE8+#+dOLIzsgIFXe;OQ1XozPeA-wW4)&mCIAkk-V|t0&n{
zzA%vF^I#O_@LsS%U&lX_{a0lDt&|l>1Vy4Y%xCZfsTUy{qL8QQ0FuK*eu`frfF&A*
zsQ)A*URgQ_xJJPL+0X`CJZHD7IA%f6EbzTLb}wWQVBAZ1gqe$lfY*H0(jGqh-tk^N
z)a9vpGzH8=EZsv{eLTt_doac(ad=zh@1{R1Ue>$^+*N-JA)q|wYWtA!5u!ekaVsS@
zO3vN4DYTPOWTz^z(2t}OkuW787z6_4KuZ4YuF07bj558e<s~V99gMEa>hcmNlOaY+
z-kNfZEsTB44@YLh8|*DWgk{%|H%i$6h!g;+;5R~QtW)*$Dc}mn3ZiKIv@gW=Dl$f)
z19`4Wf6E27OdG#(coL|S8lE3{|Gebknyy@3<Q={0I`;}+EJz#e;Zy;C(e_UxgD=Tm
zNu-vNb31~B%})7QEknf@<+<8N_O9?~MiMIcr&=HXzCH+NAIkMBu$5mk!1&?8mp-PW
z&Q~3^@&RvKe|m+twA0XE7?yFCeoabhV=#B(Dl1~bx2}HOlRStO)Qz`Ko^ysH%Nh9q
zMVyJAsT)Y`A!9>>O4;I|wUIC=+%A2xwjk|cA^0X#103jbLsnV8Nl?2o(X~dVl~X6u
z)#R&@rkBs%A7DPOk(}*2(tMsGg(TgeB5Vk<W=Rs%ch6TU63iL+peLuABndTfkTefF
zrSZsys64F<OSRFmbecH$g&qiuA#Y-U|1_@#PjTF2#JG^|kKq@`(^LGG!MKI_>X_Z|
z-9CX`R#(IChsk<Pd|~T*f9?_2AwbHVLup;k+e3uA;IUVL^$B?m5-}DO;^n*GJj^Yw
zu|rm6z+lU1kppiRpikpb2iEwWvd9)|i6#l?w;bYEb|9OKul@UEjru9K!x%)%=Gt!a
zuT}=XA78t{BJa~0a*or=I1e%d4J!xUa+S%e3;%4_-jHpvxdW-%If?At8XBoJYvEl_
zgE-=A_*^V+809nv=POCQe^=MGEan5n$u%v&wC*!ana|u%`<~0*Gi1G}XQ}zKP`+|a
z*vbR#sopb4T3?Z+TYp-Q2wb)3Mayjq4t-f=pluqqFOoGucdM#*TKgAjfv4I{C6|-~
z{1~0+FPrMFK1Kp%c`dQbo)&9a+D;@#o-$cAE)LL?j=;KPw`zfY%y`Gg%iJpM;)eh1
z!cuQ$n2zp1J{l9A<vTCej+o4e3@3DXlkmFq9Y~_Ma<SWPMstI&vButPLvOCq)5Ypf
z=>SNUNI!8XG%sYoXPm2qH?G1C>P~%a$0Pa4M}QTSXh*0b%Lu)|Y>4Np{~dwLi6r|e
z5+I)DkAU8avlgtsQ`%VUD;oCgTqDu0PBG7vS$*QFnjwy;Y~gJ@R?aW9BOt9W+Bxgg
zH@&4SOj0#EU8xj}tncC3JnzS9(nF`J?jEWZ)aP@)#o#t)2j?Sr@gLvq!D7sEuZC_P
zj$Q?C&;JFLoMS63;2xJKqRzyw+0QgtPlf%`NHg#Psp~ZRmJR*FP~|)`bysL%F8JRA
z%9|N^v`DPIlervY2&2NRT*WbDu~pPbl^<O$lcNL{xCrvcX1A5>ZxO}3gOlG54j&lj
zxL<xzPeKh1kgafD-Z^zxp4O~sjKvouG9d2pXs^S}H5X33s`+8|JP3!%OHbe4znx<F
z{|6#L-M-d{oHR>N)$exp^O8xURNBiBWZNi!uKZ-AjKwB(>3lE5PI~3&hc>%6n_iol
zsJ@R^PovT3v70Mi((Uxl>(hy@E#qf-YMdEFB-YpMnv1MF3diJ`vVg`*y;lC7GfFp<
zQB<wy4dtdXjmO)*4dGczGE3fQ3}LCOR_l9z!!qRON+(FgIMwJxk5bskXmrHv_{Obc
zVC`Zims59Zh3Vbk{`qlrotcK|>;#laF42>8cRjMrEAVt~w}NGQN%CK(NlzF%ZDx^$
zO;f>9lOvIB)BgbHW|~o4O~y5Kn>_t*h#@&b5sss4WyW!o_^dLvm93t5$m&cwVWuUo
zdOx0dxj3cB<da(1#APidqVNsvjGLBGtg5Y)$pRF~IgN3XYiOydHjKoPOTX8D*D#||
zvl+J0T{N6hQ7R{Qcki7Xp6uOQ{FAi=-6^?W%QT~F+J-AQ*-(iHG9=Wwxb1j2+oZ;F
zZRlIk>5``P&4x7jP^m@xmO>3nK@BZhZ<h|<jW2U6RAT9#!rzO*#pvI7I(lIvL`~iP
zpVuV0D|!enPT4GDAXvA%^gL#h<DyDYsx%^j87HJP*S<1%v2TdV2>$@NDr3NV;Ib<4
z%lS|760^4$>n(&{E1=U2Ma*p28vg(s*DP7npMwC`l99X?7D36&2u(dPr`w&(-F32p
zcax(pPs@)SakKQysHg7mbXZw2D8{wej^*t5tk+mM@om!4Qq^=v(rzXyjjLK=5MI=K
z)q5FQ=o6}qWTLc_dF0>uWlKhqT)3j6v`1WTi+If{X11xb8reoswo1@<JmX0xVyP7h
zHg|WsKS0I4SSFnsF=ZE5N~o>8<tmD_$vIUZVhE9JcfT1WpK+w?7IdDP5gLyiJLRX5
zj%-NC{PrP6{AQ&T#`S1Ap;4Fzuy(~-z^BDiE*BW;BC2k7#p4z=Zi>CElFN!anNn(=
zhjysF8xoM;En=Q)pjP%8X4RUS)D@L42Jf_nG6*sqrCW!8rd)i7-0hWGm-v3YkFIMi
zWNipv2CY@TO*CmX(X~uUm!_)w87QZbi>qjQKujXtZdYvD)<w70j+D$@fw~@YisCp}
zG;T5~HSQjA=$3L&P+7kcZ)~|`vsXyb81CJ-TV|W=X*Yt3)k3|3xWj7(<h-JYw5xml
zoOJ00EGAe{Q7G->6;f8sE4yGHnygal`{T>V*M1Av)MDLM*7tt+q_$}#Bd5%lO#nNH
zV3TdKO0Y4x7Q}74@rz2z^q)YpW3>a$+uk&mj+!VYlp7pY{{Vc_UPWUiM@8Lm>{s&3
zIyRG8Wra<$hkde7v%FHHrPva^ge%TE(WI%JFQ-GQuX?TT1fGtuO)qJ>{aN&SGwMr}
z(+_t&K8L5ucKSR&>c3LHkLg=a-0+W(_)Pj4KLW$s0B(V@@OTm*IB0=uGRnZ>@U~`&
z+UqPWnlio3+{~Nobcs(nmfHeveX>l_K%3jf4>}@qJ4R?6x*~A8$`pe4+kCb<JM_AG
zO;X+Qn~I`3Czl@ViIl9{oDFC`u4KqHl?IJ_W^y>_j$6k>XSs#0$k*G>3EoJBu5!Z?
za?u#)e9O@^W%t45QUG7qFToP#TA~MhutvS_2FRD&C*%?i+~oX-q>b}8(HL(gg2__1
z!P+jvjq+j-8yE;HgFp`(WXmCjER402GTnw{<q;}{WGM#@_&VrAY|e<D_B>$rL$i?*
zy|SVuR{}GKL>Ln#=OHs#SvD9qivUj;UEz&`XBc4-ExWtr(F2hi29X9}floOB9!C;3
z1P>czMEhVswS@^d3TA$nm6n3vY=k%cb1cQ!C}1>2z4D_#r;Hv*mN9t@%_1e~!*z&#
zFmXW1dQg1kcqSVm=tr{P;E|$!PS#8H!+{6Nly`)y3Tr`=q<Wo5?J!Sl=%*#KNwc(4
zUzhpkNV^r2FzSEe=BR3k(<6xHT4i(%_cEqIfH1&JrVNR`Zwn)3W?MEE2EmcAB3IuE
zG?f!5#z&|ht_>%KOyNI-=}U`>=NWntdNO(E@SRBhT7Hgjm!TwPjw~e>r4-IBIP8hc
zftIo~kvSEcBPPQnY&1r|jgbP@L`vR5Y=qeoHbQH{$S=+tAbqk#7hSR=U`DFENd48$
zhV4#oZaZh%ZEHlUEtMF~A*4q4-(%0yIk@&1mR(VioTS{I{8{?>#Xb)|v97RWj8<~!
zq-oy2H?}F=NBQzjO}u|TN+!K9C8dc%@?{rIcoy^WbJ{j6Y?bZB`{N}YqbWDC?UbpQ
zMxDcK%e1WmS<$K^aFZ=1Z_R&(E>2HHil>kL5eZA>XwpS@)b5xxtusp6ZIvNs^Cl7p
zl+8uI*cU3KOd>&znv&AJ@J`(@lH~FK0K)ZXJrdmH@%~pB?(JmL)ph>l?!}D#D{ZDt
z+fpdYEv*Ph01Y+A@ns4~#U`zch_RwoM#%Q}#WhNeDLZMRdUrVNAe@`#qWb>BJnd-e
zGn{nO8jn1idw*P-PexrzOEg4p=8R=(?^}MRW#`)^`<iOKq(;`a4ZCvvzM0aRVySJH
zsy{DW6%i8ozD&dSDwYXlQ+qX$(+6;>Z;aB;TA-$~hg5abG2gMy7uqR8TWK1|#w4Y;
zbm05u-!3@7JmDZB#!KnaaNmFFnORrZ+Fv$eQ)6<iDmPo3=9NLx6^DX>8Ay%q{{XGC
zGLn`he~{yCJ)x0lIY&xzhbPx0{{W;CYfk?FpY}9mr>HVk&8)?&RSfA>2?&ced;Vgj
z!G+qYz$WZS$~v`rilUA<#VU71^P+UgjYHG8rmlV5SIMO+#9fSmxJFc0BU`y~th|MC
z*)WT&kQO}4zAbe7B~a+S$oisLwT939W?FPSxg$n5Vkx{%_4ds+lc8-%s$*R}QIZ!L
z1%Gz1cH(6xoBqU25s5!l3r+8ui}ul}T6iXLjnrigqrG;#BC+r*ijMQM9Vqnfaholb
z_RXf|xOe_c^5m$at8q|`2&(Lvju(cg=9RleHR{z@$+})EI7aHWVCqIE8>aGJGaPe5
z(!H5UGMI}d<8j)t<eH|1rLqUBIcmMmH>=HMX&9vol!{o>1rJTpx<6dQN-HF#ZvjSu
zv{e(DadJs;ile61FjR;!jmn+Fe%MX&W?P&dvc^nw=wKT^cO<GsckoHkpomI4r5&+P
zmUd=aEQ+5h5?<2V^xw8hFG*ReY1>Cg{31p=`jW0v?z6mo7WQeyw@0o&4-bdZM@nyR
za<i|e>8r-U@oLJ^2mQ#(?|my(n{G|C(YmEp8g#y>#Ym-|GcmT=o?HG|nM<a6;qCr8
zEYUQZ<P}7v+*G%Gd3k2@8d@eIv6P}cpZLacrEF5pbaaoZHYYUk&SFugOCtEENaxgx
z2U5yO=_uO$^LE>$X02P<QN7k4`xH5rn|OLQ-xW-l5_KhdpUvFwnVuiKnoGTZGG;_b
zGKkj$%70vCN;cV^Jd;Nby!w~vlTzM;^v*K%c~V5xVC4uJ%bt_En^tg5w^U_I6v$&J
znhjNU_I{bGr+!T~pj2Z<<r^H~FKw-uNnMrb)vH8e4#O_b809LvW0<E~7CxCov6Jb)
zerpJE#tF?Wzu4U(pvqk#t~Td<WyVh{M@Jv}dnP2kCrmck6Zge?wo0M4Sr#`d)fBm3
zzkBbMCrD$QXva~|bk9k9zAv^8xmj_`Z!Auxj*+Swr52BT(oMgEQOZ@K$vTUKc1k%9
zufAnQmKZd`_M9(smbUttwG*QDylB9YdU0)hycpJQv}HP%xaq`n$Qv!&XD1uYla5p9
zf?)`Z8EwzC;*-2&(stUs7>dB#+Q)0jQ<2%aPCiRr6A}d_S(@@<F;`>Lk~*tnVm;rc
zHRLz#vpQg;qCJhM$y~qkD(Z~|3N`_)xBBAim78A&O@e~MYO6;Cn^Z5!w1gwsx}eHB
zH5QV18xPHPN56j<zroIu(l1PqoLx7~;hUyy)1_ovAd9f>_Om)Tr0^-E?7o(*Rj0n|
zl`67HR!T;~wjHvDRavC$+p<fu$De%CR}pHKMq5RFR?)UK(=!y?NtBG1NZ$_Gnx{tU
zo=VrMnMm*3XEv4Li%Am&*R*%yvb6SUlvd7+cLfcNm(O!1($HQsETY+F&pepsSrn<{
z(p^^~s0+I@QFN?rX&2K?O}0JqtP9d%KA9_)rk%F$?Siz{1xt<#(HUu##jW_`+Z$<;
zsIjGPP+}dtWmU6LYVCv>$|OM3+ip`f>auAyqQ^;MbFt;g>{eM_NXw~2eK+da=LGV}
zV<iySyN@2+Wp$CE*)CTb^Tuhv25Q-AZ6J2^)7x%xqe~kwq<1&v%|$k8VOdCvc6n=q
z3ap!2rY&}nUtamANfKoNvr2J|HjN~#ZY6`h#>el1O%8k&N*Z-;H2HC(QPEm&((~gd
zoH0ozDG@k#=QW{9mRNxoSY7*Nyc<?rONtBb_sw}{M69@vR>|z&6+?&Hlypmq!D6!1
zmm92G#WSK;v!It1jg9TjH&|OcB;Jn~;vHRGNc3Wf(mC??&!y<WLO+x7KkL6${{S)d
zJR<QNe*K<3&DT0S9~I!0tA9)ZWEXARE3-i0M7-W*g@Z4A1e-io8XOX;?UTp@FD_-j
zB9LALLf?EYfwN(CqRBFFY*I7LU`6zLcIuJG8PY4V$=}}{=s{}&BKxP)GQ`*IkOJ1>
zf+BS{zE)HlW?K|y{NUY?JZ|E!fR$V<iSLEjn#`M`20_~Jw15`|M!<R{!oU~CKO`J|
z@FRBPGohm`cgeoP3^jQDGcU+cTiwKH7ViegEWnt3@*^e;g3}EVEA5x$$Sav{#)Au?
z1@_2+`d~)Cd<d6)t12MvfdNmPERDVvK<~B$PTUufY~O62NHYwK{+SUgZd)LHVbKFw
zT@f`|myrfrZe>NXc5owoa<T_(EQogRmIxQHVA&;P-Q{CQ*fs`mB4x%!WUGLr6}AFS
z`<XUIQn37xU_`#zkOlX}u?zZUNEaY%9xx;r2tG`#QBe4fq&8_i^Ga<Rx;sPSAur77
zee<IA6BN$7IlMAtwR~koMAUp4P=gq+1{8{cgm}_m;);VrJYdo?(qZ!A_H#T@aR;Xk
z%ZCIULFvWr<lt0Hx>3GdI3XwH9$Yxt8|0t)IB+0(7sS789CB4;zYi4kH}=8$Y3#V1
zr|>-~@#*?!8GSTaI2<MTWciO&ex`DNO4gV-csWbpda>kw#&VaTBxi}u=t$-Lvmd6+
z(Y{On0G3~-G#@Gd0Lu^4vZFmO{{Raw(<%>_{{Z2B*nXK%e6Rli3--tKijDHW{4d)D
z>HL)&<$w5JwqK?u`A3%*>HLuVqsxcsra<)L<;(QZ2g;8-<h?XT`BCwg>7pGF@sAkM
z8$^HcFye^XBmV%2l_EcCPB4W7sykm9^edIO*>T!T2uT=frT4Qdr(?&8zKtl#TBQ)E
zo7-07CaE?GRIAB`#gz3|8}emZbZKV>3B+CA;mh{N+byFJi&<f)kg>ATmHv5Cx-{b2
z%P{Fwc8RBFYX0~rwz4g_ybMM`H;ZG-S*uk^0+$!R;{n}0FJksjd>TU4Wg_~0K)Cv9
zBDZh;@@c%LVtV;Mz5WsZ0K(7ut@yY903ALj{{WLe3HXQLpZ+2EZl6=)Km1=luT%c=
zUk}sjXrEN{=%F7<-5KYX)y6)j>2_(gDJdVoqUq_k3V-qqzPFy5r(ojKMxLImb+Xa8
z5j=Lv>HHFS(8d#LaDvwK_h0FmZr>COH!7ioHBVfMMJU@C;ZePrvg6Y=3q?K7_`iR)
zTl^AyOh>61e6<+q)bq33H+J~5n{aO6j4{-#qrEJVTk)Bb(hGmOG$3kXM{d`){(g>W
zNwkfnCzBO-)Q9D0iP_&ZE3J+e6E{*K$6$Hp&U@gVAA?RxKWYk$7gVnKr;|6!88GUy
zv{mbLREr(1`OM)vMx`oN%Baf1w65p7{us%))uWvx@Db{32}{-dG@O_F;ls(OMcPFX
zV=`80dB59yd0xjY(s(etPCFa5SA(rFcCuuE6E>G??TT;hMU^UgFQ$?bQPm=>)k}lb
zAamsGlc;YiN^CF3V=Hu+#V74K!*P^gM3HvxbDQPu<nm79?VYVXF^hIGh){o<XExT@
zTda}m)2e*PucP$MQl#j)KAAOfC8~qBH=0W{(K>Q;R=GfoVMLU$Ha_3iGbHN6?sl{y
z#RsNnT`C-AovLYsV&u1xgkYLhS4k@FW?OuflYCx?>CQb7i_?zItB-#9(OlWmS?sCC
zYD@nBCxzRN*^W(RnN3DI4xBCl7{^tmo&B*=^Pp8$&>+p%A5h(&y=4@g8rQgGFn~u;
zu9io+l3Au<HFnW4YB7nbqZ)nkm8*RlZrwi0BI-)lV@I3kGT#-FNy%FSq|P-4&Lw}y
z&8Z~wlUTi$es&r!eXDPKzv-FxC5;@_*CJBZwAmd&N&0)`)&~hJouSa`N~t2mR^QE=
zP?Y0nqMB!ZpTd!s`;yheSNmsW=qTG`P?bhHM_I$C(h3JxW4Zlvs=~pidX9=1o3^*X
z+v}Fz2J)H?q=G>sW#ln}<hd{R2OLr{D9RF&N>uQdx4D^1i8_vm6jnND+m6_{(lzaL
z+7?|<zg*fWz8k|0X9l37p8{%x!bK6@?}fp|OEE2~X8Do)!26E}zG+%x6-0^D>FJO=
z@b)pqrm!tr!EudG)iz(xe)*LvZO~=lokUqO^v3D_m^P9LX9(p!r%fo1sGRS&InDa1
zI#~xb8?789%d6D?0D4iHY*TUPwsLZmvb1K&jCC_(8Oo))J9~X|A12y$yc95&(x$mT
zr|XsR%~&Yhc_5zAWz|^&zUnpYlWE~>tyVH#oe^d_nQkMo_V1Zg(U`v^Ezey_M~dy6
zR-GLZ<LzZNDMm)ENN;=RG?T+c3Z5CIDP1xXx?-cs@1sd>`X(}CFPEXAe~WzSU(_m_
z<y$Ht(`6c$744K$e&AO7Gf|F)7jFc3`(mAEvq~=TUd+{tRmqu@>Zl{dTU~@^>-9z7
zX?QWG7^cOhvWo_<PMt2?@$Zi=Owz#0I<(LS@cna3!ZWC_)YD~-<?)61G_jP5(vgjV
zQr4=hV<eT|!EsjpOc7xy$`6Z5aldt@ZM)0KOl7H{;)?Ela&lT@8q*k!Nn#&f*(AIg
zs;bz^p)Z#yEpFdu6-~20bg!Tw#yTY=7wwu(-c}7MFS(S&5-CBwoIWy5yQ5T|Y`Btn
zvwI0UWZIy^STH8+it_&1IbKcb<d#hAyA~x`b(%`)0>)WGEn1e3CSPHvQjSJD++`Mx
z{9>9*k(|pYsYV>>UA*&>eX&ujQTl)Saqu7MPsINK9{&Kg{{U5WdT{t(ktPRHZ%z{G
z4|ld_)N)GV@-mexkMSQb{YB9HGvWUL8Tjsp;Jsa60n_Wq=t}P4?dR)^Zk6m~maQK>
ze6N)bb9d7;u1f}5mYS&cywkt9(#kbSsmenyY?f}ulWgdLavnEVcJTNGl}yB0Q>Hg`
z_|A>*!K~H`hc(Ze!Qne3xM)b)OR#QnmevdI$eMQnPD2*&2Ke?dj<oe47hZEu3dzYw
z1IiI9+8JLS_%_<h6thB3Q6f-QDGP^&F<2tRFSF+yBx$OnM%0y2-`L4~mX=PcReHX>
znN^lsreHXb+?|&+{jUb?tRk*4Zc|@(wl!-6qA?_hJ6_k%*Cy*`xYHA1Bg^FH`(@$u
zHK7$)tG4M`s<KgOU|5diitS!8jV5U}wo#@h<QqF1=4y>sO@K0Yb`Cr_t4||^gQ!c{
z@9m4Oh2>ioCDRDoTjH(uX?&LwMMnACZ$5HyM&ebg7{%S+B%0aPF3yBD(<zp1{{R_&
z$3={y?THw>Q&rzDmUN1(moBOB#Jb%;pUdEU_Wkp()5T6lpX+|9eIKDOSH{0R9~qM+
zlG^URT>5sNkLQw;ZMU*^e%Kk=Ht|_I6oK}^kSoSz;b4hyt12hH5_BjX+Ay`EV4?4Y
zkqcWGGe%qC!7kV~OU`GzV8PkUzk)#I=+QpeB*RClB}6!`bE2e#%kJfA<cVJfTN7`C
zb$|u>=4!~5?UEvJBW+w^qBkCJB4p%2y;+yQhWDBIC6Io(S75^T%=`#mPjrCGgUFF5
zZJZDX9A>aay3Afk*7?biow>p2fxp)R3%08(AZc<d3o6_Q+?-{Cegk^h%%#{^E5<+z
zwi_LjyM_vlcs_)i4%j>qtaib%qF<&(K<(QNfr{~kux9$;At$y(iBm0+JKf<#YrGpE
zTEKyCED&L=kbCAtFxUdolR%j-oUG7z!LkL~{@Is-1>Ud=CFRD*l<$C`$W4(OGToRa
zPdN>CMn;YAfdlP^$cOphMr_|ACa(rT?&e4r+YqqZ20*Fb0Sp@;Uu+U0a%5!w`H~p*
zXAjFH#YwFSZ5<==U8f(@I>*)WX-mo0Gla{EXH_)xX*(QSBW4>Qx^l44E9W8wtcO6i
z+W@e`WoEz)fh>pL4$T7IO^{)BMB9*At;h+KBR|a%%(R}s=P!lmzr$Zl<ozfzqm=#^
z{{R^ohv@iWM=5+aRNrUl<{zabRL*QBa~Jf?G}h1)m}G2-*%6kqB3AMs$c>R3Gh}QC
z*%7fL_AxqrPMVQg+}l2$RH>C(w#A%eqo-nk9z5oF@l^I{q}q*)M^2MdW%1uP$t;+S
zDN`!r(}<xjRWS<o&UY6oMBwG3I;?i5zkeU&nvFa&N-}P!(3Jv2qiwe5`QarL#!59v
zpdC?U3Ai!aXTDV>rL!qk(LF;;{@>>1KD<^*IXjk&w(9=?u|$p7#?!gB)sl@PRd``5
zc3Vm$$j+40bXmLbe&^wT>xcbT_#gUf@gMYW;lI|Oi1hye(SH-xdi_tru0JkQ)o9AK
z5oBJG()iCbVHYk*WR_K*iDdN<j@S9CYx-wu+hNOs4x<KCnuzVqKUcOfTvdPYzU-zt
zXuhiK0$pIKVClcXB$3k@nns#8!&}*w_KNC3>S~l?Gkv>rog(ENwr7<(Oq~;CGwN|~
z-0_o&YulDJ?nOo#=4qLGBUwq|0(PIlEK^TXHtFPeJ7%QQZRp=8O3~A$KBT1C2!qMr
zFw_3#Uly$s4apD<ngrX7(oHi?T%WmQC({~;dXQ?iJM){CBW4nBF9kr0rc#?|(^Y$K
zl;YDyo6Z@b2F+omZJ&SB1c4`c326o~SxQP9e*XYmq@5aR$tK3itButsAHFe7%Nj4!
z@&5pFi0OoMTHVLL>xxR<=#q+mBP~Ov25#p2eK6v!1D7Xj_cf7qbfl6gu)W;MN;qQb
zO=P2{H3>0^dt1SR!p2NWxh4^aYU<jTLFdM4tHL$O&lG9(1qXe5FK_dVqkS68vY8|y
zAtS$(-q(y}ugFGGZv*I9{{Ri0vb%ULFTDcw9rYExi`yrJjVw13r4w0OF2M7LEE#-m
z<Zhg8)9N&GnKz4M^^J?vieI`Z+PmgdQdu4g)K}UjCq~qarrzGJJ@cbijRiW(9Wmd~
zntzJ#GLn6gPiz#9mf{p8-Mi+hH)Ry_6!KS2Olwb1^H{zxmR24DP-_-M$!mhtM)&Xg
zbDE3bnik(5a>&$KGQFbL)&BtA31ceE$+;w#^1t$Qk4LRJO8Q$yw5iT&K9rjluA4g_
z3Db<DDJ+ES`+3fh^eWVusHE`O)*;d=5H{}f^v<i(O9<1CNbmCJ9-6TWrtCJ&<DgQv
zOql8Uy0tWx*PrsUFYUoesNa)Kb%R$Uv2<kxYRS$uVX8Kgsm1JaW~Co(qLiGSB12<U
zF=M}dk9@}!ZN%0S&cyVmQd*SX!{awkC>6<jk)lO}W?QF}!FyQKj__Ag(HV7TD_Z?C
zDjc13A9|!lxZa+gkniTQO7iwCwy2uvpa2(p9lYK=cs6TP{F;uONWDYU6)zYz<oN@o
zJ6#dg>U^@bGI*krKHk}x&OH}3Nj;pUKCJaaQf+i+2QEolCf3)3&!?vjurWy2XK#GO
zll{T2Ym+*mDH@ohciSb~=+d<9nh13z86b@vjMH`QMCBYb$8cxKbno}uGuq-RPWb-g
zDNW(yes;{XrH;Zh^AR@&g{{i}05`!#G*L?O$<@+oXHuE+Rc-Xn)cE_fElJq!M3-yY
zjN^;12A8!oi9K(mNi8OHs&o-;>||<_A&SSx6gg=m^lO87iB-~B2CL-Xo5|a0BI^1v
zv{jR^^V<%%(0$JZ!Vxr)1gjAJvWr#nX*bH5JV~4RrP{N4Hs3=lm|uFl7m$nM`1vsn
z0TXIzw}ZxRRa}jHvY`lBKBJ^Wp}{u3(=vQrY_bG&&X5s@yNqg1n;L)I7pwC0ud3DC
zE<Xm;Y3~MOIR>UEdT4P~6~t2I`<HXtR56at-|LHZsTr!nWYtK4xNyDwvP%|eCeDnL
zv%Q?;l`I?LzTFe5Qi>=tD>jx#Zkb&^n=|F^rFYEQi@uCMZdi}YZYdOamgd}M@ka91
zF(gU7BfiVt{AF&amDy@Wez<yTXAU?Xsbw1D^EF#iTiatD#>XW}E$Y3tMKg7)6|%I;
zsnjCIo>9zS9HUOsx1%xXj-!C_zHU08rrJiQsIgiO_li}~rH7Lhrll=cusa#1t=dDC
zlyrzNlt!X*S2<~o;?goGkUu~9zDcdJN!_xx>Bnl7n8!LSWZfjaF}<DCvwWTQTSbL!
zq-p&RIaRZEyAaYcDpKUyuN1Eazu2Ye=_D_wemKEinVP+n?gsj8r!<$7N;W61>AU9g
zasJ}4m1xL~+2Fo&X;uwiY@v)0s_gII1oB%}VJ*`ZzO-cXvrep{*JSkXL6ttsNP0w}
zykl!}%~z9BS;^1>QEr&?P83&=2CP~HS2n2{R~1ExQE&4`hcwl@2c<zD&AelNa(>(!
z+O%BMtx`Niy|P&-)zT<q)ELDr4eu!0c{H%ql*J%v6gcsUc4{v2Xfm*2zTxfn#Zt-9
zEM1);8E(X?q4SpZDOSrP7^LZbj`?Kk6@tjbKQ%u5Wc|$Qtx?uemoe;bvyzQ|PLWTu
z$@q_1(;tQdopL?<e%-U*^zh>o^1ty9)j9f$>DHs4-0*N4^PSp1ILVgVUKQCSOKZj!
z#23yT5MPVIy9SApJQzO&f$`@hOi6Zp=2#|5>~mePS9T!Y42gCyY=KVsnp+f2zS(HD
zYrb<kW%x8JQGRkx-4i`eJLgn{nK#dD9t4)m7m)1NyS`R9C2MTUb_Dm|8_5~n+02aw
z3(6L(C3!Gd4crk0)>>lSnOegv&WMwIVN7U0TrP>Dc;5yFUp(ZHypX%#$za#t&K3s9
zjNS~1Uctsjj5UFfx3&ZdbCs<TEx$}OM!d*aP?)>{p6io#i?UQ(%+fRqnSTUl-xkRO
z`{rIqmEQ`nGqvOJRu`FVAPfnD<Vh25G(a2igJfXl^DImO!<Dcn@0rser_Nl6H~Qvu
zM*8;4Buj-B$+hPd<f@H(R%MVp<S3jhb_-l{fFgIm(VU2v+Y1XNvwXh-YihGH1gI{y
z$ss2)$%oSv4VtUzfNY6mX%mI?MttPR7cxPE_##^Ek|4p5UGb3xCJm5gNX>W1iOA3{
z8wN?X`DhNX_>QD@u|4xk)RGk@j<NWtOY?1=7pj@HBwnIh#j`qNp$1ivH$j1uVX`&_
zL4gAJ%IMHxWNZl7HblE&W<vrtL<@NlFBu9%;N1zbvIHJO2TX~2JvW${&B81=qnps_
zy_4^U=|zVWOYpJnCo}phqT-2u740+chv?T~#E0SLcO1{@K8uSp@cl!v`sQDyBI3#U
zJx~09e9!62z*7&G)Vk2&#$F1TX_5RcRKORpnIrakrW&OkEz=$7+diW9zA1xBTm6j&
zI+>Bz6f0LZ;`zc_q*WyFYyJ|&;A?NEwh3`|otbgGqUF<xIVlp=NSx&_b`43U_kfT~
zW2P&|rnbpN$9)Qwt3>I3REtrn7m9yuR8&>OTT*@*oeX6OT{PE@cC6D<PMX(dTyutJ
zsK;VDnhzPxr6+F#7`S4?Ln&>zC;q>dYm!RYEmLB$`I<VhNWIBk{{Z^QJ~6up!;^Fo
z(wphNBO&+gnNK$L;8ICi{{SNyX}HQ;jqdw)%a6PJ6r|;pjUsXZ0j*kBdh!1N8Mw!e
zc2A4Q+5p6js?ofzxb2E@YXi@uXxv!`Pei-N-walY`Q^MKr!7pXQfk^io@q4J7*%2H
zC3h5)tG|M~=3=O2P0|TQdculb({q0q?bxN^P>z{0CZbTja+S&8BYM1)K{S$<mLpZQ
z=A|6k$}4e|hN!-viF-8Wr4@$BTPhW=(!`BfA=}){xl+zaJ6RYi8g#SWtFN|g`%^b2
z?ITH_N>Y@F>AkM<PZa+Exz07TiDEq{nr%DZ^vtHy(Wd!Mkw8YynOa+GzrJei*(<!G
zNvQ_GUtgw4ExyTdmP?d(X^cmiKd0-Ni%SJm?SEneOv{)u-s9Wpnpwlqo_yk!SyMEr
zA$GfX7y06&+BI$czqul6E;^7~9_~+)z{)XM{(cN}$n@4T5kd2AXOok<Ch(gt(sbkM
z_EUfTV_co~4ddIeW7BrMx4$^1*WH8;)A=e<`E?k|#OJ;W<(djpAgw;3bpHV4@_lnp
z58zr)G%?ggPNMB-?zlCig0Bn$W~q}b<NJSn8`nWdlsP$=JwR~u~`X<?a$r$A{|
ztWv$QZuOHhY1(Yig}?2Rv-0`+{@Jf{&^l2&biNm<xhS#PmQL;~(UZ*$X4%;INOai6
z>ZlcQy3U`W2|JSta(r3b=|iU(5dhz}KIfd;TBLRR6)lysOWdNr&A83+ZQ2JWkur>=
zmYz#0+`il`l4;^7)?NI%Bw9OSnxN#}QLZUN7?rT!?dJz1rB#tj{-$YZVvKl0^48c!
z*V^_HN~viQ={j0P+k0yBPwuuUIGCKH7UQal7sgbhDtZ)`+-)`i8?PB?n@QWruc_0O
zN=kFK=ltT*WmZ&5DjK)l?Rn0_VIq3d(`0u8fzFCllQf-GpnWn(Mk(%i{`t=eITs67
z&QDjT1z`cQ#_vt?=4a}q-I;Z+jy+zim59*qq$<7hgOipEla_RIj4#%a7t8)}mKU|G
z1+^=XEUx0J8{YSorKD*)S0kkKksT3}-)-;vW+Lt6617C!2sbD(A1PU>C4VfftBZRy
zw9YpWxc!v+;gy4Je3iDu)oR&2@tRK$0l8T^`m=P#41e;b{{X`>@t(?(T$4IQlWDQd
z5B0^xB-I*tR>)<wqvL*mTw?9yl2TRKdYN2LOs|^TY{Y%ISv1xy_8Q$RV-aJ$4*9G%
zqZ=5c<EBuTs2WQ9eeuL{b+r|L%@W6`zwSeR-HO7}%Gs9PU|~{;792II_PlLJWYsd_
zlLnA?r@M#Qf^u)#J40)}{u}<er|wqGW;QZgjC9uAyqQJbTO_L?CVfyQF|Nn+b9AEW
z%_o+Mbe@tjBhISKwQnYzdrVzI#5kejZ+zC)M~tJg$$5UZcfuHsL8q&rBHyAla=pxL
z=O&Vp$TIahT|3jaKYY^Og=DxBE;2Q2w|rvbbwiYGsMT%oJ)B<2rt##YW)YUvX|<1x
zuCOu3b|n*ywYJZ{jFVALAnu=oBuPwenx>HV%_oG7EY=EcGf7CNwQwIyoLyNc+M=&l
z7rC<Fml0mtqDV_g>K!ecdB+Ipl>SAF=^gD}cg?5p7>P_L>9sFm8vSxkg<}Y9y4+ot
z`Z+6-$`+E!I_VLcR}lNSBPvycYl{(}-)FulQ#7sBfJk3ecCQ~y))2h0y$HLhRbBiz
z)#z(Bij<urqPSbiDJI>OE{hZ>6JqNKtJ=t>nlcfPZ^d!WD?2o@tgEKUbI%58-jJ(E
zqfVn&QuwSB#tB~NeMFea9$GV1Xp&!YMv4-t7wO*3{{T!<cWBd1t7b^fS5KPrig+~}
zNa<IwZT&G#X_eZxI!CEeYWDiLX0R(+LfXrU+LIOjO<;8~5;}Kk$F^v1IV_BSib=QW
z^m4aWktgJ?LzSz1*@9P=X<-fAVjh(9-!!+AN%S(f?DXw-*yk<w5y-|Ql_A@MAIYvx
zuvr>5l{Z}R^~t+5wry6F+kBX$?uN00q*h8i$BN3l8jNjN-ks-fzG*8vMLiT#FH^;A
z;hH+7D!Mv_o~V|W$2g;`ZI=r5$xtG)IH1i!9Ni$7sJ$;vUZk}Y+41`QX?41i>ds3k
zH@`Xb@fAn&pIg?Kuj)(Ho69-dgR_V=<;c<}d*?HObQk(zMoE+7CFP2`1?^PkSu%fJ
z^F3Gs_Q~Lg<lp(^-$>F0+iWU<<R1%|rbra<)>qRSC+Oxz!Eks6BUG(rtfJaII<;?k
z)v=)8vxW9R`MjQ413`J4B0Jq?h}W6hvIEX#zXD|sz8``oZ#g$WPUhwhMX`L~^du^k
zm6A8t7o3O=WzYiVSOWRV=x85ownjbkGzRDGlOk8Km*j(vF#HLwyYqoCZ8*t*U+sd`
zk-f~$!QhqpV|@X;7;I50w}6HA%2~l^Fl;t!%=93*A|-L?O}~82fF512ZICWBMDvyh
zMz67%ZP*#iw`3QA6D94Ah%mA?7Dik)M5&Py*EtYY*^&qAheNREASHYl3JeC2UGdIB
z557Rd>xRIWoXDVGY`#R7Cdk&Gd>Jr)nKopWUt9`<p4nNVR<QXaUKT{V@rDSM@s$u@
z$QH08KTdEU+n#VBUl<W9c#1%^fVNHkxtk4|@&j0R!pezT!o{N!lz3AWCbS1b;$(he
z^J~^?V`o_P`f~m%`sYRJMU9ptxaO|K8vzBpg$87ekpk8PW?tAZvTz0qSr9LmA`M1G
zK)0E*Nd`o02o|y+TgZbTvogxYiB-bL*b%UzHZ(-aeli*%SDXfeAwOBvVJdH9u=&pF
z6#oG7d9xUs(HBi6?H9gcDZ6OXZ7Y2b%DF|>7wO*_I&{p&N~tiEWv!N|iMlUu8Kp@i
z^kv00#;sr?1a!Si1YcJtjMgNojc1i6PN~nU@*m6Z+urjUgRx7NTNV>1V-n4;x9f~5
ztFe}`m)O+N`IEUkQFnat#$0(iK-{A%U(W?7N{DkSr?ym`S!LHEVbs+kpGj2J+x5XE
zO+1s2E1?J~q*|5H_r<3NY|T1U%#6BYblbbic=yIJT3`A$;`GVVlG<&?hR=M|nrn+e
z6rZ`W{Ut$E)v;UVIIOe|lcDs+IyLP0<)@74yvQP&!_l<P6zY^@o}<QLD|RifBxndS
z(|&gU053Om>Hh$MRJZ#V)9SGqlC%rooE$F^G~AU&iK!|E#htnSxz#wll{8pfbwt>R
zJ8pitmhGg9GOdh)2c*&_)y@tv<)bYrIv<zQRs>l802O%gi?;3LlD3)+Hx=pacT3y+
z-zDvI$yMa3th!+<fAY6y`(_hux-_EpmI6IS%Y_?(!*BD=B-J*<;h>hJ)k7WP_WONy
z&Wi4el4_QGjLWSxwwI6c^DY$FW-IbhGBB6SY2~MP$J>0*6&I6Ib%&E#rMdc~dQ-Me
zDlI0k3Dr|1lYcCz#YDf?HnEqL(Y034$O)hxuL9feno(;ElY0p@D8p8WrwMz%%QDsB
z6*VjvjCQk7?4REWTXbd}(Q}I$>x6<yMQ<OjX)c&5s)?><n+m=q&3MXETyQD0_?xO(
z>Lamc{Czy(4mYxLYSI{zB#|4ES~g=PdK8mc$~zwqHlLNJOMQ*J{{Sr1eLH+vgrqU}
zZlkAPTV_>3#%+2E4%yN^oKmEEABX7buAfdat;Lm}T<H%^F;>x@D~@|AT`-EuBHt$4
zVyQY>7Me!tPAx<#_IuyozHKVainsVP(+Gn-)D_!q<_|p^$KYnONiO^wZ?-C;_?Uvt
zke3?ME0e!q+a&5npSG_8>Wf<SN#EP)gHLn`CY!7k#3d$DRML4eNnHb`(utihBGiC3
zK;JG7PbC)C1l2Ow#v|IU@Z%A}baU!|yE?HGN(XDkDWz!&!nB4^r)io->iZcvSGBMH
zjq$F>r>Y{7q4CJz(tQJ!Q|#J$+Kn@|ma3eTBGX1v;_|R*sbu<qKa@YNUfkKJ#VdL|
z()gAGK8TI%`Ob3nR+nI<wzP3)F1pXD#}e&-OvKxM)D6kv!DYqPox7L)^OqF2*d=vg
zqPmwQl5?w*8B^;9@7j;h{xkmoPk+|G>94^42mMR<@A^COpTmC%{8(Rs`2PTk_&N0c
zAL0-H00L=J&TG_*ml(?0@MT4;+3-rKKp2&|+|5Q&f85h@OgEJgja@4K_|$KTC}-}$
zkRAJWj|F6-nr5AUV4Rs#K#RM!3Nc@~mfd8;dhbn|UaD>><BV?PtXqcK!Cg4p9p1sW
z9@#kFjbUMoh{gwLQhL1K8Jnvpt8|UhCooOjUU$6Gd2YYRw3A4mmHy;+d)RTAPSLi~
zR>kSVK`2=%Uw5}}Z0S{fvp31M08TAOeKZ5P-!P=pv`y6_-9!>&$257#MJ$4({tQO7
z!$~0DZq4TDRhx}%8ap*As(zsO$<&%PRQ^tolp`-oRTEX=EGw)7Z$U8Xv8gG1Q?_Yh
zDzI%vP_{Y}B#FBprZve1S~D$hsVip>H}8~N6&4jHF^r-ixMRgF8%ZZ`1;w$7Q`o-V
zFjT%wwA72AQZSRI<JzOY-+bIv<Z4Y|BPguUDdE-UB^Bh>7Dr7)o~^Xj`=<F}yaZk0
zm8eFyiAMe3>4z7w;_XpTBG$6#t>4SPeBNENSZ%Q*f0||AC%25Hj<Q;Os4hfU$(EP1
z`rcSt+60?e(M8qX$kyBDy~*V0uq5RnkwE_d4P!}QxcM_cNapw34^QKv?_&#;c53!O
z&815wmLyz-S7s+|RqDZ|uvOl~wq~Px6z!5pqbc5oEMe*|N>=VXY3$n0C?Qmq%Fai3
zF7eRe^f3J7Bh&oFx0-6UZL7;JbjNstTHky!7;c$+X*J7(b;Vi*t3erUs<z{e>oxFo
zYB;1YoMhV`J3X<Is-zW)D2z)c?Z8*_%DiA>Ch`|n(mjg)Su~lXtsO1e%4B~Q?lFq1
z>@6okQ7_1Qzw?t#tPQo2n5A4RZhO`#VysfwM3{!Ob~RRMuPAD;i<wexDQ(|n^Mg-j
zqTZ}^4Vr~5GgW1$lD1on;}YpBoRV5%lDcI}kSw!rY?8H;Nm8<op7(q=&e^PXE%8)_
zJ?g1zjj(?uiosNyvKbbA48H`kk(6|_Oly<+n5^L3TG;9r@}0TgG?A;pOIAwW{C`YR
zRfAE=!ExkCNwdc}CE(i0Zr2pqJLGhNZzy_mmoY+}?Cp(S8QDb_6<XMKpvmCssT}_R
z5bFHCo{C9z9$qWI@1DP=f{JrK2m1d2)PA3%A4V@Scl#seJl=YHKOoUBH-j#LaAvd%
z;{dW;$9XTnci$@LY?Z*7Ego|CB3Cnok)U?L;0eDwUJQY76de7qcVfx!g;ADoJK<@H
z1@VXARt~)$pww4<++Rh!on5hZM6WfIB2>R)E$o8#qih5??U`<XJ7ba#cg(B^Uic?$
zv}b*s8=|rolO>{}Z+tW;+xfw}EtqTHGEA3jwgJx>eo30#H=#D;zD7Y)3nlU%`3O7l
zg=7iezD$g-Y%umg{y1AB4V8EUGWWd6SS|Og(nN`q?}OQ(zpnUX4S@pTtPm~WB$!wV
z)&xr9p+wU2k!aj)nXoeLyLdK0=4^>vL@;D)ILYJzyW29z7K_2zY?aA*Aj?E-2$aE*
zE87YX$WU2<0+}z8(XY+`Oq+J&0Z2ai2wp@>nY@r;cF37MjEFYKfwP<mf??QQlYOwE
z1^Q*8A(%F%WyuO5>q64=JkS#L%B?ORs)!!HVn4oR`kn}v;w9KKKd8u$pFeNY4s3(w
z&(0j#Ho@vo{{Sq{Ie8#@y-NJwwhnS5eCgNb!<+I%eD0@r_Q^^>-2>I1clOE2mDoLL
zmAQvDN!h>dC*H9AMke|4@L~Fli;u<199j3xFIlY>mmkDa^TPg_(mzuBMwF=Q502=|
zm5<cUtJjQ$NgYnVPFy*@N$HKEzGPnK>y3?rEAC*(*MU(iykt~L<Wvq8M!=1M8zKf?
zGNQ0wIFUNAAXUhXnImLI$bo3Qgo#OAvJ{EPO_3W0$o*M8AsNQDYt_Z)q5lBY(dThZ
z;G?L>T^HO}cL(fwDMz4^PFk7PpF3tPy=)uc>atv_m?@{Y_`mO(%XVQl+N~VPL3Se&
zHK)!$Yr(l?e2EEZ`Ks*mSlhPz7<^^$3`R*Qg3)37VVY7#)Y4Ci{{Z1yq(bhCOE#+a
z!PAmT&GG4E9;6VbaoShU_sU+Jx-rI;Jp`c#Po{^0+8m+EE0Q%7^k`a<P}C`VA6#VD
zW>ZV{DT&&&#CN}b-+ZGq@J%@+P5jGUAr2bgeCCp;eY0xSBwtaFPNhBDEqVK3p>Cc5
zB|Iie(-T)tp#EuU{`soIQYw2|ER=mM0>_owynAH$x5XLasKjNbS9@LFAMKl{NsCEn
zfHg){8AgeEk#j!u@-d8Nsp5{3j7do8iBu0Q{{ZVU{mF9vP;!!&Bb^fe0PaJ8{{TL@
z(QDcqo#dhnVHpZ)+;5X8%a^rm*Cx?j8A1``O76>k`()kkVBnn~B9LbZ1xubrzo~<2
z@E+zfDAlQ_V{Pl(C?{^s$vGw9vNh<Cuoa`6({}P!>fQmHDEcXR;<FFg_b`<;YxvAm
zO>V*6JLZ~bqseXLicKt@8{>~0XIQ+nkS<AJ=<A%BwJHECrLHp)sM12=io6;_e{LQR
zre$fejk}81^Y=GMkeyDP>Sv~br{6Z#4K%j&F^^7H7^N1SgB2Msax(p^+1YXUexnah
zNf{toZ}0Dy8BR%>c-hzf*Ql|yjMKJn^dB6Pj}*)Q0JMn`Pf`5B)4q277_#8@%a6nK
zqbfm@>igzn2&aTjDmjWjpnNC&3jYAA{{RvH08P4|^@pSWh5SNa=<b}}oWB$U<%vBS
zs`W7^>D5(vhwJ)EU!}|4;FVqv!s?54UTR!y{{SrN{{SNX<z|evu9<Uhxcg?VunqDi
zMMxqOqL+Ssvg;(g0js4DoKfojxiyOwJIWKQD5I#5H7&1iIJmCM5_v->P4IEv>H6kl
zzaN7wG>o#B`+A3FfUWXTcV?2cc4<a4T_sQ}elg0!D7OCq$vR`fD?zb#`(-KgVY|W;
z)wHZPZ?;Zdqoh;i$eX2zueH_r-^MkZi*Z#JXR9_^lhYQrD}2u!P)0Is;8~C*CPOP*
zH+JKE-cq4n+{CC~%e}YY?QUhR%_iDy9J-xI^jQ#<YUZB*0MjmMZR8a<X&hgT>P`nv
zFaoTTwsHMNouf6HI7_WQq|I`%?_K+5B>7t~?dFCT>W@>?q@Xva>6qfHm70>IlOnZ8
zWN)z7(-|%+Ae*{nvzQetBl#2gWyU_ERoS?$rc<Y^bs0#C_YQv8HsOf{(@n3F{U!eZ
zTR-{B{=WYJt)KKy@NfDn_}BVJ@K3|O2k_tfNp=1^;ic4<Q{tb8j-H;FKAxG)h5bzO
zeLvB9exIuSSi8G3SEKycLNvuuc1QYWt+eSO``xFbF|LuNT^nrw06bfZc8-|nqtL=L
zmYTY6&QDa#xTLTNV<<YRRMWZj!jm`MEfUC@sdYq`oRX~zf=Zss)SXDiG2XJ0sNfwp
zfYbitXFjUFT-sFpmo}SgMH;056mN649oN2P;kzki+y4LsPMESJp;xyVg%s5xZmW=K
zjKVNQt+Q*}?da2=7ia}%vl6X)`*+FHUz1Cc%l<Ic9;fBDK<;+;#wxDE-5ivI14gMx
z-Idq)V4XbZowjt+D{Cm#Cu0WbYa_utlhR2;<;_U0+i{I0p2HD$Wz-6yR2014EjDzF
zld%bE)u)Tvu8;4Fl2oX~a<CBw0bFp`AM=il@yEE-OY=fbovQo!$CQ)7wAM0ogk-Ux
z)mmxX!sU6<hLgGkg&}g0wPQ`>+tnLE1~tysJ&xJGxiNc4sF5Qmts)H<SSn9jCaLE^
zraL+)p4%AKX){XGZ0RVlS`A5U?e{WEj<gC<ZjDl=P){wD`{j3Lqg=8jG9!GL(n|w|
zi)9K1Hu1lo7^L>eThJeznQ0L>^5rkLjG3%7GTJ-CRCtYMnr+}p2%tm_KBwLAWuM4e
zOc=xSF3%62d}D2z)3W+{_$?E*4J)({I2U(L+ezoXH%iSo*2;^h3luJ{ow2256trtN
zwJVlYi&pB%H`#I0alCf6?y`!0NhYv_g2RaT3dz59BTku%AygF6d}f-aZZWdq7E{}k
zcMLt1(7vNoQSIA|lBdz7RtK)6hdXaswU4y39F#KJG6n2-%S@IvgCyB}xp2~WabE_a
z)h1X@KbBQ~d*X`)LNeT=)OSBztv6(n?6~R+m#7<YZ#0cO<N%V-(y=zK=e}PI(r$);
zixP)>#J7gaHliM0JZ8JeB=lr$D$BB*lg3Vow5^qKQ(D=%UUG`XnOAr$oKzII!IE8c
zFUZQ>=V?^FG0#TZtpMtY88;WP-xR2D+14zd%XGARWw(=}kBt`@NkTR6wknaeOtK*Q
zP^));T(Zuo;)-R-DemvN+Zde^_&UP3wufW?0C^>PI>lpW%=|;G^L`>h#61!E8T4Tu
zF;Vb-2mMd#f98IwQ%@RSgn3>u$>1jU;|w80wabwL_QRF3CFci%2L|i`Z?)5zZL}o2
zn_+KYkX?KIGRH#jP3?veB3?5hg37^(E8h#CXxrBKKMX-jyTPzT$s88O!qFS!41+yA
zKBQ(>dd`uRh_1=s6{75$tiK`-W&8oQ?%`vy9r<!u*=X0>j2pZcM*HS<l0>LwvSi<X
zI6Kgo-zGu0xH1h%g;+LAZuu1(ZrK$BU#4FK!uhPr16|?R*)JU4AaAAw3i-*9UFKfM
z9Ipazre_oevioIsprSGmYt95d*dj94`IfA*Lbs5ExwcFPe%Y5~$(GoX$uzD^N9&oO
z*k1RA;HWUZh!@5~L06TvfHfT9bXI5SnY%BLc)%O*LS7}HaIv7l;Dd(*$QOOG(30<g
zSvXmvJMDrp$;pr}Y#s$F1AH_gd2ESm#vyha=4e9jB6xBGF8CsN2i?H7bDvr<_j3+t
zjxSnWfHMwkNMyXZi)<`~yeur9_zE-M0Su~$7Y0P&7tyeJB!e?T7vmxfh?kr!7&rnA
z>A{F*L>9QngPrgo$c^7JL2`>@Ureo{EclM0+)r~fy;&i)bUrR8=ZbwZqV@chmV8f6
zTOCTyk@aKbQ%6IqDxz7_pUJe)eEHvM%bN=yp8o*jlAn^Yj=X$1az$hFx_|k8*xF@b
z^CIxxX~{C9YI7~y5X(etnHvH&Oo3}6C7+CiB6-Nsupq&avdbb@zE1>xylrI006TAC
z_0Lm{I)CKyrF6f*P82eaQj_?2?q($9YBd*h5|TDiNfj%6QjXB2PT0q)K>2AEho_6B
ztkY4uTV$f7??yIiQYBo<Mh-QLF+avadKq4(quYzV-@Xs+z{NVY_H8nlNc_o9gWnoZ
z%O^-7R~Qo8mfHRCX~s=<O*+2+0P=Ly&QbIt99@2ymX%Weeo7E+ujltMu2Ll-^!B@J
zCaF7L24n5dl6t?ErE-@voE6(BtItKrMSe}GThk`?3df4A=+jQ=i3k#mX8NDa)4P7I
zbV=4mFF(wTV?AX0c7f{Ni}%dbc3hVlul$9Zuu)z^Y;)0{a+0ip(~M-Y72l1~J7c+D
zMxDx%co|gvaoSr|Jo{nBm0n3EoqvI-2j->oR6aeiZOTTa?MXx=ItNj^zi-<cUEmgQ
z(NS5{Wdfb|_U(hV?T6a&{P-W44KGX`x8t06sI^9^ys_DgR37I%W5<8b<&<i$zxG$l
zm!e4Qp+@KYWaB6Htgvl4QtZ!5t0LGTBw_*|`prkuXw;ojiqG@;;HRb3MT}*rISt>g
zG4$k}HGp>U(NDs4rPI)K>uUj~+*~Id$r@3$@>ISWdO_-u9f^127<y87oFkn%S}H#T
zD0O<9#bqtOo^)Q4juSMZ^T?y%<I{@*ZN%8(?+?+An;*KDM@al9M37IU&Whd5{)}nA
zB;wtqc8nt#$<kS~W!?~qNhF0Nl1=`{HI9>|7;5*}uWag#Mz?iE6rF`zlMNfiQ2|9n
z1Zf1N8)P)7C{hCkGGTOYNDL<3AYlNb79bm-(%m5~ATd%pMoFi1$NTO351wngw&%Xj
zIrr~uyT=msU6hN>jnIHRVSnkr`M9C&Qmmy1=g8JbP(pem8~jU3LES>yv%xem8ML{k
zMUnu_mjvE*;cl>aJgZFeAK8lXmChyUz#%ROaV>u-(HhK3syj33uz0vw2|6wQ*4C4E
z!1?vs)S|c;b24!2oECV#Iqow1;#PFQ*7aSS&~Pwr$mQMVx#xXLu0sy>_D)Hlhwf+Y
zbN}eg-BjBjjGk!()wF(79S*t&V-?cQ@j8ge0C&X5n9*o8C5=`SGLMT-+vU@PXQ<aP
z2q(E1_p^>7*k=RB4rx_8!S2yVW)8ze|B+27eECE9*Nc=WXG*HEXTT2L&dck_>dRRn
z7-X)wRCNpJ+IBo0bgA49cE^B&x3IZe$sfO0mHyC1T0WE1_aZnbq9Vvpj#q7ftERu`
zSzQagFYAsNy9bE{nUl)Zc1J?9Z6p$bUBvkekTA^s48c*<j~Kxd)b%bB#yG(IIPW2`
z;A~qVf|YfmFc;gKY2~<J4?Q&Y(8Hxhvj`xbLHy8)NPNa-98WkR*au0RulN?lz8z3=
zphBOyX^+f_g#?Ct8z5!0@|3q;2Wj>zKHzhasWx{Mk?N>q{J}!Vs6m238-2RCTtE#{
zzj~&#mM9firdG4>(E6}Cx_M4;&zR(Nf+v8Sm?dmAuGme#8ZABSI+)$4i@|=+c=zo9
zK4g;ZXfYkX5nWjq$nN47D<iu8^}-OTd3f&fK_c~fGP_nOHYTT&Jd_brrC~F50ZqYp
z>WVv6n!g)Pf>@{p$;=-2R!+aCa_f;=OW;@85d&7)my$Y|Egbrz4vU)PN$oMr1i|(T
zupH>j4J6S!^vsIW-Nf@@o>t$m@-#0Yz|iAk&UWEVue;7?^SK@OOxMrxF5A(d?7rbT
zgdLR@)v%+y*3^qh`Dyb|mF+{F1$FQj-}xC*Sgb>+u7QIwD{P@~E$l{kSd@Vs4Kyi-
z%7k@iX&bMre$mGHLuQ1uuim$pA(D?IyG1ldc{MFV&g%Lq^r9z&0NT6-?3t_53z<c?
zP+7i!%{XS=e41vR))-+%cIkZ4JPx&Q7kxXF`7@_-x)QpYldzf?aoL^%n7@}3PjkSz
zH8#Up{>OUPt?Z8}6x&CA(&)0~8S?7!Z0F8LlvGaocXe{>RBatkLIR;F@fT_2KWku0
zJF6Y?m4s0a={Cb7wz`sH*;z2B-pe-<x`_YC!WVb2((8$kjx0XutgQ<>R;aU35eDZy
z;_rI0$Um6lXiB|aMRvpQ{=~2@VN!pF7q}P)&m=-(z<*GyFxNjjc-?MEXC9g-Gxhtm
zyyuKA^WgM;HU5#UrHY8>myF--xS!y`FWeZ2?<_GeI*eymZ2t-@dqx^G|4na?eQeev
z(6Z049<Cx9m!u%q(;;%pu)J_IpU<)%J;%FA;l!`t4>hDdU-{V>4O1UmR^cOdO!C=5
z)~w^0n5)mg1x%<u4*D$+Pt(jka7#2kMAWRlZaXT|69}(;C9Ln9CRx2VVPS3a*L$1D
zmV?%Z1>5f!wx3%nitkFo*dI1^8D2n#24xNIOu;ope7=c;jO%|qiRAX=C;GInndwn<
z05E*dYluF~^Fyv;Y|2Ss_~UF4n@Nifd^*MmZq$;SUpMKntvi#q%I4`Rlw)8)h%a8{
zoU};|xy4@)dB4Z3IWYeqBNM*P6iR<dv22o22=<jTE7))1bXVVq3aFhM40FHa)H_nn
zwbDMUK>7JvHDA-lbSi8H>#fc=e2rAHo=EPkidXesy_cz)eX0qZCAL{RinmnL_AuzN
zTF}rpO|Kwslx}~VlzI7+Egcao*E<U=4Jt)Y!etw1o>;!WOpt$a$xeBB$H}gB*(+VK
ztx)eE7{1O*=M%^>5q{F!@I|Zt%=uLqg5N~SBBke)Z~u|Ux%I3j(@76+D3SWXe1oiN
zvEks;2K+6Bq<z`P9>SEZyMO4Lt5uv^KFB^=>KgyJO0Fk1Yc!bQkvy3AXm?o^mN9qJ
zM8LCCtBYgTg{~>JlqcxfifAZ8j85$N{+49~iCmLxy*(#&#;bDTVJa=M2$&(Ws@79w
z{#q`x?}odKH}vV9gwHhY)>BjN)<3$r-G8@b4O_nVq@|8GRBJ9p)B!tNT0eO!XusuQ
zGm<6wSLk$C;paI@b+bhg=$Gx#4C@yBW+GLE(oC~X7Z0v!Vg`5b4o5l7qty|vf2X}#
zT3i(oIYRDN718MU>pNyoFVPfpluyoD*I2BfNhvf>dQ?~+JezOblH#j?rCT?a(!YOV
zMA;>@N@f!pFS~=5CYX1j6Yk#&zj^!SJyw#x2%1%evr|T(-e)rndg_mDP!9D;K(s3$
z0_~bBJUFX27a}6pYk#Bc$BXOLY`xBdruu>_jVG<7NFfT=xDB;q2yg%Ji~eWhXdw3B
zA<2{o92cfjEZ>*E&fp1|_RctJOJ-k9?e6n<;$P;M_$(pwG2rppw_@{*CHMgN@!8kl
zOd}h@t+CKq5+*9@wzT%%?cL6k{*a=-tScT#rtZhR%@Ca)JQpU-NI`bCcjj9isp<i9
z6A=Gt)6g|&t-ZNNc45jqDo%4)NF8kZHU~6DD!uXU9Wr#N`Ri4Nv<Hkfj&gR;g=uA8
z(*)ztT^fHK?g0NI+l?T~$mce@<(}*N1<+o+0Rb>Yvwh`P(lr^vvYkt9c|1B_!d;F%
z^bf7kK*={q9@y)@>2JE6LtuRx!-rG8fYU}XRG>2G=W`Lh3KMyi7Yp;Vw)+Oxk8`|U
zr1Dg-*HGNi7PTFnv^s8-cZlcWr1Mtp1pK8^bQsmZ;+H#s1=;+>*~y`&aYvritglDb
z50mLXYKaQEPAs46Yi;0}eSpvs>|<Qn$04Qqg+_TkheRPMj+^=uLqPA~+MM-vF_zpM
z`y}n0{M$WKW=16MrtoH4M<0u4221&RaS=wLPq<FoHiOwsvF-0Uy%Vq2Wd{9e+sEf@
zPRFnQtaXEaVM>}tEv;B{Gt+J#XJno~f3BKw=g-#TIVqMY0uJe2Zs6}WCBuHuIn2`T
zsyXbKuZ{t$-HR2R8gTBj6`s1on+CiI5Eh~fn6NKl>JUFjKY`@ri(jO;B!d=0v#Os)
zM50Fhqu5sEoMNo1uvTK2)Pjy9RCj4~bSxE20`t;o%IbxA@G44AqSD~)D-J%-E7gW8
zDVmxnfqJ_;9d;bAUNyfv>Ra{%2bL$^ILm#>#SH>)``Nq=kW%0^R8D)@5j}U?B$}D8
zsZ|>2nQ%L_o}e}iKm~NUuvK|^G>N&@vFB`C4P0V5V$>wXOU|sfh3(TUq~&hv<^oGY
zgre#$S@o;tp^2J2^dyFaQLIxk24le!>^mpKAeM1kO#L$|3Y6hFp=EXD^r*plX_U=8
zull#Y4^N(gS_p4RxM)_uy%zTpesutuMNiIwU!S7yHMjR@LN*-6A7JG%C*bB2@W|Z3
z9|~QMNfKX~3#@%!ibu@ME4Hj9mTy~$Y;Pw=dk^j2oqNr$ZzxjXRkNKaqZV~0*Ar!c
z*Citg5>7e6<+g&IKIo9YeMO?^dPoOc<+(P+U(1kHNv}wX&ZL%}yz%gz>`mA4)os$V
zo5~0Y9Oq~x<?UGjtMWe&pZWYp_E{mXXBP&_x91Y-svXA@?fPxs{^ivCJuW73eQdW}
zSFLX3TKwWv!nHy@eIz7J{ehyVSey_s_^C_xQYiqO=o%V6QcET513fU_*A^OLSs+T=
z6;B>dzYX-fe)cfRb=0H@vK7jZJ+MKldv_F3d^MyJ&)spiFXpzw*Bstd>s6+?sn-mt
zn=ggFEF45tJgYS~v5-S(U6A^mdokcl1iC%_uj-o!*guj+OmuDO!C!_6_kg#Ylpb5L
zj0&$!IR<6O<1#_Qk{5`h7GUQcOp0MR%dem130~PdmmVNRHEP_+?Vb<F938F9<^v>C
z$L1ZDss8P__uA*S|BQ`g9zk>y)&V?)QN*gTvm`e^->0c{n!n*+)G$AZ17~y00><jA
zV#?&}wj0P0*#YIQ7TGiqe<q2O%9Y{_>|?I)U?A0%$!^TH-<e4NAbrxuk7~&c&=@*g
zGXL=MZPWAVSNT}g0ni7{c-Ob~Fl|)M1rCWxdsVw5WYQkw=92$fi|Nk;`))ZHeNDfv
zxvhR2yqu1|JM)E?>wH;+I2ilpTL0)g%Rg%J{9<Jj)%EO!<&E{$w{|zx)7p%S-#{+p
zJjb+eKCOpRM2a1Vh~4T3-2p~!9JakrbjCKq=LE`RauW_oK(Z5M4?adKK2_oY<j}kM
zlwdd2`Op96J>DVHw2PJh$P{>qR<{Hpd7wr!rRzy7?_>cm=g)|fo<6q#LRn|nl}#EK
z)uOLSZ>uI-_tbL=&XyAnsd9dY&>gW)TN^!P&MD(5@Fa3t4a=_LP&9Rdu*bTYy4OLa
z2AibHkhSllnWUKEiLTzgvh1gE`f=YSAjA-zB8x`0%$a+Ubgs`1jyr~YU^9{IUKAVc
z!)jj_TdYJQvvu>u`zQI2`$R9ks?goVn9kI=5s1U$`jrM2FS6oti9{J!{#1=Sww=x&
zKatqJsk>+Urh-fWuUi6}cupV>gDM?j+b^@ejC6bRqp{F6j>SvUTmPZKt9m)YdS&F-
z^=`p_Guj&+W}crwjtso1G>Fb>arrVMW1+)y=uY<94v7dIu1;?o-K=!nJ*2+F_l7XZ
zj2yjqWnL*GRt#c-P-Vw(#l+F<sXH208o}DMob_MeXN70UTf-v3|CaW59pxt0k>dF?
zHP>e%D3;-gKh*NGxT51eekv2Uq55G6HoPNdlxqU08nlNv*DkOjd(Qmsg@p9h7({>Y
zLo_RA#xdbpI(RL&(@+u~S(Un{7fbhZkz%LZl=2ut0^rTeSBjW_UQ<A5sHO0H{~fzL
zdvSm@Q&a<FP%3o}_pzF~#wG;oF)yG?lKv=1)NHKQ3~EU>3oJJ;(B`ggbPifb><`>L
zmMR_?$mD3KbqHMq?4jV_0+sks7OCY|znN0{;>B#BDnA2bZl1oMd`5ZH|E_|!{;QM&
z_PBdrz2?_ElU`u|z5y)@l>~;!U4NTeEc~eVkhfrophO#PGO+V;l260(oAE-(JK3Co
zhlQvTI#oMN;k}D&ShXz6Eoi+GbckY~X}L^J0dF5#*F$T)m(IIm{EQA6njBzLeor*g
za*|$k65YxlF^B9Y;8O~z0`E^4qZ6b|46)y3ux2Z+zaOunpIDTh-SVhrhFTYjhMv#A
zJnQbFE<zGmm6lJrA>A#sU5{2G>jB4wJ^Pf1ewug-mt+D(0IY$o<QrUit>lKQeQT(W
z*LOYyWk+Zi&B?DnFFce)_%Z^u2#x!s(Jw(rm%K0hChw_x1@PfcsML!NCg8{G$)}}=
ze&`KElRD})7ne`rm*vq7qip43kCZxfvx=~@e*RDbogDGzZGPA%o7g{4vYjUGa4vy&
zPrCrt@SYa>n$YjR@5$Q3Bi~QbY#X9Uth2l2y#JPM?~{UD>`z}Vz3P;QM_M{|d`Y=O
zYGH~=hW35@^XA1V9N0;6R0rfAh%H0eZPjb~-=X<`W8xM<%xk4&TT<$QS8mmUsqQ5A
z<m&y>Wn|SeGi<q>{DZsim(-k-FLj^csO9>S=lB#TDgu|baVzb#0|$@iBt{;dt1d_Z
zA0b{{9UM^!UL;TQW1q1>?QZ2!(QsyTFlwa*>vXc%4O3N+7V}7-zyFpMGffAxSRfhT
zw99YsyrI_&b5jo^jZP4ai!N7N8JxQD?U<KmK2^^P_}m7y)<%bi_gl2Vu^wXeHJ7BH
zqH#A27|9+9G$>zXOks67sJItrkn{>N_f5e@G1jE;9c3<=7vJo_W^^q3_W_&x>F1SQ
z$i^En(q$ET5|raC{bpR6J`EUl&{N$pR#g;2x8Q`h9t}1rjuKu^uS?}7&(KMJj9Gpf
zLHdin(Y<7IWOn8wLCU8o6ZqU*FBw_M(~0>MYWJ-Ij@!M|+)y2tU&tX(n(-kaEN<P1
z()>278#iCQ)H7l_-odwn+07x^+*y(VSI>8yM$6mxgpxUUzng1-PW~fPE*2hpqwMi>
zZFvK!Pf<dQ?#p3Hs2<HHA%?!Z75tces%AJFS?n^ay-tU6j_G~FY|{<yv%Ik%UGXx$
zO7Qoz<MwBxNvv>p*Q?Y|D9^+u-wq5G;35#VDAc8!Pi<{PvlqU|bho-3V1h&C{ZMKw
zM|veJ6*t7?T(!Qz;wO*#Y7j8;O>*)jK;7i*ad-Y?_6|Tauj3>da$2Ggf$LO<F=WoR
z%9a5wM$1LuV`de`q~KL8oQeUJRTVI6?~{pi2UqUC%Mm=+jxDOz3Vf&bADL!$J+}>q
zmVbcXNelmsw-En-WT|&m$TN+*CUkY0SOf?KYL1tMOYQSFcjnf=>gOp^lJ26Oj`e6A
zYqUgnsJ-<*q>n(Iqj<aIufJ6-via@GSEvA39YNg|&nw)4OOGGPzDf^z{=kf52Y2u_
zN*~*mS-zfX_aO9fve@a=E@W6pDKqSiaUcd$II*{bGxnLVcO87Lo1^>Cjx}N7SKakw
zG!TXzu6-1;<8P}%-za`9d;L|yDAB=B;wnWk)w1w!Mh<|R1SbnzJ&(zHCrA7q&f$MQ
zGmCr7+@7N<0jMrE-lF3V#ZFFw4U>VDsN36+K@nchZk~4KmD6FO3^QA4jbnehw##z;
zafhV#oyXN{CfeDAbs)E^pE(NLs>aNe=WWANCKw%Hv2z7@pBZJI`LlFGiihPMW>(fv
zAtyTSH6zjNVa~`#du})6&749qZ-p%`Ksd8~3~Si0D~I+jdo{d%IHG8ux`mp}=rHa3
zn)8icT35#!GT%5S{-*M3N``#4dl(AT%-F}z%%c%g20I!G8hInQ4Uw3cxWM5+o(}tJ
zujrB3->Q7DgAgL1s468HW0#rVH7eL48UI)czfIb<fZoTtL?>iCla%x`oW<)1+gW|E
zi<*fFqk_eImw=U11<f%8rG=?+KalUO3yBoKv^106F`?zY;1u9y6)3;XhD1>FBR6$C
z9cwmg@B~<e^qASTuaIKG`kbb5&o}r8*md$}ohMs*gb5vG*5orp@&d7{EK0wQ|LOe<
zQ?#Fly<?5Zr2Evai4c{i6EAanh&g*9kJ~8T??GdRkC&O9UPY=!wWHj|xLwEGj|jue
zuOpd<{O5K;_0{?p_7-x%_fa58yJ6&=yLBgXEYsgAjRY_79Li_@>RZZAeYs*q;mZ|_
zY;p#Tr)`S6j#Z+GDv}ijjRJOT2zzj|$G$du+Shu?4x9#D4a}CSnbIK>=(>V;7VMu2
z!uZ>90H*$gq}fVVp#%J+AA^esuCY=k6wFb7a#b<j>UoR(k|rm#SX&fad$!)gFSA6@
zcEnSrdMilIXO6shhJ=@YE<eeQ|1NugLyfW_gj1tgvX{v$;K^^@CkJ0#;&dRRc8eDa
zIEPlFPo79_vYxd29n|+@+`QX~Q5EDRWgxr(lq9{@bjax@NTafqCRC_%=8#js6*Zqn
z^R`{7?~SNp1Y-NNqvGfrMZRl!m#il-mljU68kN#W7oUzk!_9W2Ymb;zy^S(TsOgLn
zjTX`L*Haz<Y=}I`TD6bUt(|Q=?F+;$d#ODfe17uGw?k_;sxf18{qq|VIkif?i519c
zr+_2TbC6DlMoWXA@(hG`K1a2m${g1VQUjTHq6jw2UlpYPSl8HByi>*G>Qx<IPuiMJ
z%s_c^lX!MkWz4aXXkicob#))2Nm!P;jR7M{=0NHjsh4qd=wYLKKwQV0uv}u)XTS+R
zeL8=X<GIImd#;{pcaff5^46%!o>h}`5WS=1HM<;T0bh6D32NOqeQ>T{7{@<aUTyS7
z;QmCGDQBs<t+qX2MA?ZTvE^yp;=^H9m^)i<gUfUg9yDuJN89Wn!G)=|SZHIGz9=t3
zqlwi06V<?>_)?39(0^1jzK$|4jp^3cclXz5-}GuiK!RSM;$Ngzto^mV_S!rz89#z;
zJg=VYL>7(dTCl!Hq*FImC<ybpcSD>Ko>z#?ho_z4e?1hamYvLG-bqHJbIr7U2V5L{
zG*0)Kb2_)#=bDqMruke0HzO-LxL+Jau<WNfbXrRTkRs#B4+YBRZVId?+~z#_^yMue
z;?G9e<m`*H-0n!JGH(0a+WN-3c8On@or(96=zuY8mIpiGu3h6*-o4jk$uc#YN$O3n
z(Oi;8y$MP8kz3gG67gT8_S}z8`~auy52Fd8qBVtmL%d<Vr5{TJ{|1o-Jw=l>9=B2E
zj#&5NP7ab<ZY4UzbaX%?O}EjW>cuCaluWOLya7+lzjQt}XJtJ|hEiLZf1x~SO=fK#
zrnD#git%-ZYR#UR@Y0Tplx48cu$fsKfL#3S(r2lBTIOGgKIJyol>2of09HO>ujWey
z^H${56`=&-or%-{t!-=(E<I?ICKA&X>AgsOm+y9b%ThwWg`m<u`$rEs9?c?a*?XHo
zjdb1m7{5nt(W=^hXwmVVa9-y6&~%HMuE^%V;xAIXu7I16U0YZKfdb+8z^S+Vo&<rN
zR~5MTk&cz0l%c{VRPfr@&#TJ$(aF=5k_e)`@nUYI?0-K8UFj2x|J#Km6h!ZY36`k^
z!SgDCRCK5cCxw35Hl-2`p+$>7v|{2vk%#EQXz0cn!LyLdcbn#Bb&4|ln;Amx(VRZ)
zMP-hzyH?le!Qxexx|kE%O$^uq=bi&&mQo+2sk!VNm&CN>Ln7@sDNvO@66bh@f<npV
zcCHON?D`OMRZN<P<Z?p_5xvGyqnP;d-&S%?h7V@RJY*Wg8_H0<!|XIn<wO*I8j+5S
z@X1ZuPWBzJF1B3JDCBr1kzebKyOIKVA3JHwirW0R%rx0CGTgzP$ikGpMe|hdeE^+I
z|2S^%x4>ID4uxT>EK1H+{DAXq*aCs8OkQAO8qL`p93liTay=H!_#Pv?759#1=WMVb
zXPXPV{=IUU=sH;_7ToTr9M&u;p1`nQ1ep|FPOjd6k-MS3Y`8$WZK+in1UkhO=p=r)
zORZ7?BK27ssMF_+z9kutfkoQ<2Byc~@8Ucc-C$+J^*HZpXJMJ8H&98Z82t44Vt1*%
zY@TEkhi12Mjb1?au&FfO*Dhbg6cs;t5LUGh%=%;@8RY~M@sX_o5D|*}XK(HKSdj0;
zj(wC#Z0`nc|NQGVR^k3ep;wldwb?~1IehcKgYEnY5_cftAT?9M@VO%fg=J$S-TIM=
z^ax_Cju66^_Gqf(swDW_bn~u4qF_~SUZw51OXIYVSA}FuP1g`I-8?66q27@oqw=GN
zR<u=}9>&I}mKF#t-|c;?8&m9`A#}K95o<o2Tg;tKVkS>q_OT}#p?<yxy;Z^TK=K&)
z+)tlbZ<{E6`2A-Zx>weB@wK@sn?|WAhMr5PpAEwj_wPTlG^uCb7*)pw8t8;P?Q&Lh
zZv-&+U>eoxA3>-{)b@3Kmh)V*Qi<zl_R6MW^9qHsU;ZPj$Fm<6uOIEMJT4I4wRLw9
z7ZA?@yT^DjJlVUFx(ohY10(RMcKV9iCh-1KTB-<(0@X}PMmR7XH&Z1q|NArLw3W#4
zqQ<48p!%#v#7VueJb%za#PTSlc-9@`(Cw)9Lz<wu7L<r;u+ddmd-Ux{?28Xg>ZRqi
z8>l|rJ>Nj*9Si<&#|}1q4ufpdvc5}z#?UhvFmBF;yqHi|mKP4j8%@$7Cq~O+MnA*9
zaR9f7pcmQ#Cfz{xX#FfurzCKT7dK3xH$KcD#mRT(r92ZwYt3kT1iK~6g7n5Y3Y%A#
zOu-;;0lgT-MCDek&*C7{AWUkLzDTN8_!Q{_yNf12XkX)DjmwM4^|SJj(aK%tpE7tl
zEM&r1z#NEo8Mod~)CsMsvdsCm+yS#8PAFYizKq_O5YOx5C;mY9^<0t{YW^<GH-fcf
z0*^^UUYTjeP>_0&+Hm76V~=J!+CVCDcwLPz3*jhZd^I5d!^M>2K_io2(WuCy8vEJE
zJVjoU>W`Dn*{p{%uDt@r@iXsrY)?D@eZGpqI~63R+R$D2*k8Pxz82UNK+MoQq7|rN
zpU*3#k>Q9)%g#=6nR~y3g7jrMWF?GXH?;(>znMFN^{-d*d;3yR2>)pDQ~AM&JUeT^
z4OiIM$|J`4wZ)J!ef)E<O=Z@c^+7D8#W2(5r+n<caTKVoY}r&4{oM>Jclb=+t;ar3
zuFEEWLrjL$4v049AMO!RT@<zzV#}Ii)4Ss$zN`IKOhT3P;885usmAfP#;An<$fDa&
zc@AQQ<^4digq^Fsx=VaDsYu<`be_tmLT!aen)Q`$t^+bSXgQM(&COhRwdCy5#X5qV
zX-==+`*<4uV|C(YT>nO^01he3s*dkj5fUr#2sWQ;5zT^CjIut(=qcI1@3VNuCj62(
zReL36?3$=kNfb95(SPtmK6s*dGIPivS+ZoH;$?ui*euKNCaeYBl*Z_@{qb$({itYx
zQ_$<~<xIy#$~O?1?P@3CW07_&OWDxx`XY1+w1i2lUT4Tl2f5tVo*c*kafy`EV`m@t
zVbcU_tEsCJpI9J*%}1e*U+b-cX%%worlgN8apJn2;xK4~HKnTS>$#DiGk@8Su?)On
zj?Uqtm4-0g*K*6>X=6{b+IbEBeZ@K?J?xa%lEDCilCo2M2GTlQqz7G~eP7B%n#0*a
zcdZ0eGD*REM6!1@19V(KU;XN01-pH>yV5op(aEqIu$W<h;)G*Il}-SE)~yF`rz*Jw
zyXBPC!H^oRdXz%%cxRaDRgg3%^;6|-*|K$#!r3C`K>+U=Tog%~lKdgKa=C&cWTGdH
zM4l^W5ZMYj=(Q>Ga)zc(<$6Ty7f%&87VrnYd?~5mE=&e@_<P?+Mh@!?=L(nb*`(GU
zPy4m&VxL2vE1ff|&9vsVD>;f<Qm9Fw6E*9feA;Fe{GQvjw}%omx@;S|lVl(1GIbzJ
zL$bnx$rGDSO}Gu)RN6L^;qjJ6VP<@R_TD;T4V+{%xLzr%FcV+-9$pOvcuW&C(tyat
zceRHor9SvtP8&$xTO2Nyes}xRFDglI_d3r0vdzf1etmg<)a6@C>igv~j~1@U6?)70
zlA4}?i1nda8Qv|Cp9LadVJE#cANT^6Yr;*bAg2I`^X}3IRvT|=&G~)x`ej0B+Iy17
z(Zl$`k4W2MQcD^b;K>wY_P?jyXGJ3sxA!Sqrk=Q8My9>LX~ZoH+lsO2*NHT9PnV;l
z8BE0kxH?o@+^xe?DEQ&zOr^HkhZoRa9M`7~uL?!OpSU6~Y|?c;cgXUdv@Fqlh{!J;
zq)&Nk0>2R${PV5=xdU5Q=<(AEE{PXUC*qp}zkYW56%!12GIe2d#?5!9;dgK4SDAML
z>E0gVoD@7Mb?%%J3i9O&UBx>6p%2aH`AKz&@6+#nnM6^akUEr~)V0J*e7?zVT&lXF
zItd^>DY7b*z9xab0s1QW|JVWm9|HO1DUY>kF*Jtw|KWpxFSTN&S6#M`LH$4N9g~o_
zMdFd}MKOeLV{!MsOqkFP-&D@goIp82Hfx*)#(6Wss~r^_-aSAw+gf~<YYMHgVY!v9
zpKZ~%DV2nA%!kmeUl#w!irp6b>#W1Q4IR>K`fipbky>6fesR|vsiM-Af?WY`9TQ%d
z3dzm$2Ckl}6a<epIf;aU?z$n%Jy|<UO&&ekj`)sS@-P_v6WA)n?Z}PE?!+~~*t0(Y
z8_J_MfMh74wLP?U|LYm1g$d7PIu;GAa5L`84X4NC*t&g22Q*m9d~<$bX>RD&aGQt$
zsF>oHQ7@bY>r|5jkw8b|5!<&~u45C`X-cBKqX!RQ4`?okXniY;F5^YmJy<^7W!1x|
zJtT}VqVwNaY$~5R7?a-XL!BXIaGI|BjgzF-WI9PvIl+%TGCBy|+v!VXhgSLPkm@~B
z8FG26zT?iluR=`1<FU-mhkfdsJPi4RI!QVCubqFn?d(J(g)JuHhJJnVhe;A~jTU7_
z0%!m@tMiOh5iE{2)e?<<PFiz;yg!qe6-)2D){WxMitGntoA`0=1{WeZ`5r-%BGHLp
zhaRAaiiSa)cCHW$DMssFN!SZeB<GUT-SUp9w>l*W<0)%pjQ(63PLzf8hDPUh_h1!T
z!m;sU=rZAlZ*7A1!+e5RoW$PpypK3C2c1p^C)fD^LtcOi*zz8P(Ua^M!_(e|lJ{!j
zdT7ZUNH+Z!_cO^vo|y1h342VPwYK)gNzsAiup5P00Btf8Z&jzE#q2+^4hjC{xUd(V
zp7A`g%e_9m-EQkC<1;a#d#~4r9wCBf4)8~k>2<EhI{sH?RQh-7)RjU$ANST)&k_3n
z39i1VJRcg}$Z+;6L<d!PrV7Nv6zcB4yKCl-uB60;l#BsM^e|Tc96p3C|7cKiqE=$)
z3(3X#A|b)`n_Az!b=X(7(8!=6e(;%B12#xcVoo-_!hdZoN#GQ4Al5ZD7_h=<sZ&s=
z#%6aD8YY?H%634a>R*3epS5=?61CG?AvT}&KLWcHcMA)jvC#A3%?6b1IOTXuX5e|M
zNQmTUh2H)9#HIWA;;y6qC>7<_*=Kf;n5ru=)<$<^wC(M|^Q(Q=AsAC;ZUG(~^`=Oi
z)EHRxhqr1{Nt3J$l9U3Dg0yFKsw4T%iqoe<w-)AvgI{%~e_GvN)q&MNhCi~cERR`u
zvB8GS=BvlO4SZO%Cc$<cGS6>;^liEvUX@{F_Y~gvKy4ViUf&Hf?izt^!)H{M9L%T>
ztE}__=@%pK`d#&xCI1*z<eih<yYdj@KZEx`HdR%b-uNc@&k+^1^0%Oq&jnSVyV*t?
ziMII#C7KdcO^kF^9wAm4rs`w_+UsS%BdavPG}?`6w8b)hI&BvP6{W4!qZ<<5>%BQb
z@S&XTt-pYmo4oxACLULNl`};QLA7e?rWqVR=X88U&Q~1e9M%@*w=*>Kz)r53hdV!4
zP-v_tjMzHS4?vdu7<o2h7~}@l^LB)ld>ATI#PEI_J#HgT$%qzW${K;%m4URwoTcUZ
z@Q#6W^|8dc^>cfRW8}C<##0u>MXsr6u?G5D_AKAY=JTm1mh%P2eQsK^G0N!dY76<A
zygJj~KKrk))RY`ZD(9aB?Sa7cEB-Qr{xZ9bA%@hxP02LEwP++V{GFi-m-qE#*0=Y6
zF|N~zTjFOuRqE)X^e(~_Kv?W^5_x%6xsj20M@FuL+3dwijVA-P27SYwD+lz?7tl;O
zl2EA?oR)&8sacdo>3*{%g{jKLhZ+NnqR+o-M7L9{v_}i6XC_*x<2uGBMYH>;L<H}z
z4!>)9_V)GubKWTsqqF<$e`ML9V<(WT0Uw8W5&f5X`Sur|fxIqnVzj`#kj~>ycCmV4
z`Z#*;MN;TWS=OnvuOPdCqbd6Y=QUA+&N|#krZzn3-3JD;#_*CaAmKG$4MPslP3~|*
zo{k@l2WB#;40$k>_e?!>$XGfR*-9n!==b!TU?uRyC;gEbBd8QG=i{FSX#LO(XR$MT
zd0fl)scz|Br=J;=MU3F|j8+%1YD?0x1w}i|SgE4oCzu;2qYZKkvOR0>lyALXHu(6~
z9Ly2BZ4|!U@9YPSj(!LlO8rSFLp8T(QnR|~ah|^2jv4EFte>3=pJ*6`=o{K_#r&Xd
zx?Hqiivq4W8&uIxZ_jiZi)5+&4*gF1wrODVQ>W7;u4vY~!L;$NAosw~x}zKL&m0AO
z1S{6KDK>9cC%(*|9GpXnSH5qTy#+dJEbDI<o*a_NF%opUa`*9ctq6^F=sH}fFz^Is
zsddPKKJ5I1?Z!sti>AjsyN<C*^olC(k8pyiuh%JmW-i3nzd$TB8m*>szA*WTA@&CZ
z3A%+Tb6wYOUYCqq<3<camUi<}MNg~3wJ!^MxUGm%`@drS3awhQ!Yxcb=2NzAHT2M1
zx;)jH8dQk)C2uA2Qky{s^%WAif~HHEp<^e>TTHL8EK;WY+sEB&xYAZDvO;D6uh@2C
z<j2`Fx(eA>0-Z!|u7JOuBK+I`>>X(Q=;&t(iw|Gl&-~j~7KMHtUZ}W-*BwXUe0g%_
z7(nqWg}ILv@N6>Ujq{@NbNpMs#A+64)`pDlPa20ei?dV9d;*PyLetcGD_)_gIVbV=
zTZqU{kteHS++~mwDSKCf&rZ@3@+a(z`wogfcP{XQl+qJRku#{E1pI>pO}tN+*0u?X
z3(;h<MYbp+Mk{Tv^SyV=^X>w>K%@1<n8uCUpD8aNSX$7@%}FHOm+Qr{l1NQ06%t%t
zXf*?mX+HuRyvL#(e#3*8`P1)6`8BZYXvY~|ymgZsI<o|wOvQai)K)X==!U>G2ZpJ+
zjkZxd;`c%kH*X{FB;5`p=OcteG!zbPCJK9^pzcX3>I({=DL=)!M`RGbI(`=bsQdm3
zcbe$kiZtiEBiaOb(i{H_O1{KTig+Ull;m)BZu1069pvB~K(%PT>nV>7lr~NQbsPR=
z!dRAtvIW6KqHm%JB%S5uWGVcHGj;j*W_euxy<}P@driRqCmzx3Z}yqozlE5qgIlu4
z+`UJf2$)0vT8!D;r?YOM8tq$z=hy@5S1Br)!+y$oGf6)dJqO*n<U7ZKIxia=mR&TK
z(x+T_@SK?z?q5H4hy~1CEC;6k(Arh>i*Pk+E;zS$-RO60GPWDZ*74TjIL?uI>~z8v
zH*ic9_a;u_U217&O;gm1amShaiGb(aV@dib(wXg_BmPQ?jgY<JV}gjM6VBT?m9Esd
z-0E@QND``n$Wr$;s$4wjd};GM<OZ7_-|xiP?4YpbzH$#MxVOl3(HhVC<mZ)ung9eY
z@#)v8L~Tt51ij1@vEv1#_n!&j0kxfUW<B`27$zj+^;$?Uq--dQHArdiq+J980A-KA
z8pe*)(srMiXXYtN3dCv-#p6+S{maygGT02i`i7^-A`IIH(%N=PnpCpo{K_#xlNQSN
z4#5I*L@A-?5viSbdNbHcz(Q8z(j#fT;szCA=S_L24KEHOHCM#yonkqEx!`kE|13rI
z3%M%$xtizn3ur}0@vL><y6u!R!R@dn=`cXF;9-U&y0x}sB;u`4_|w#yLBOWxxziC}
zW|Jl<^BHS%9?h7LvS=Q_;&pEE+5ng}`dDXtgAZdqI{)qB3o>hWFR`W1sF`?sBl{7G
z=Xzh@X|2}SX>fAf2x7Yh>y1bq{c$&Pr9{ZT$VSS&U;nYvf<HFPuw3cp_<nD<`{F-8
zz&KOUyji3>^n-a?(19GGGTHK}7H=!P=5Etb2f(R!r3m|A@*}Y$t!?LPlc-BW9e)n{
z`Z%Up&Sh0KrPyj<qNwnUSF;~X7D0V1LAvw+$|Te<Run;s+*4rYkkOAe;0iimz#U^v
z%(5_jE^$39I#x;UF^fnJ6~2o`RwJl{s=;l|?5U!$8s$as&+IW>Cv?u>_w#r(=M78m
zzHYJ18r%7~Aj&W3L!da?HK^IV@4{Vp5fhcFF3LycbXi7(qn5EhkQJb<aaKe8N4d+g
z>c+A`y!PI_y{L0eE}QmTz;;CHR{ADQg!-GG>aST1NiU3OmcnS35s)I(Y=E}u>}$cF
z8iHYjYRuZ(M$(W;>~Vv&bU>;d+VWVP$8#$FUA;#dP6<1%k}g;c8AiHX)1>^OoxfXd
zGma+4$CxqIcq0_lC$ffE-^ju0aUni(2pw|-y*<5{1%8M060Ft|ckM_83k#p;WIsWe
zQBMDDA05QGwemmd=pIoGO1hLXDa3<Ah8j{?Ivzx3o2GH^ki5PQ5n5$5rm|cAk5H=>
zYxe0j?xMJ7#PUS`!pFVNpx4lvNi{a7hcSCRphc~6Fwc6hNO4ujDc_c(9M$;4+<&n0
zEjRkq_R;T?-a~NzCsm98$fOMg56pu+z|KsZg1P6}f|4O0l>_@mNrBZuPX-Ycby<K#
zf%J|Gd7|jhpx|%Vuy$6-cCv0cDCeJQPp;?c53TI*^Cg_4ltF&)(RcNa8J~Uz|2(kN
z{u^<=TY&r=T|qv?gf=ArPQPlYmur!59f1r(<{C-D3wFhff*QUbQjw2_40N4aBn%hx
z?oRtH_X$0FQ$tv03e?HCl(VUAvL#Z3u@0XX6!Sk&z;Ey1R@+zAi+)~y9@rRRd4tD$
zoG;<AT@MdX8DkseM}#AU&p<VuSGac?UB-A??j@lvb-B93Nc+aWS3+8#4oDH9VVw8R
zL1XfU*Bz{2{?5iEQ7U3zD}R{PY04<zt=>VO1~DuOaqm~Rjz5Ik16x%Fs!9r*ROjix
zYTO0Y*cKHw+{?_@u#r3I6E3hujT)=pa;|#boW|ZQo%fe9(?#2Kk-f&w?*~~Ph0!;K
zcXw)~*UPBXWI9F<svQl|6s5D^v!&ds#j;C;`Z_k2A-RPJDWEq^J;arGZB-H|c2xE+
zYM~m$H>>CwW9qkM$2wMzwp6ZWN~Fm0G<N-Cj4&zAMP<2s$Q5-eMaW8{g%z`)sb_}^
zg*^tJuiCLBDT!r4+Ou-q1e{XH-GvEna<q)(HxLkF=!WbyiI24#cL!NW)+)IoqOdd6
z)je@$g^qmUvO+|0J%^fv&+T4Mgd{0H!~Hag-HOT&$A<-=ah`LG3f}eyuW6%D5nnQ^
zwPr{ypO8<}7Gg>Nk@<{pmqpO%3BZ$biG&7x&K_JXNi`47gIk>}$n^Z&5s}ANeR<q@
z^stAzU4QH`<FJbzm(DOw<`{pynnNR-qYNLy#n<vDk3F}<V3XcFwAOW7egv%*kKxRe
zDTD%o_CSz+yv7?OkS(iO1Xu4ER+SLRXWZ3Txrnfzj~3%he(Cpb8Cd^LE9aeNhxEG7
zBiydj*3aL#Tg~IV)9zlD(_bWGQ@7!gF4}?{<vqwB)R<p4P>74=JJmz1eg02_Gln$U
zHu^R|DaA{33Ol~O%Bi}fn}w9>u0;WO$CsiCWr_;m$#i!Ey=w8ev4hIRig!^M_4enk
zOzptG!8H8ZtCW{$LP^sN)sjGRY@2FZ(*2i@HK^Gd;!9X0DUW*_!f0#ct-@7H>~<1c
z!u!dojg3DQeqg%(DaqGH#+npbiq=quBTD&?e}#>E+!pvVh}sup+2|Id9ckB!&-D6i
z30Qr{D1EF`Hg(qNFYx`k_9>qak>)P{RrxRcx%EAFxUBX7D^5Cef4N*1LgHgcD3XKZ
zD_(iphdk*!Y`aH;&Lk%ux4kw%G~GiUd|tXYrV85edmbB3QhJas$(Lk7k}A}`4!5XP
zUG@>=@b*uk0ftNL_ljfQa83XK^S>VxDPILo>ffq9dmv8+{mo2C-eko4^7H1$rJ-F`
zR$qZ8Sh<aFGp`ohW@3wGnj*aa-mTjSswGHr0Jo)kIXRL0whJ2YD*A~*Z+y^x`TbMb
zS6(f~jK)xB)tV1-*8i-@C8HnrBs&7Q*4h{0ZHMFPxvS!sSn6JRX+{kNdWDC8(Hjb%
zNRCoGz>1YhWbx*`I}>!}gSXm}%ekDn@4qLVewFelp!j>)d0_T%rV44`!-NbwQtD=s
zuCz|dzIE$HpxqD66c8;bhTR)ansX%n-%1>DoqAv=#QQ@hb>Vd9TgjDX_UPo)+h~a1
zz<R&(cBK{j&Z0uIp+<oMXmuK}$?@u^5+bnqyzPkIsr5ycqG~gJ?|)>7Y<A0;=0%zF
zeBqDXGwE?rhq#Z9TRYpy?lt)=-MUM$LBI+O)BsmKJ-#706DRkW$v_c>na%z>U3~HF
zqPN+IMfJ*ZS~KpqmR4XgtxpxJmXq@7wI>*-ZFa2>-t6~?R7iV$d2J$L=oPwBnb_R*
z+~l&ex}P}Nd})29<1R5;EAU#p68mBF^Pbx_{>msy+@<44t!;sC#3zP+;uZ0}i#Wfs
zeig$03`HZFOFq}HoWF%RgWU7Vq|8$b5U`{2k5Mf7CmiT6w0UH!IbP)wglyLDNv|H-
z_c%+{`f~J!X?t}}wh>a~ZJs&9{bO{a-AP~J@a){W5m@Ts%*Jx$U#4$IZ?;omd3sRQ
z5K@~KR3Nl*WQHDjvqeD(t37@+wv`+>dU(WwmTh`{S(q}Vp}w@|Cu}<LK=Jo$QWs3k
z`OjjIf(b_ZKoO_nnS;+<h1>BDC%Ak50gcM9N1&&yR*{%gi=`n0yuB(<kc!elfd}!n
zd&rYim>Bag#)is&>yo%*lI-q2-<riwZ}M9>s(x{{e7Gz3X9IViu$GsN7k8|r=K@t>
zMp|-V=ELe#)iUFp$L%50iBBhsBDmX4v2Te?wYD+)`Jn4%!0oGADaaZk))}zq$RqR|
z>6P?^J|NL8=m6Lu{MB}!uTU!?llgoRQ})C8#mv`=Y{J*@K=0;h!hd8#T4D{1EMS4W
zIBt7KTUq8mWcr(Oa<g$|QH1PmYu)@@z5QvcVZMI@Xkozbuk_>gdo+U~1J%Nu6#Ppg
zr5$&>BiEzVo_*870j=8%t^>c&zcy7rhFaR6;$qaGVrl1}(rgj9s3;e2p#09`{%#(Q
zPnH?}Ma?JShWn3x9%|Azs~0_Qiz^01ucKXE6GYtaXn=AG;H8cA(&<=%PhRW46W+(1
zG`V~4Ew#DFRVF(rV);?EN(x4=wZEt438L~%nZE@Z=HdcP7myjI{z#)p4->)Tb54V;
zPMz@|wV07~_*LJo4{)tFbsk8c%$zG4RC~U(ke?VS_lV^)m;~hJkywqzBV!iip@-+5
z>z2&HM^|Nj+5eFpY5^}2v>AsX?zBe-u~}$gSyI*|l*dRaPdJVucD&<MbRkBFflj*K
z%>U;@W+2h{DL%RgO~e0#S8eKoW21D@vv9;QYiTg|@ZTe*gw>;7Lcs}ijyILQUsyt)
ztzhMBQ8X*eq8xaxzAU+KY1XPJ`d66M3#g=`PX(U|Zh8-?+*&EJ**Qyp9Qi3sGE{?k
zm65-^Pi0|W@-(tlo9hq<d`uMARZk(As$cJ18ks<8ltvK+cbk9tDjW%nl#A#Wl$8`a
zN$*PnP5jSM#;G8cU|Vy056hdW!>=qVEnF{*6x4)W*Jf5D#Xz8Rc##ib#Gil2J2W=8
zl#v;~C?L`!4%y0@ie$r=ob?X*!VBb7TdR?&vxb*75ondvztK1EwnN2<doc=j<t`2*
z)2)5|ffvx>DhJaJ5ML7bYbRXD0Y^x$cO7nLuykn`c<n+`h=S-7UcsJUzLRatHv2J3
z`SN~Sa|(EyrcMsbJ0!>ZkZqxaa=0c~m8W!eK)xeQt<JF!=@>8dEGFa=P31=jO3Fg!
z(+Grcv20#$kf@)*a5y@VgabW~-Q6`%iK!_*ZT<vFj9QOCjJ~t4k=;)A-fu4mUSJMT
zjXv(X>Q8qRUCqdFU$~XmkoHR0Pi~p!S-p}46g61rNOguVeB{!(jx5bD>|~lLQ#0<c
zF4@7jC3<R?7sct9Uk$_GkFpjRIJ9%+VQWHk^7qu2Qr8@rxcM)P5c1=;xB|vI2QCLe
z<-lrFUk)xY{4JLYy9+i$+vhE^Wle7F`ff{a!q)fds-k?0Kk`qk+xqjm&OBS5%u2;c
zO6UU(wo(18iRUu+yOlz)BqCEzSftjCHnX%z+`mp|zV(Q#Upqd3N$@1z`oXU#LmI2>
zy9<uW=$9L$eA`J4a#o4C0TXkc_;WSptc`;n1Y^t^^mmBvzCN>5Q~FMHq?1)T?j!a1
zuKoN%l?8S6JBN;kdD+QVWk0Hyc3(lumX&1~UAS;wO@mAOmh$KWD7FUz3*?1={Z8O;
zwwc{Cn7PkMejg~2MEz$Oqm>#>jP8Wb%=RExnId&wyPWljgYLq&Ih&1-o*OpFwg9s<
z49+tOUx*YMq~0;KbWT;2VKx(g%sK+{m$3@yPpNxnDDckr(g1e${p@nCNqa<pN>SaQ
zKjcdw;&sk$p^dPAELJ)2FzDF=F=78dvazOg%WuPi0?Ep|ff*%3z8e}x1A2lR1D*GO
zcq7)9zJ=LL9f%O<k2~vBbSgyINy0{%DzR@tHGfVamGNUuGBk8<J6tw2aF&*rogZ`v
zO>*j`r>LMCq**aM<bNM<mx5Skf}>~lp_hHfK<aSg=C_n}w`D%*4Nbk67fgO3PY4yi
z+K0k=FX<<O-aBTH_}71)h4(G*3w{ffD5H0eUN`NY0+5&}KS7zn$;DpflNw;ag4s$2
zZ{PjVe%Y5EN4+(Ztj+=sp;mGqA0V)QC|7jiXHu4=X_E2W949SdD%EaiVH(R*#+#|T
z)^>B0UG6s_$6Z6OYEl%XCQEr-0_H<e2Z<2?0{c_-?yXRtF4;#bZ?e?8ash+v&GMcQ
zt?h~rC<_I$tM}!!3IOV>Qa;q9i1Cl1sb?)HuGJJyt;jji2b4Om{uYXq>qa&tv+2T?
zsH_5uW4JxIoqtFA_3XZ^N`~sHz7tCjzZVX8#b3_tF7w^2i~ZiiTlSjhHuMv30vrE$
zO5~@>L?~wpr>*^?w=x<Ep07VEH0!kiUJoZs9f;G=_0diDu6=pWW3M^+;F)z-vP|`z
zFI_Po=_+{PtZWCouZjGyu%D8ePs>PN6+GsHEd0jO1a|>ngX)CjJu|MFer6~O1^!6}
zR2sG&|LLS(HADmSz<~;w-4_SHlJ<#D<xiW;ct0u+*LZjR)Ug4#+{0l1T)xut?0Yt8
zYd+8w8dbEnojor{nNlj5N6;5gd(Kjb8w#=M=DC<gC*{=C?<7O8zuTwFW5ga~qjNsY
zpI-rbWRo7(^M2OLH83c?^-daZ^&V!irkmMEV2f#=Ht*3x!FaPuH*cC}(GSgWPGXu{
za$84#@X&!wtJF86E>?Ao8<|qEPo6(P7FL{HpPBxVAI@w~R<*?Q3xkam1e<!ZD_eDj
z{|H(*i8!^3w*w2Uu8|$!m^X~p--;a@-cF27P3r0S7<g~pUtuVFiG52Rw~Ps1?>QvM
z34qv<zRO=C7H-ohku=<48O+urqp7c4LgtYM1`InU`To#x3W%pGS598DjBpZBN_X~N
z=S-qjKNka6?ZCIw&M3$^6FNZLxiZr*<kUPFCTu3oa`~Y>*k&j$x8NB`6&6m%F2P6#
z7lFIET6D%^gR8Pb7?+1jGu1BdC(vrwZ{~KQ9G`}{>6Ujczl_W_3gAANlS=y2p7qIu
zj<u&IXWO`ahE+S(fiEvt%QC&S-fsKioOj(JTkC1&=+WvNr}Ah3c}nok)k6!-$@JQa
zQ1{<697ast>(6yF7v<|dS5Z9Q$G)m8?bVXhCO_iGXi2{nmk(S99m{to1Od^+%wA-K
z<v8gJYfLVjTC*GGOiA#j{4707x^rog-|8jhLB@+j_XvMU2u?kFdS`ZF$tXz}Z^%dA
zUKwNc)ET-bquCFDa`c+C<8v~ghNaOO$J=09$rzAyjx@|G!W78R@g8aY4B@tOMCcZq
zU!@?d+M>q-YyWMfj3e9KEomxoz^_UA$d__TE_e)h-~(zo+ohj=VV7uW?x^geg9qJ<
zRLS=y0W3&oECkCxDv-(QKk8h<jkCc=7~$!dw>4r37Qi$Ns9$8=uR$$&xxpb9h1J$A
z!kabz?5*CG6~4RC<L|W@kz41u8i?(=)0>adT$R$)g$L_fZ~L~-GWBVZVhL`@3~v3<
z{?%sI<oek|1wVd-E+9i|ZZ~C_P{|S3ha{*?&T=t`_RiqmdIj)qidsC~whF}SeUI+Z
zQ2wT%+wTyaQ;YIRBdF!;#3hf$i++jJUOKnki;^y?NAPv#$*_0{4+(iSr{KLT?wZt-
z*A5t_9;=h|9NYb`7S6l`?(%x%&*Q+_I(NuNA>djrfmonm7U<jitd1G%t9twrN-^Ad
zxvC)|oqFLABw3ML-GJW?i%z6V5~W0|<6t?dXM`F39FUcLZW$(=RCK1Lkl)o&sWXVk
zzV#qqBKTC;7gqHT_B!VVMbSxCX&;baNN-+F_d)$U05W^(?voIcUiJ1Dt+;L4=WVYk
zD##a{Uwz<8c@bN&?iP3V5D43xba_uOm|k{WpKSZEeVd0ydf(};=K7Ufo9v&Xgv&ga
zK|`yicY?!#&9|~JUSea26Tz(89ie<BI?|KGnraX}x^dpxi@J7$jg3g9<dTxZCjw#%
z1ALco{~t+b;nw8;{%w_#RFDp(J4b_xAWTN-=*|ro3`VygB1j7x14*U3VZ>mFQX^D4
z1`G)aMI@x*^V|1%{(v1jj>Uby@9TQK&a<OO1p&+wN+Q`OQjnyYoLm%&gheKCb3N-8
zuVnv}uw4z>1^~u)Zj4CWCUT;=OnRr9pZjxeA#1*OdLB&pj=e7xlThTA<cJ-Kn|M0_
zGq!-_qnS*K#I5WHn9<G83CP-4aT7tSdy;@ekO^KHe2gVvb3hM~hH6lz_U)3|D|<Qn
z*<j?KVH77sr@F_8@dR!>#%W-lx<rhe@2tUiDkJk*brv+&Zf`QPAk?T#IC)I>+EmK0
zN;#fFI+ctrFUKmoD$QSGrDdw)#C=FIcx5Lg`t8InMs-LgOQKnv^f*7Tw)E#a)siNd
z2|j*X!bBo;*a}+Hw_Qg6F6ZX`W#6D!>mcSqgwQMw#Ak*v^^7!~_d{Up1Oy$Yio16c
zzqDAaJMpHMv8-Z$woepWc?y6R-W)$%M?)rTUv#l_g%G}~E3_h-x(z&L3w&&fm7fa%
z*&1~ALW-YrJl%0@+qFBx*Gvk_y&jxYc#fIPX~|ESxfPGAz*;<7v}x7>#;pYe{}>UR
zd@xoS?~jvPP(XD`gdCvvh(a}FiJ`yS3}B`29Cy+3VDmmrEyZetw<^ARUZT1=ERBN{
z2?{NNMdaU2hqqySB8p)=Okvid>^nz$Lku8$L&^1SUJsgxW#wnOe-ab!PTW>8At=4L
z*&`zD8S+f`#|QQO$33GvUrLMv*5(d3AKB;ViC&hU6YI}EfO|but}Y%L!B6L&sGp9u
zS>HVC)@&ccD>VrGcO#JG78<^8zb@%YeS5vDp<uvPaY1fzHFe*4wcAvTdUm@@57&NZ
zm=qxR_RW<a7LZID#J)#;o0N}n!hTZ?fX1D|F?VL==PuJ)2Yx<nu$FG7kI`JtBPDxZ
zVj)N=6z*SaLP~pY5w&lKWyIYp!1rw8b~FVkkOI*&hk&ReI0ZD|!@Pu>RA1@bm+Gm~
zh3#eF(G5)vx9tCrQ=H**M0ZAiT=^P)?8*!z8PuN-W!*1<wtq0Fy?gQV=oM?;eZ05`
zK=^$90O(f*z>kvlE?9-vR}Jv&HXnc|A)<!@h?{4ksGGA=A|voB&F6HbmyBoQ%}~KP
z0{7)5Q%Jd#8B=ZS-ycmpzW~Eyy(D2-_~&zRimnz@?w~;})}KMZp<9y$rwZDYp^VLv
z33g2)G4^9Cy^PTHG-L-r-c_3|1_HF<qDCB#e*gICX|C)zI{;#k<HK(o#u7*S>RvTJ
z#a_06J-I&a>31mz`O5#0()^}4KLvv(L5@s5l&trBj4n^g0bt1PfVcNDUB>7R{7a&X
z_M<yYSN;t5Y2w37)Ll8<MSl`GvLO1=_tLrK{^vo<)Rep(yU9<-K?3q<!*7knjmFiu
z8`E}4r%*Zw!m}C3IsV_7#9fA8>5MV=Ws|@;xN8c^Vt?VbZC3C3-;H_Iuz+*{v67W|
zf0_F0W<%dE093MPvml2_)8uov50fH@$Mk-srgn^pfMp3-`_*4iSZcC9;r)6FI6?3>
z@md_J2h(YW;WN~V#a?W0hMm9k3xN$fiP=+s0LAb6Wt*FCUN^tU^(`AST(&$aH*jM5
z`qHi;!x}T{XLaCKq4WJg?sC)T1>wBnSKi=Rn#Ik93;V|#+=<6W4kS<QG80=8LK44`
z)O!4$_DIWI-}S85uO<kjoX!5&$++oCv?aT9t&DMXn)9ZZ!dA2M>0#`Va=#j<nokyE
zT1;DH=-8wUO{XRR>?Q_V&hj8q2?_NFC^H`mPqTZemz|#fkXS=UoeOU~A3PJyrKIct
z(Nq%@^4)X>OoqvKVB(TF#KB3bq;A>VHH^*uk9;=;IlG&%)lQ)4d(?3R#*ng^=3-Om
zL8oN8G}n|*=-!n)^J;@<MghmSDZNcKG)${5Z(*dYBJhNVA$^jfdoA$FVAIR8eX$*5
zwBBmgNH*Oq^>bIUuw0~=?QwD;lc<{Ye>d_nf3xO5J(FG9@5P_TD58^YmmI-9gC!Fs
zY(&n8LWZKru34_hA4~=J1Q+TNWyUEBi6UqxNLUy&2#&B=+p~J60x78t5f^kiOBLTJ
z_ZZlG;M=jutXL-?iiRHds)fjzZnNCrPj6@V63~qq2DNP<%g*E!zs%ln(J9zt*XiMt
z@W`dKGb-G<y&DF^T+dbPgSzikOg?FJDH5sLF}`rYBZ`B4F~-_L6UjMATK*@yuVI_b
z;)dZBb?Ia#hp_dh!GNja9Kj92GmboZFhuf|(8`!%l4|uD<WaEQzY^RgnZx-2Nrs!E
z(MGri8Yi!^`K6b?z?}6~VPK+NMMfWWj>LLYRV8{eQ30PH!8}~bL=_u5vW76llC;x#
ze~c3*7oiRq=pJu(^?G)on|S4))0C5uq|7nV;|A^SaHcHvy^(+YouRA?h5*voz)%_V
zm0oeYB)9MVz-?--dv2+xo4UhabVvx%f|LIJ!3Z-ymSHy)q?&hJWwk!x!ufg!J}p(W
z6RuNkKD#E1gby2`w1D@HzQUfi=nY7uTKU(*gpq(ZCjQ<_Sl9LC)!4_+wRWY7Obs<G
z<ZQFnxfdXBT*R2Xfh0`GB2ss&sFM?@`&J@~Ah>NKW+Mz-FUnxP?{7A8tC8&`i>ik=
zZE5wrh~cEuPr^BPoE4{;F#&gdma+T23ZE%c8C;l|)+}655JPXd$UE%AG*cfoCb(d$
zBNn+nES#^N&Sl--KZ^|h{vmgoO3&f)Cqjv|4N)lMs5KB|CT*Y1?<!vnn;axK?;Z`G
zZqRyR!xTI9$%8t3#mzlY2JusBZ&uBMRUS1aBdf0?(0<2fqU@>fF=!*>px@@(oZ8tQ
zNn^?c$s$~~?Cs=ST<^DsEBkSV`}PZx{PT<#dqi3<PVc1~O@bn2O6riqG+22)^N@<N
z4Z-UdOU6{Jv~K)ox*ArDS@F?GCKbwTy-DPTH~j5cXzbh9^U;&#E|C*e{xC&&6{wO$
z%*bQ6@u{r~g+Ar#8XKGw#fjQ}nks9-El3XrC&+%(2@0wjFf0!%ELmbm?J7JhpVXxX
z@Xs!-Nf<Lv*$I$WOr+#SU#$Jb>*XJ*rJUzw^-nt@VM2{1%Z#xiI%o3WHeg1!-%S_M
zibJ9iNcr83X?KRfn`d^1W-B&boRuBK%(h;j9jBGGwEtmb-MF$}%EJs};N+tuRc!Ye
zUMPKoCs&hNHjV4rb8M+b-f*RT`+*5pP;&%ZTGUGUbHk?w^|AXK{Yu}v;&RdEd(88x
z6!nw=opHuO4t0tigGHU;6I^UL2)=ykcJLsjYZupGx?tI^!<KWpuO}%r7Ua3`j1L7(
zx|ule)>COr2v|GuTu0AhI}Mm4lZ+0+yHO_VDpoB&XG=YJE9Ic0x6~Q;bKFz#zZ=`%
zD~?~U>)s|Ko<r?YN9;anU18S~)I7$*(1L&b*ElVk_0lKKq_-E66Ki=nbUQR#D|QLW
z>>ISD8K#ca{t84&Xb2Yw93aLamNNssE!NHUX)R6_LHR04#ABC8KVFva2&0(n7?>}g
zP_*MwD9y9N++MV}(iA0_W({2aM2mRkgQt>pm&%C#@=5ITnq2=7rDGaYruDbC1YEmQ
zCX>|HZWGF+uJde(F+T!3-&#~ZF1$2ue&qe_Q`lr=Tcr&6_;)gwDn(&BJg%a5BQ=^B
z=~F~+!{v0KD?{0r+3w?x;r3Sj@F_^dr1HUt2ubbW{ycGqi}$MW3aB%<`fWAkrCj94
zdAGi!53BWX)sI-ECL3f=8K@dx>}T)s*UjKTo#VKKt$&ubUZ@Z5i{|p*X1jY(M7B1=
z#-U}jL~MHd@?SUz`s2$DhO2{4UJs1F(#&nv27B$?`)6iMvg0^TU@~@vF8`N{%%^+7
zZK?i(jVLbv1V<I|L&2rE`H0&OSL`^y&^E|TWrSprc6UYhRt5bS#3OZ>1B<Y`?-brr
z+)M6}OrAcb^U;Q$o73I3+e32-AMg?G;9tD}FvnOVi<XQ<&dD;^`{4R!EtMU)x>js%
z9;O`W&PF%_?WzGo=z$@gMggY7_X2jl*}S&enM95tpuGg;Z+dTX4tHt?m577}XL=H7
zW`(F3L~F?;h`1m7x(&T@(+6H50~m^x5iq?2CkOCu`tvK2YD`Z(t!U^HnnL}^9G|oU
z1aPOc3VE+&f5%WNz+a)es5BRcZrQq8mTP9FQ2b~nZL);Z#n;KP8;<ODDd2|hxwnK6
z{0yO^T_~Y#&E0cXZbno_C)YWYWD6@_sd!r|-}+OCGA<!#6aMK|cBb|Qgn`@$l9Kk2
zi<Znc?i0ETTvcpO_din^>EJ?U4u3O$R~(Vfl^VXT8OoUX*E{|?i{nKg$45&N#^Opx
z&yq@iU7Aym@UmuopcXe`Y^-c7m$C6Uk6d|f{ro2i1kJQ_x$dhXmV*Yq{&Z50>JZ6w
z-gYBBS!+C}H(RNEQ{elQeZZ!%TZZA!>2+?Gqe)Gn^8!7!-b<gz%>$-<bzW4N95S-U
zBMR;>ie*M(>Jr1W2GplgA6^}*ZgK5@FJvu!krd9gsoeqmK2^2Rm@t`CB{Ze~sB2j)
z*|a?2CJh1aeKfL%^s0VIHjtm}!okSQvz=rGItV%@v9^C{-dcYavhDHh=g-RNoIsYt
z=y&>MY2G9<p~gXE2Z>0QKSS%H8WxzvkDbSzWubSU6crJW)O~T49)a*zzPWlwjL6ii
z!vi{TE(2$iz+~eBkBWQVIuUpeN4nBMueQTA`2lnpOyZQspI&xB?%Bk=gMkXcFeawu
ztx-IAk!dP}R!_w2l3yY@D%0T#@4f>FF;XWI*r!^&baWJqZ#Ovux9lH8z>c01dVYgx
zEA7NeNkmd(dyNk$xzr)*J*Hgu?q7@j6NxR-Fqzw1dK-Sph{VH_(e?$*0$TvO==Lj;
zt{H@-uO5(tMLsnTnEWWjzVj@4xX#vudZI1lI>_}YE;l0>fvD4nqi(qxeUGR1KqfR-
z?Fm;ZWMgffQ^p|y3{7C0lg%Ui@ZU@_^rWX_<IYnX1^%F`lYL}GkB5jy4N<{M@Qx=8
zV9%Ohqi{YpQhC2%A}G+DvC`RErxfHqXq9EG9orVTfP1e&y_Mtn7m=+Dt}DnFOLkx<
zs9vASPt9>vw?$t2B!@y4Erj}h8i5;;%=MS96YUq^O2G?pV5>7@?Lw!&cox3E*2nyP
zzBKsv57W<iA6g{uqQ4&|BlYQ~mHsvudbn_8vUzs@tVUz$1;^uGGZ`zo!HWxxy&Ryx
zLay{Gz0%!g+=J^EW}LBJcl;3;DYJX;godh|Ru%WYtO2qFVbBuL?F9Ct+RSURYrnSj
z(U&!lA!i`n^P;vr<XE!<VyIe7GUZ}7F|kJOXA9*)f>BbngXd>-_U8%mf-<%6CG2WZ
zfuU-BZ&GFjw$SP8{gRNaT<v-z>+&+24D@C5^NPYzdp-R$6J}xxu3bl=mg4xfj*E)i
zTn!AEpVKn&@n6%_i?Y2Ks#t>z(}u)XiaRUj2PJ8hCPOwHFBZHO{WO_NDx-OMbZovw
za5V=U2?pu`3qsUMg>p}fWwL?RYY{daA$(<95?SEi;$rY~VED;Y`y`2Dg{(VPa}$Nn
z#BJQap3{+NX7V^-LQ0g+U}HK^$^{AsOi-Z(!#$Tl3^+Eti&6>$8=QLTYrkxpSN{xH
zP0tFf+QP)-JZcuV<l5l;O_L;}*T<F|f~n2^93^O$6X+HA&RSs_QfBzVwmNsAA+i7U
zU`Osj&d(`l^QG%EShJ_)lfD^URC$zHWCu`aB$KnKn1@X|532m!pF>&sFKFxij=Q+s
z@}jGq;_FO~1-S`5Kx!4yD{o{&I9GD{wxTDYye%PUtThQMzV=7q{_*tg{du$B)_)<v
z6BvIRbE(OvY~DG&{uzn<Z>uVHV=P=)4x&D4OhjEK{dWTxRZa???Vh$>ai4xW_kWOZ
z>A7|tI&t~ro&JUf01O&43@awVqke{L9!+M|bn8fRFFsmCX}n@YXD$WPA1=i<*OoRa
zWoDjSTXxn{ZUjcB?O?oj!EM{GpBIt7A{Wl1f675hn__mi?BtahDRsHK4{c3c|GEck
zh?sWZs6-}cU$Qmqbh5pJS5p>e0h5g-_>$wAO;H1u5pvOt-P~=FMBCgzT_US(QElBf
zZZCB4BP)>y==wYG2yk7xPE$PofX}Kx*|lim^WCh`>3bv!WB~TQMCz~$Gn8uEEuvc0
zg~vp$y<WsEY|JSmi#z^Rn8#@4II!+_$yC))Qz9{<sGdBZQ|Q)$FYYwQaNIX_psZzg
zY~uo17t30b6+ud!BgkT-H3N3m1aq15xu!X2=!5oBge#nPnN4=e^atj?U$i_<3m7rz
z3dP&48^7dB@-Fd46Z7mTFc$YKhH<!v)dV-F9sbGMzt7_690QlTF!VeICP&gZH+dU3
zgsSw5(`;2N+5|tszK^7$(+tXrl3I%`_FUN(8%C#4DJG`yAa$$Ei~n^F8JF0)U+jgn
z>TY!v@;RAO<LG~WP1uA)Ai&!Z3LDVTiTeokFz|0u^I>PXZR<**n9#eQH4p^9gf{GE
zzRJ)41sLP?5!Mgk;IomPNipN1FnF(;zkXL0?w<2-h7Pp(R*JD%dH$YM`_Sv@`O)<^
zzXBPAuMWIO6yA;xIRY0h=gHcq@1<8=JBqqk1J(Q*(t2z=giU2o{6##8zMtMU9A@)I
zK#S<cY`BCr^Av7|-u2Eul)05Abt0#wpWPr&035hW`C*b9f_7v0;RAhA``MD52O$8q
zxD-J)B0pul8<p;msH^qRM%?o0_t(3dv`wxLIC8sRq0;6>;I(L$!ws9soaFcKPosY-
zzEce468#mh{O_H@P?K*++SS2wG%6}W@AE;Jd?Nwlm_m@Ero}Ba@o}?LDQ8Pjn$2@Q
z$C*CaZOa?s^^~~VIYfEck@f1g4B3;8n@zYOk^z>8<2QoPU&*WTo~u4&ls4swF&i-C
z6v5n}4ZW+H`J@fS{A`vCXfp^5r~UL@-8Y(&+#m-JPuTF|_Yr^$ao<MHCf^oHCkeSw
z%z7()=_sd`C;QpFBd!V7pYX~doWSEDGG023TW)Z2bQg6qM0XzP<Vpz&9sUbr!pZZ_
zssH|=ozAq~znkr2kbOsYS90)Hk<i2k=R*d%ia3BO_og9ygidlu3fid4;#X|S!Pr1*
zAI;xvx(9iJozeC@8A@qJ@^;;xv*O~{DESVG)iu0wnAOn#Lz;p}nP9~Ylv77~N<7{2
zOwifMJ}y<sce24h5=_rzy=q!unBx4jrVLa$tMZQCbw@iMUyT<58$>#$Z<k{0BSIBj
zO-(FA6~bnjz-*`CHr*SU#~S5^`_$=qAZ@74pa<d~UYV_r(`PVUU~)%IP_GIj{<jbM
z8imAbcfJG(@QJ^!LM6in7xDHgT|tZ)_I353Zm(-I=hMDY84ewM@<&uqP;t(Fw#gb8
zIOX%-jd<jXZCY>At#dyYOFUd}K9hKnzN6h@N{o>CO4mNwdv^y>Ug=44d**E|=#%fN
zTd6&=&oT_CpzPm`Ji|jLIa@lY%6w;r%PT21)vUMbON({Fn%)@;3Igj*7WYsQ{DW^S
z#@>#`aG2)b@uc5xug4bo`g-d_|Ec9a&f7wjq`+DJjaK>v9fQ4of3sf&J`xn&*f>5O
zF&ar+3!)-H9(K68&35xu72g(fBn{~qWh-({h98`bwv`2}&$!nY@Uj2l{s8wjvI=k$
zza*R4`QmJ2A+{HT^hjxCt&JZGXqg=IxtIZ*jmQ2moSp}SrK5R(CFg1LCu6FRMZd#q
z&||mqQ0333=j*%Ju)pT$8QSC&##%!C*cT#A(w@fgnY^?{W7&=nt0{1pn8$1#^wtxp
z$(7uat2y`k3Zak_nrTp;%yU0G{5L*i;rPd>jpaKap;t~fQ6KoTsG{y<H@Bof<CCi)
z#gmr|_4S8eq-^FC4{XmxZIeQaY$7|n2#O;m2E{76FDl+j|Fq4;XFmk{rpUke7ks|H
z>{;lE2Fry(N`-|BKK-?SXfy0;He%4e6X;t}ry*@?!6lf5lKm&QnDKh@{HJlMC}bPr
z#;BlcGJ?udE7ft$cs9}ZL?tTIyui<}eQ+he#8bf7Bv`2%yESYSsxy@<cpi!4=Ij}I
zMsvYSFX8Awm9#0bKHFPuy`@>1iSe~;A(#U~K<-kMH#s~m36O@GTm$10y@~X;m-~7b
zoTUci9MwU{qc#GyLJQQ^jH04J5n+4P;DCgw{r=wS*ss6%{E(D2u8R|IwvurlHXP~q
z5lL>J@{Yx6-j-W1Z)%P@gjZ#Wk5`&z8bL5;e%(S`UN~`u154kgPZfR0J}l;Jw12Xe
z)@#CP%DJ^l^BjFNteOJw?BuXCFH>tLO<ju4$=iQRt9{VhH{S>9{c_WtLi3mD3YzK%
zJrk}5I{$a0vPp>jYa|M~aoyhE8l7Z_R>>TY_{P>AJc6or-hOQGz$hc#dvPh-xSG8y
zq4Yg$I<ofC)xl4)6fQL*nAm+%kD;}ihq`DF=y)d5@8(q6$$`3=hm!7^cn$Amn4}rQ
za`lB0S`p2yv+$}W*f2VX;&WO>_r)qQRBNFnDUVtsQ--QMMn1!ADp~FOzZ-uK+afZ#
zdP~=Zn9-$sjW%muZ~R7W*F?GR2hDx4<1+f|=Benx7PkM|%TcPE6GWcu1U)6;bd_>l
zXb~D6l4gzi$5#iqY(!I0h8%r?QItUmdd2Ic7zEv(xOg;)gRSxBc#MOSJzrFQJyY!e
zE(R{2EH}ON;pIV9v2CZQJ!E^gN5l5MRYdbmOvQ3_OBQ@$1mk&MoBx?E!Xg-jZ8S7j
z2;f(keTgQDbZRe5r<dy@%WO^vJXH$Q^(2336=%>`klP)oc6gw&I%%CcY>Qj-$Ycka
zTdN=J55moT9K0C~O0v%CU&)XoUGA&v-~)L*)I%T7SL_KH#ZSD^Sz0$dG|I;832oO8
zlSx$so&OAsq}FF1)lmfQVqsVH%7*K?1A35=uzx3AS#4zO6OGGqM~fC8B8Hz5(=+kq
z&Dg}3jftKvx6fZpHlGZ(B)1ujOiC$8f7~P}1`JFD87K}uFJC_GpT?>oZf_SGl_?rs
z1dY}Tj+TF?YX#aoRL?qhZAdA>s%g8(qD5xUHnKRM<lg?2n!9{)s)6$$kOo{qNk_1v
z7y^d*82H9UX*$O{C5Wiwk-9+bz?E&{;DdEu0_wv^J<?v<s`UST2yo}h1qB?%D^4(5
zbIOX0FwZL$0z9zU^w?I@(){aDt5rndC3Bc*vW@!LVU~OS(4vFMS?Z&GkA{Fm9*Rc#
z#f*l)=glSBUZfI<kZVyrh4J@;V1K)Om)9TFtDWY)JhKyjmb9NxQaD~xZ2kCBb?eAv
z&F!bg>`R+}0^1?x5d@-|+T$Dc^SR75s(38FL&W1J=-q6rLUhn4E18F`_6l5-XV>q$
z0Cy@a86_f|obR7l0x0wFWRZSN^z93O`Tf=Tq1b4$#;lcu(e4~;(H$d-0T9OgTpsSS
zFaO6Lkr&JMApWgXwpjI0rV(HnfW_nOX=+AtnrHxp3<TADNOX`-IrOj2Mu11kD`m}W
z3@gL@4QuR5tOR984V89V+O2FWindM41NT{+0lryDyLN9)CSCJw2oDw43{Q4*`1-uE
zYv*X9Q(z_@xsRbAM+J8&c-`2$Y`{FNi~1r+3Ym-@mYIB;RUAwI{;OWDtD{K)$N$je
zouX4r#B$Cxf^c773oJS_p<yeTOd_{&WWmSpg(yPcV+qYlbjtMmycW6mYJr!Bf@D^#
zDX)<A95)+!NpG#qwn<Bpg2E+piFsUGnh{+PH44HF;eB2kr7sX#ASF_Op3K!~p6|(T
za}H<ryHXJCI&vd}7#aa7f3y`-HR`Ws`1`y1@<F1q(5_oETIhRexs;AyL22qQ5BU?U
zzpR6TKXH=r0ab_8MF4Z-nN51V$hNd%M}v7;;AoEU&Y>`M1f(BJN6kAlUI3s%`b!$V
zvV43hFE}h4^;WL;QHs^rR{xLD_7HKQE!`j7BQaaIodi&_tK*bz+8ks`TAp<AJfrd7
zly}o9)`6e5Qlcqu0!{cdO7sk*O<`@}!RKx}b{6fUQP+k!;0$`fKbHt(jle^X3LSQ>
z3L<HqU~6k)n{^#KRa>TfYZDYfJJ^`Le$*>CdG^Evgef{9{>m}bc}U~EHL=4v`(iSz
zI$|U#04ve<fwFxV6p8B%&mI>Nj#bJMK(Gr!n4kus;U{1HP+c?!cmXzpuzW+M7g1jW
zx)fUHO|bJO)1}*&L-SgyXyzV#Msu_yP%;oU9lpHzFtP(7=|7(xs~2pnL^zv)4eZ~4
zEmgMl-7H<)vkfr|_**jxlMpt}6R9@4|KE)sf|`u?+r{Z-QbHg$eQ3!mqZTS4=P}mJ
z7{jT4bx@Tu(rjz7XzHXQMh}Sh(5t#Wb={3UfO0~1axyI~q@_$gntvQm??iX2?Dy~e
zcSA+&S?#-vXp?-sw+DNdJq0l9VHyip57#Sz47d91!C03B8?#*}G`Oqhsa)Lk&G1U(
zu$1=R#>ST<y-tNcT6du(dB-7G;()hwvAfwWy3RKcuso4^s&bH|$wvFhyaYWkT#fch
z5#435n>SE=BOoTIp>z_LpLdAiQTaw92M(nEn<u5b6pFmJF)-~nQz$N)I>3k(U?SA{
zkTboa%5HZr8M^lL#PtycKC_T(g(unVIh}EYvC9;aH(h%Ya}nzQ&~HK9%Vz$`%q6#%
zjdNCvY5=EDu>t#b0}T<L`5P7d9Qt(f1^rZQ?aM>+`LY~Xv)&8-!HPZ1z=dzNe$sji
zjWS9*!3cSb-AlcP(K`T(v7!Z6H!uvSm+V#MV|Vkk+lMdOF$EUDDX95)%5lcPF)cxO
z$LdZi>Y&_7w3dRe16_{vGlTDFX|q6qn+dMSlr>i_uzR|YfpXTH?q-0oc}lRUv;J8r
zrxKxY6K<w%$l#tPIHDbQT)KXUHXpAJ{3}>Gi2xbX6uS}NDwQHmtmnnHpaE;GN73F5
zfjscB!;AM}kaGyq`uTQ_@7IbYx2M9gLL6kRhuuFY5H}O6Z%4F$)|)dKAYF$iahgIB
zErQN4C)P8ZPpFxIZ*C1Ev*4#J>2DfQ5loSWs={EgNMl9jC4aw^A6w$+#`dWqt)kr7
z!%=A*&Gv#5bB^`ygL;B+{&?G4UA&N5MvMP$N`JUPXBYOVDHHPd%qg}?D5~KRsJBUp
zHjpSfSP~GJG{+;`EuW;5dlC+v2ItLv3*ZspPr-b77j4xm^CF{Ytg|rSX9>#OTMmyf
z2+my44<^djNRQ$-Ebpeg<IJxt+Q*D_2v&az_;mn?um6?kT^Vb(uWW2*Jv}A%(9el|
z08o*W@{rmX>)c0f@AEPgFj2z+YFTvda;=<}qH`&4%wsRhO(n<#e|08hf~E?cIP^p9
zYO3*8m}<Ij+|<9MZFX_cvI2_VOJX{SOijA45gCmql1NY_$W7=8Jv}k!Fvc*kyl#|*
zNv?w%ckQpI4_xS8`Bu^$9*m%GbJEkN(8(;I?XEkC$~0Lz@{sF(`*X(_^GBe0Q$C=+
z@UsmIqx}iVl3*M3R}UrmlFpgoYT2*ay;eDmy^&%*{H*^Vr74w0vtrnoYa&$MP}n#y
zwph?gcGA>->11>*{DEH)DOvm*vxg{yJ#?Rf{YVyRSE8avAWfM|4rq7UP5(VCGhEHC
zGJ73kC^FpwfK3)Hz4z+2kvbhfewqjsXL9D-X}W|ImFKfgXz5QZPgy+LPkfnVVCdfH
z)%~^yXBhgxQvoX1UKkOK7<gzh*IqRg6IS}??`|e*>?TEw__#sUVB#QBC`S86gN;KO
zthGc!DVed}!0Qt87a~51pOA?B`YDe+f(G=P9abrxDxNMQR9RS|L;^PXyJq+|Sb>BG
zs#(^G!L@wJ3g;!|VGMrU{JUxWzwOLVmnej5*=sp9iDB0r%AwHbK?*Sb%H1~hU&2`(
z^Tmvb14dUDE&+-pd>u=WO~^PxM6<^rrR4kaAH}OdeX`QBiIi{BtTkCE>zo3aD2zrB
zy>AWc7j(*=>*<^)#kP$8_LInVl28-<YN6^~g!OOZfIz<O3Fc>k!+RpSd%|xX82}SI
zadYM2YLWt_3X@;2NncVHzBSmTBq+Gf7NHbiIle%lrd8K5+$nVLM?|o>okQY|-U5$E
zgIkq~X{5lfZp7?)!nf*&d+aL*WPco=ltG&)=mv%|PX@X;g+5ojrzHL5lsL4zrk$c`
z_Tu7|c4K%EG}?MAeLM}Pyrg7y1Q)Nvtb7}izsEM5SMOx>vJvQ0c*!UdarcE2B)!R*
zO)nMdsO0{yoIwbqH1nqY(xmUpj%x>QX#0MTkwgD;ocSA5VD1<ZV$}O8dvX4gfvVLj
zXe&o9r|=2lPWqD{kU3Q=q{Y84RSN03hoSs*><HoDHF>K>P3UgYf#6bWbec^sjeHt>
z2I~}HEfn#?IVIrA=km2s_#KsLP6hzwnESF*bQVADLJVDc(;Sz?y`Zj<&>N84{x!jo
z#<S*Xp`7(`>X{E=K0k*d!<SY4d+&UE+q3;vao!3Sao>8(KS{tNsjsnx-z#niK?yB_
zOtVwS2G>{>+hjx7(BqWk_fG*7n7lVR#k`>+{Nq^|awE#t*RJb)F_aAEd~rulNL-VZ
zC;PE!k5AKO^6<BorP3ZcKJm|nz(^;JInHra0PyqByb@%NQHqtFMnKs88%LVceSje8
z-d)E@!x!Pm7cBr09_@-zdJ?@xmA*{v2hdv<M$&!C<(D6D{gihTtMo)~nQof$7Ce5|
ziUU%g{OWF~EDRvD-TOQH?RJ^=2npLtn;n_&o{G?O>Ho=X%?p%1L((pg1|5<q(G+`T
zB)FS02ji7cj6!*ELeNtU6A-*lsMpLAv}5gkw3KNhgOgj0vkbC+^HQ_f+ce9$FzI)k
zIuK0i0C}7Z=Y)-G^p*?S7v#$kW1{L@3&gV5z%4ewW0Oyz0}V8fz+asM`Xo&28j;q(
z)zyKw6^}q>c|^osTrsJy%0HaX=FeB`tKu(af7gPWQKjgs9Z1L{i|dN?%&~A{0z?^}
ziRlQCnN4o)Syh96bNdlo<eK<JqJ#04z;woRt~BiaJJu!b>GmJRg;RgFJ@bEEk}?v0
z(^AMJqhWsFhNw={n$7(C;MDiP9>F5LdNZr-eH+))$yE<Ayss9WIdl?x)#N)ciZ#Lt
z3-zR}t`Xt)u+2ulBt0wepzjY5XJab=o=obqH{P(`(G2*mTGo}EFAl8o7OdoI18iRy
zO8y*Ur%Mpvm%$;ZO@PLq3nvzIJA}fs#}rEo4d*y%<H|6dD%WRawH$uFA>#hY*>cT#
zrmyQGx>aTl3{^>wt&aaBpCq?=EqsIfrj^gXNIs$RXjHA=$U&{wrKlv;nVco2?Akg_
z)0pl6g&1L}nfYJkMlRA5te@KHwiU79i6Z%2hWcV}g66hgXOOrAU+w8;h%s8mT(wI>
zI9cCy83`)gn@U6rvgeU!`VtG!CCA(hn>Va$4i3N9KtDPa7D5n06EKv7Ua*%e=n+S9
z<;DAF)42CJ)@(TFB{WIT3NCr};g3wGdBEx~6)EeaPN|G98(UL_{~o}HQy|~igkt2j
z`=&)q6MO29K3kNSg$nEWs-^w$*ksO}6Ytcym&+gs?zh_zH2qzY&a|pHM3P=M+Y|pj
z4;O+m3aLGASxYMIxuo|qm!oC6!PZ9eL&9UvrRHP0z7yZ!72s=&huEz2Xg(-M{Aong
z8phB*q0Y)$Vxw^yJXMKBRT}?ygM0#UvyjX}ZmtV8@B%^GtR)8caVeGcb*@XYXCukb
zi9BHEjpg3c;6QtSL*lxveY(g<6F4KQ@$-9i8_w0=E>u(V0WU~NHFe6JFB=tK{RH?F
zdd9{i%8)#=0!0oA=$O_M+)uE#9gFxn%swNd``PP5u}5<QGs9xVFPKGL^Z&fIc+jHq
zg2tMA#H%Zx4FzT&=3`YPd<`gD@Ux~?n2tWH*NOHeP}W*=7>D0r@fawz$nl!dyZF6b
zwtLt9!h#?th|v?nrk77O#n+{U+qt@M$a&%g?^f!pyPWi6riDQ&7#OFEe#KMBk1@%z
z357MnEx8QAMhKSZLR%&!y5iEX>)o$m5ZFbAg1;oMG7rMdNjgb!m7BZD{qRcKX5hXD
zE8&J+W}m$Y0Y;$Fe&H;`mXt0=X!HD&E_6Li)Fen`(~!LO`oZHl-<LThOz|xTzbTcI
z*!O))5~n5R5;(m^_ZQ$f2aH}BKw$Bkm9a<e-p7fl=ai36vOy2KZgo@XX)1<zkzKe5
zuGez<u0=n~zJ>k%)9hJcVwh`&^)$!LubUh#gfFb;O`ml88DxUVvASdCjB-{%VE1FW
z;XyZ}+eNa~*!`<RG5sR_#(2*rC85~e+~+J%Z?PAbLpdPxOJWoTCc->dabW7aEip!L
zfD7a`A#C;dDs=6OgS}8fRj#DpFX<B3{|6Cj?9pq1l6$W`>4x!SYqqfz<*)rbOh9a+
zTGT879+UjQf`3`u-_4kBZi#x68X!tz%1Nv*6q&iRa*Ap~UEhRPv2~c8Lr1C<YctLK
zZ5}#3*RjIhjve^XO-!CSNHOVwIm+2R&iT@HEAWkbNxpjNUW$5)=dt*Ul!IODQUAl7
zeuWfT>so@wCW|%0Lv0?e=s_vxdffnPzJ*^XN%x7WQw1r;$-!+D$b{yvXssC!?mZz)
zk-aYNeV3W{*+A0MqT;ZHYCI&k2y?f(iiiObSZgiQib~H<26k6!CDBZ}rP`_o8+uaw
zb%_tWX_fm!nY(h#Klir9D0Xj|M$jR0?RpH%D;8RdOPnRZ;CY$k0@(ngblXIhH6mU9
zW1uKAeB{q+)0jYsCeWt*QF6srUE&UCE1-PZG0eriz1OzM_xyud*!|@0XA>Ig-GrXJ
zqxn-=Kdp)n#9MdZviUb2&u7Lm4SnuY?fo77MN@mDSi9@T6LvXyjO%VXdlQ4aRFQm7
z*k}`<S7X0>^ym`Gl;MJ-pd!6ww>iCtd+bQl(mgt8B!qG(Sm{&xTQ}h?!)y}n(IG-I
z8IZdAA)>ZOT7!{SI&b3N0~fca^(;h09n25_FM1ltH-M)Yf0ez2?-Y3*GXHrCFOxd&
zK6CS!q<(Ic$Hzq8U=vm{I<bgd=c3BbBNHb4LMIAeENgl0$IfRuMf}ud-ejt#?C2UZ
zQ7{7PZT^bkjabo6StmoM*%MZVW--*38n3zfNOR-E_d#TV1b(?5I9HL-u?#IqWRz#j
z+Xetm<w;v<i&z5}^0?foHg&zq@5A9yf9_n${d{`3a_DNCE~nQ0c;d=$z2>cLxClWp
zh&29)uBGBycl~Unt#1~?J7ln%{X2dylvasVyK22pR+|>5eTTF($>NB-ofprBA?LWO
zPdOxD383zvXM9!cI5Y=v9m0$MUD2-g2BtaAz21E#|15b#kjXO{43PaU;3$kwXy*NV
zQloSmJ))H^-j1fEGt3r{a-3&s%cf+r<e+HIGc==w7^ZW4;t~)L?n{a0{HCg;iG!qk
z84$cP$xq3cB{!;4rrj-iW-xLz&U92d&;8|S{_fGba_Q)P<n0GINc7OVZP)mx+8^cO
zqkkMk&YutZ{T}fM?aG$s71FNv94*~!HpYH^k^!10M}&tC<e9xs<udSo1kNyQwRI>v
z%j{kr@9cXGo)ei)QnE`T42ZQaR#Toi02i1C+^8;pR`=!@wxoL=>Y#NyWs*oSvebtb
zpHvhjH8!jmW50zNtItN^r0@epo=A^$69H#kFPdO~Ho?FCGt{Z|KWZ1!MK|@2bHl$U
zumFd<8`sP2K_>HZ8AEt)!1ev$F=IrMm|6MLTQX(FRpB3IsUJE~9$@8#eRBrKc5{sV
zzG)+SUL~)-)j{z_#mR7}5L%SJwngSF9Lv^3<oNocQPL$^nzi2@8WZ0>Q~5<9nb6O_
z$64Hi1C^6}yeQ&9G~0hS>L*(#q3q=%*_^E+g}2i)3-5I_e2937A2)k=Zz}mcJ~c;~
zbZc4TR&PrFU~^YtatfumkY7)^|NLXyf>c-z`#>s~{@1VAk|U<lh6!>^#SnSVL9==0
z{V@^GW7d;q``9%MNQ|kyHl1St?5)^*W*_mhvLEb2aMLd8uUh+Xn8y<^s<r+;Ju}NG
z!@+Fnhv}Hk<vPVID4fuiH$78851Z6kVq!503h`Gux&L{=DN4zV$=A3)s*c)Zm~zVw
z?EHjz14o4kNNUaJ0M*E5fMZzqI_HsDGH+6B^9$L(RnS?UE}ZOTSJVhPb)JA7c8|=G
zS(K%?@P6(y)3QlCk|GjGT@H87LIpSeq<*<`EVjMV!EHCcG}gZi`my=n4dKvt9vgOh
z7^gNir&6TQSqT&R#ORNgvH+}N*UDOVSQ6xGu_o`KmyDSRN_|tCB%=#I?V6TWp3m`u
zq;cF@Xp1m+60E1Pr<Mrk3~Vy%Zc`73>KpW{tZ4x*({!>}?a#^@y_iyB`!hQh1|1)s
z^eYtWu>K+w)&ZJ?ZVnOw?IHpC3^;m@b365sNQ)@NJ!JWA+JIbQz*cr&#*<~hql@s>
zS_txB>}IsEc5WxBa`><TRvki3EDjBG84Me1S!R^7&BnQA&M72dJAcCLGQDf+v&$mi
z8nP^TDy-po!5Nyg$=QkCSN`-03klfrH0d4XZcDavR}`po-uh}lRQYZ-yau-c$6PJl
zyk|C(EnIzw3|C|e5w^IS#OC!WsK(U%LXn;Ll+_R@m%w)p6&e&<$rl_`>5T8=x)+AH
zaMAm4_}`7f);Q*cn)v;QaUx|tpKqx_`{$sY_pQXlQR6swAEGb|k8g%^N(gDsrI7i#
z{tpM;`owrs&*znrmH{a<0V6Rlc*}n`24MCVB9fETFy)DDCBj<<Lu-Wa3Fs!j)C=ke
zb2Hz$ii%Yo1@+=Ut&?*|@nE5$vUZ#3#NHlZ`%UBPlDR?iQA}O<F<ypD#D3<92R-;D
z=4%FWfMziuK%n#JX#V0Nd{cD6Mo1*d`Ll^LV+z+H24sG-1^ph%TqH!KJnL*O1a~lX
z;7z*P<!Mm88P^O3y17MMTuUxp3V~Q@=?q?Da1Z68Q`^hDljbm*9Y$KQcS)NHvj)@Z
zU?+}SiQ@ql8y6d9x*_AfN$O!qBHI|~AT^&+KFe91ICW1_O%7~M$V|JBnjqlsF-Ay=
zFf<c#@3h(xFEH{OTmXE1BjzQ2W>W)M5*IS1E^KWZM!f!Bu_T@PA%k%@VrIS!>%6*N
zq0)p|=pj{dCrG;Vt%_<e2TS*(?gp6E$4Qo{CPF}{{hS0S(dcj02(?l=JC=kuE$@os
zQncDJZzE!0MFZ(i17e;dR3Zet0!D3Em{lKaB-#c!P*jn}?}TiMWa$i2&NM-3O!h*S
zBy%uG7g}Yz=MM}x&2(`b_QhuLT)F=5Okt)$(X&*>wcO#ZSv;|22Z=8=RlWo?gx2K2
zax90{#M)ZDs*{m5tO1`g|GA;Q$}seA^6{v5cGxej#ns*$Q%Djo#$2Dd&!qcMwPF9e
z!LI&&8@0U*wt@DLXeMJYNc;IaW71Vs#R3`DYjGJc>)9`@LYWdQi>d74s1xW)lH$9I
z`St5HZmbl_&9Jsw(FOgpC9z&?{@r=n>|zbXsjnorpd?Z5@p&mfD^U&H$l2VA_}9NF
zz}>V8>+oA9?4xa-8L-@Qc`=#U5RmHgPG%K(7DZcbYwpkBRUH0xIp_B&@mE3ByN}(9
zgvUC^IQh~CXmXD%k(C5br%!Os+J;+^XL3f-yBDr%B5AuJ8syo^;g0TEaM2c?dfq$R
zG0ysfsz?zcC1q~{fZ#i1?qlc_B3`VDoO|f{R9h8XT1;yGNm7ebdHXAC>*YN<kjLne
zrI-F?S)*x^&mLnzk#vO+u9cz1|3xKr^9_R800llGx|iDDMy+ywn%hS5Gi|~rkdUQJ
zjFnZ%5Cfnlo<!Qc^NAzesZdvjk3v2BZm2x{CM7(t<?Z^|w7r)8E($OfNveks^4>gJ
z#OR{C6cWc`kxDmumz#LG2oiisA-y#>GqZ@4+VwaKD?yg?4T$IQntYSud>ec$&5mK7
z<zDrFH>?;tNGl5BFZ9am%N5NLjF#6|ABdM##w&o67n#*vD*^&{+prFN+2m(f^dNs~
zPh!j;qH)5~1pRspJo|3<d+^`g@~3q&On~+H;YSB^ANxCnw*;3VY0h)Vy=>mTWK#Q8
z>*Ukx<DnQ5N7aB--mq-d9?7G1N~__b6drAGY|o~Pf15X!1N;Q?0gMHrX@qSzLMYj3
zly1REFnGW`tre4yk1OfL^y!%mlv@S<l4*{Dwd>U2rbZ<Qsp(1@M?B?MB4>+DbJTg+
z#M^y3lp+~VYLxsh$9b3cz|~q?e<Fq;-w)6zWQ?mlM?RiEH}4R4%)%}7T8}~6>D!6K
z-3*J?7=&F@%10icd4UFw#afYNxF}(keZZgyrb5WtMJ7W9>m}K)^qT=G!YR^K4u?1~
zzb|1z5};$R)!hdqee2vwFM>`==>kILS(+62u{-s&CU>>9D4rC9n;XQ2AHHR2NGjLi
z?1V>8_TorWD^8<Cn92plKj0r^z`{}4rel2J^SDKTmAff2|M>7aa!z5j!PU?Gf)t~E
zaP(~L!!JbY&h>*+Ni_%gA8UtdO}_#a(o)7RHdVjMdKily_`08cNV?|O{}3H1vH##r
zM3lk1z6kDX2XiI4DTK>f^48KIhe-VT{^e|?BE4QU%igB6oaHY(j=A>u#k0fGTv&*M
z%7J(B`aKg<jKO@<RI3e93F`fL608SWrf?DxejInae>AqBX*doJQtMlLM!iP-w<gEp
zg#x*1Gr3GYSqu-BxAk!oFBu>ca-|=VT9*fhf`O&x9n^sFHxB0bm_(!1&#DnG@&O<*
zY7Y9{&rxxY;0w7)KCZL!FEt|x;m&LLP}${B9^UOrA?`m6PS12b7bongZ{KBfCjPwt
z&~@3amOP_eTw&HetBkYIhJ*{Z9ZBhl&U4_7o=k!$dR!q01f`smwnCGNDrb}j$XAJr
zxSa^L1j<j!==NsQw*3p|$l1a!19{Zg0$*zO+&@$Li(h{&XJQBnUwQ%Ms&2L%7MtX^
z6FvEzUD6C61Mww`1~+hZ4^oF0Q0g7a2~kI3*`GeINSise_FYZJ`-j~2X3Vg7u@EGe
zSPPjn#1@xO#N}J`Y&<-^YL-6kq_POm*~-kwM3*QNao%~Y%~HzIEFLMsg#7LUaQ*+1
zwgHwv<sdfy;LGdxK?FBxGvu%7N=uUAd^d7#qTue)7^Rw}P)QB(vPOs-i3+uQpDttT
z3<z3Iz4pye$ypl&1~v(lcc%vxa|h{;VmchkrtB8QL|(g(?gf2c3b~TkSK;akk!7SF
zKCQZL*EtM@EFeT58UK4NC)u;RMw#!cP@H^6KM+_t!nHmO_AFL>^10}|gv@={f~b&-
z5MHziz$lkCu0<ZHj2!?fe=B5d#Vhs@<d4znEzb>h9IDG$iJAME_Z2)-vPxF<5NBQC
zjwN&I89qjJ+4SJxsyU(p`Uqwc%sY|rfa3QWYfg(rs5EV$#m_aO+(Y#Pu_#?R5bWK)
z7%8{or-XJ*9+tc2;lmRZ>Y^Xp{N6hae)gl27$2dmzkeq_>;di64)BqEjX?EAuYOu^
z(V<d?7pHIVuEzKOZd_LWccawdGUUG-<;3lGM-HR~&G@^25j`v<9cjOsK@a1$26u#}
zXhge++3gQfkfC~y-2$1Cf89~W##g#%OjMq0&7M)3YMt`&H<A)BcS<vcP3zpQ58*vu
z9@UY#+cF=ItgtR>swYrl3q#zr{AQ)A)L9~+iv1>9MOCRz+ekh1A=F8tBu8@D@T0q{
zsLjxfc&7rvN}he^!7u}X5TZc~=^nb>BV9E@8m#T2Z0%?(uuqHH5{H5|ah^sgBEilc
zN%xtX&U&j`r0nL-I&KnaN@Enq0lUKHTv;R62;|s&#hQENgmwHMp5J(2Q-XAQOu0?Z
z`*NC6UEB8}&x{I9=LwvibTh`^TpkTb+R$#|cn-9SbFAfVZ}W+fwOT|)<kuiu%*v3M
z6*LLMDy7mTcS)$$v>-~#c7Ap)(M#~MQ#B?ia*;qH+Hd|yO&{itCP|^$^Pixk6t0we
z2RW5;NmGA`8lm!G=$*9Gtu*v7zaq$uqvFhUCy_Q*AG4WkTplC7zMIAR#@gKw`(7o*
zJ|!~i(55G8Fh(J_Q)VOHhTraFp&qVwO1?0Y@W{lm)|@n8{i1*hRkZapb%gg#n%n(e
z-!VcdCeFW~mfa`d#OQ2_!P?+?ZNJ*0=Sr!`E=~I058y?}UK>vVDVcJU;AU=QSITiB
zjiLdEH#`V0IL+Gkt}OOvGwf99?{<D9+Yg+`TG*0C%<1EICiSzw)SM=_`%$VPWPk<3
zwj#vPrCV=cB<-r(-zc8%J4b3u)UXfFD=FoL77@u))*gTQ6^{HSON5x79_=QXlYFW&
z2ir@6A+lzDBgkPLy_RCB{9~1Vn+Zi8D1~Xuh+F3|>8$>?S}cLZ92N1^<BJSkMW5&M
zYukrw=GzSJ-%5D*A8c&0cQxw|i^o3n^T(+N^eTtQF^adT3|e!+GH7P~{iV2CFhUFk
z(<n>v=3+;_Mx;_Dx}#HZ^Q($SiqNFE;Lu`^M34Hi%GOS`ZUs;-%AY8_#Azcb<L3Xt
zb9ig#UG`~@g@ZMZdWsfV8!q_u@rMl38s?wrb)jZJ&*Ir${$bc-S=G3$1TTELG&AoH
zcn#a}=m;di7=yn=Dp4jxtgy24k?%*}?Ox=d{Kml05`SMdgx{pwKdxW4dbEq>ZL4pL
z;AhZMFMp9OO$_{7C=JEah-VP&UbK4_rEu3b2rb5%H^|+``!C<gDApkQ9@}5?id{=9
zD$k+l(Rits0A3-1xTePO84X4O7^`#Hg@oiUEkk<MR>8j%cB?5|{6adTzgz>nReYcg
zq*aTHc00!Ky917{ZV({Ye#7n|6f(N;gs1KE7zctE(!rs0YDGW)l~vfTLEcK_ZG^T=
zco&xd=?qry&sQzGGvAw@pBFFF?rI}oe+o~Cd>?Oj{AHcq+pQ@?>FR*rbf|}SHUiIK
z5Ra}s;H|q@f3l*$&KkXz%=J;O;zuvwK{jB=s?Oj$Pw;?eu9n7Q3EaHD4m4t@r_Z@8
zn^sacR#pMcv=@yEA~E18wGBvCp{)RlnN!x@O+L+RXLvko!;>%fcz-~QAQMVj2BmYK
zMx0)K%u*?GC(-1EN3-GCdhJ6<0RheIZ~-WUmT&_O0Ki8SqStPUQ2vi%rlg;qM&f#;
z7s&Enctvxf5UPwH>a^MZzX2zH4xl6m$8+7%DAJNF8VnAb4i9(RA}&@tiK(EpIbg3D
zc7h_H;Fl<)DDq$bmPVYT>93j%tv}0n2I$@aCqou<W!f-za^@>AjRMWuNG3ZOK75aA
zf|J{WZ!uYY=?@A3x=XY@QGeD%1}QV|0#mMCG#4IT{9rFR(g=zxK3K?lnU$rcW$f3&
z8IX>H3}|=q^#oI~i3Guf?zl5$vwv(U3hH5d9d!MaEt5$^VtZ?%7~iv-6M{FFYTk|{
zyDq*AzmlJgRR4RY@0>_BlU?Xo(0+m?<R4@R$aGjq?ooYMdn{3Z`mQhVvBv88#IL|>
ze)xI5)$oQ`s2@mYKTM``np=0ktgQ94lWroQ+SOe4)#5)j%{Fhq*YrxER^q9+Shel+
zgc+4a9V4iQtLHXSuq?;Zt74P*En=CeHzVHx>Y32YD1%<xeaj}{Jbe-)`45l*?WYn^
zks)8L)>t1d+9}3g{!-`~7sePJFP7LT_V-K*&%4<a#W!BY2-_48>%7GjKnLbtqV6-)
z?LFV*Y|Y>u%9nrTceLXBuoAere#J~PFDyI0Iyp`NPxz)<))_tr)Dx|gWER-gpI;hX
z$`1WHq$^NDY5!ovYI3nD0b~~7U_@JOTW^HF&qh?m)MZ8Lj9CI#;%Yxv?-9N#h5=n-
z4{S=(<q8kK9F2fP#>+kcB}tMZ93d6<kjA{`)Bmyb?(t0c|NDOlDJ17pBsS+mP7_km
z$i~QN&T`o1w2|{UNh*gGn?sJ3^Z76gIV*=b6>^wimGh}N6glMe+xzqT{^`HD-PrT-
zxE|Ma-3M^xyu(d{^N~rD{|dNBLNn9EQInl3i_b}QljWE9Xw*?b8~vdagEDA5Te)4x
znJskcs)o@kaIxi@$-loNRuS=zT7!2XX9+Klea~n=IsQml6cnv<7;Y+N6AxE5>NCm)
zp&vR=(iUnX?<Tzb^p1b(zjKW_MLVnT2Qa%pCn_;;(6SF&V5Yh7jx_)0%3`NL>C&KO
zryCE{j_Y+~W>RH)y$rC8sG42!P^h4#Nz&(6HmS6`XX`WCqT*^OFwF^1N*#YkQbk^t
zPzTYuG9!B@ld7IS7hd|g_GpM~gC19+g^OH&&-`A#l}7BQQmH)kIgI8WEPeNHO=_=8
zv?K(tXIsk}*PaI<=}9ZTbiU4CmfPc+|DEF&s3==K-On1DDX;OUpI~OR#%|2XW$#?E
zN|i^vJ}YTFI2_8tgsKCfc(sZZ<?B`fwthk0x|wpphK5N`EIFak9iD|7eTuCt(<PH$
zhl6GnYVGBTiYmw=-rsbLA_1D-c??hI<kX6-7Fs6PtiHv=mY_F!<b;3pK_@ENr*vD*
zR-c5Npm0U0$R9JKIv}<2D?LxhSs@;?iB(x#Uk#r>_*QpV8C`K6FmZEknKuI1>LO+N
zwbJT!7SDLRO}i|rYW^n6nrqywt=!3S>Xx~wI?gC?F01NS%iu!yi4!L}MDU?XVO2Bj
z8Rk9lx30kHl$*$*T+&1nB+GPG%XLusVOXc8Kjx#_k*;mJmIdr~B1OJqM;H$8>XWYX
zR(-x%U^?dt?uey$_vMX&IE}{EqiFnT=s=a1yXTsW7Dum85qTblzn3-j<uzswhTRiT
z$k@D!zy?py;Ye@r=x90F>vR65|Bh`isUg-F_M<t@i7!upo=Y(c&(hm&q`i^MR}PiA
z82W6U(4n!YQe>*@t&D=JuF&v(w(5V?$m#+(sAJBY%8)n113zFZ@xY~OZBnD|Tfa%J
zG{(AV8&{>Q?`7s%Hgm0cA+B<nJt08vV0kw?Y%D9nuz{?Xr?JTNa>rB4_*_m%U{M?m
zi@+2aQd13or@Xm9rAlV^-{%c{{iYZcH<YrC`=s=7O#)DF*p^O9v>_E@*UtwZYrGyz
z7_TD+KK@xvKd!W~A-sN2UNm)=9J0?Umy6f>=C+LZGCeQ5R7{K5wHJ`CTgsAcw3<5o
z_oP{lochsqx>$M_c6+6vO4wxZ_2;Lqv~?%HQ>;S4!MoqFXNA$HGXI@(jN~}}ddgb#
zw`?b6Bar%X!_;6x#!c4i0jh14qxARPl_-t%64={6QFkXzH0Zd_sk_Ix|ISU;{Gs;#
zcdn8?-9pAsWp$psG5Wyqa^1+PkAVxXPij9HT5sn#3YFG~K2b87_$Td0{#!AJl^6*<
zb}vJ0`UZUvFSjc`jQD=WtZ6G^7GK_MVr=I@sI~H9qO|Ak`Hal!8(y}$a(5LN&Nx5z
z#<&UDbYxr6_^fg($_dokI=Jtet1{C&Z`T`n#VO_0DC~`>KTH81XzA-{Xixd?TqqG6
zIILn{`|qX5jfA~G5aIp^6WZmkmKC4HM*WtH;hdGW?7fV}P$yN$N=oxm=>YSg5@we?
zrkE(U8%UL;AZ^w8ZQXnRj@Eqnn^yv_uq?gp{{B>H@|d=v6L$QD_}Gc8IvgL;J&i25
z1FIKF(R8}FXI#Ttcz%@8DDFrt=E)0icknO+n_xaah3n$k<8$a$6*9wXO3Ni_=WVVF
zNyCJ`WT#R0P<Fii!hPrIB+$LFKDJ9?CCqQ%pCCT~hnvV|SrsIAgsJ9Ll&}~rhVdPm
zCA~$ZMjW*%IiNDbu;N`OrI3iK=-ETu>RvL?fjo%wH}_Ms-Q_z@7^$%!iMUA*g*Ih1
zd}4N5;XBtcQ-q3l7*a_g{#r(r)2~ZlK^*8XcHnmA;9$HRXh1h|X$L`Rz8ypC!Uhec
zT<qY}{XO&@OGMIZ*)Jfd+lf~XyOU>D3`MWibTiOT?MD*9tC5wuOI@)pt77C#NieiS
zRUz34%o=)CCWWcZC7)@if@kga%A;<fy>sogT$YP!Uh$9610G7G*mK=ay;45KWpS6O
z{>=$fgG*Wc5;!Z^sx)b{j;e=xK&LOp3sxIqWkhoD-0yUDoGt1`*^&`xRRC*TGJ;K6
z1yw=42sUvZQ;85rQ6FXF;#Tp<$azKYNj=R(^VI@gKk-)+q_>O2L^>mD2iQ{zv6-$W
zf3G@s&0KJ9jCD3K#HJ+!_jWTS5dY)#DbrE}tI3>%{~wkktU~@@1xWOLi3tg@OBV*|
zrK$xXI+>VCPSH#SZLW*^ih=$EkljyP&B~}gE$o5LX4HO8cB3TLrRqHU*GxUDRij0i
z7^E$b3wd-t^`IP-L8PnI=lgPB-HZ|RtSgU*emAW`!Eh&@;k5+DPBom!<D0*v%p%^q
z@GS$%Yuh(f1;vZ$r1nIM&bJrOpwsJdOY>8lZ%OBIDSraKpfRM0Fr-YceGMZCsgOVJ
zGc8z%xBfn8?uGTkI>6Pf|M@RqkwnYD(M8aPZ2RV^N+99IBi|-)RsmR{ZlUQ~Mr#B|
z;=m?8Jh7R|fBdsR#ObZRY$$0;#_XL$V-@+C$uDimBH{Q(!&iDn)iO`W#PKjNVeQp;
z;n@pv#%|1$RpX(;v1;eXK-=Dkl`_!CTy>iWvwr=bD<{DxNTAO!WdF@G4Rz0-_SwH8
zelFFV?{Ae(6waBP?Z~t+!3mpQVpTe5=uRnD|5K^G)H_b}pP-xAJc;IKnN3My(z9dJ
zVS1_K(CPZPAMRokGfYPRog1CWP*lN|kD+91E*g$1C$h6;yrd1i-WN@hZfwVq?i)&n
zcYb06V!m&b<57}-8YWt-!<5kemlY6Yv35)h&*<B=9An<6Tp$mT7c^Q}iL8rwbYwG9
z4t>Er$yUHOZzlf49e$YoTGTdmyk1(F)z|M9$%neX2T>ykY2g$44PPw(`d8}@*<`XJ
z+IOt<YEO|<H(9Q=LVMU;D;K>m#G^7DA&0cFf}$)FExdaqGAJCFP|r11-xB<DyZD-~
z9CEO_VZABp==Lh`1x^N#jb^_go||_+$+x!BPQmqoMR^MdG_=D$TgF&au3l`ZUgt*3
zf;NmeIN9*CSct%{zisH7%)}k#cesh+#Ruj^&Sr`d*2;H!xQ^oUBSA48Tn=!~CdgX3
z%f=hIPSj58jDCx_$QD8+%&g7aY|XyJS6cVCiP@s3*WGBey%95h$V~Po!ZK}YTLRUl
zIcgqKAPe!mZ@wUwO*4(&&MJ4EmNI+=w6siCdq)NOE{Cf7w4GOJzf;{KSnjA&A*<!)
zXX(zRm*b*DLPpq%R$&ZZ4TPM96;6%(kag9Iu$)O7nTR$Po3P_cKjuN|ILZ?K>g@#v
z7-?<=8hjX%gDxp2q6)&60)7u7kg^uhMF)o?(!>`hN?^o`{A&E0Y0WmtQ+6kznLd^E
zjUt783a@$bnEJcN4pynH;)q4F(lpcrS3){YVKT(ue5q1=1Ke-?RFKCfl(c}Iz@!(x
zFkSw{Zy4(O$`zx3B&Rm%fhZkbCel~T1Lz6LoA<`7d>olKsfFA&7!?PafAK@Xm%q=m
z<T+L{C0-Z17P9|F<W$HU^YyejMp^0gt=-Jx;Yv9dL9e`&2h^l03-uz;7sXu3Ds{~7
zHWP@I1tzyM!Me@;;mayk|HSKGc!c}#MjV^cLVdMlETGsPr~zdXmCo5v=lJ}Ig&Z7I
z6&xT7LrA1+nH0@lOW}SuAvrrx@+*I_*eUtSByAfH|20T;E=<uEiPIQ?d-AHMJAJtK
z^aZk<-*1qt4&$Md<Q!}$Mg~wLnF#BxW$QxN3OSf2^_Wb1a+Hb-#G2oCERY2Swk{~l
z7Rb?4r)E0bb(lNkV*Dqqnt<{TO(sP~zIlHEc}$8g4d>dLBL4-LSUvlz4=z(jX17{7
z?-4|7a|`&Fi#RB1Z4y={{l*iXyt3ZDdpkB^Jay=~fo~KaPm3O<R4ZlKO_vkrGiDs>
zyzoWhe7$wC3>-R67~fs^OOoqR%exqr;k`Jje*XoasKVu=eTRxyZ3CyJw8>qc2-o&X
zr@=IwIM>f3lEoIWdN9_iuz+Z-N5wOJ8UO5+GYv??W{0LTJtB9vqS7Bb;=tonDNVYe
zBalnO))sg0gFp1X{Ve$h9Z2(Hyhm1fVji^Ji_Gh7B|llVekTAJtb1A)CCm{y_XNT}
zG2YNu$ZM<S)5d#$0-!JZs93PxZ=MgAJ|-IR_J?ihX<br?uV-|QTTpDl2EA4=joR*y
zoG&mNO-RyP%U>H<pZVh>B3d~<&}~6cXCmc1DUL2$GkL;&SF7VN_f0n*&_!RxGcQVM
zKFqhSMTtUUUnq9Hm~Z{w%P_@ue>;<z1An77Z3ks8t~sQK{8t;!&b58m6(2mH>oH6Q
zVJbE=g@M$h%VqdcvP$iJhW8dKmikW9$!kL$8~}maDRL=d!-Ol3Z50G5a+1cm`yiw9
zjmrr3;Qv`50j>L?$}=v`?r2dKzQdcYv7MTH@sTdc^<1vDZrkzncuESviv+`pNK7U*
z(KVkksb{X!w<z3JE|n>_Y4JjH@`CkR=N@UYk8xSGC48=A=x4omIqGo(AIHRndm8wz
zE6DG$Jd8u+9+KFB1Ea<Q;c6FqIWc4)$;XM&X9gYX)5nM+5F}6cU&Ycg^Dk1)f%xtf
z-~(m2V-a5+X|_2rFS!`2Xg9zF3L4A(<S@3I=`0g;UGgzqo$gLxJ74d7ZZ|pKUyZfV
z3CxI(`vjIUcaLl#NN-k?-fX}Jo}eQZZcmUaMEtfguxZ&hm`ItZ+e?g9Eg5(S!@)qN
z5`V0M^K@K%t+>Ag0$J-Yf&<@1cwFHZXB(1GJVzQJ5!6x$SM7!e;=9;0QLq#)dMtQw
z;QD5a@W_(x@sb{Y3KRT)7nlEg@k!sYnLYzE^fZelbittX><tSz+Z7#BQ%yd0s&Qv}
z^<Awy?{#6RJ@%UK7g{FXa~?51j2=tmZ)+;~w!MYQp9t*v*LYv?-ASOC{ny9QXzn8>
zD-W#@yF&X;&OS49Yd%F!HLL(3wNP!K0CUuDi7ia|7&F#b=A$OE2XPuM&Z-c$1@=kP
zh$|?FjNR8U<y4<XpVjo|dV8Dh{dewyFk;mD0d+eJ8lzuGPiVwU3cmZ*w>D=&F8yYY
z$xiHzl}+ijimEL<O>3aI8vwyM2&>sG`&vYenaVFyA&o(l*DHQPhFd~0-BiAIZTvJ#
z#ZAz%D?ytb3qLC1Y49-*RWsBoy1`pa9A2@LJl4HrDU?_I58SK@1Eyf)>0oz;orVh$
z|8AC2zl)mh1tq~BJ}huOd0YD5IVfuaoM;RIyh;B|Y%1=oR;%}AA>=)n=206m$x7*^
z{{EC;OIYrkmAoXVW*tSOZ#k}%%A}IX!W_qc8hzwy<-p!=R^agswpU+FvZ_XiBjeQ&
zIzImSZf=ZU5)ROMbkJGsYBeOe7=<osMl+MF7NvqKzxZ}r^C=9r{q2?#46RErQezW3
zpJ`=JJt77!T9REgQR8AFBd2$c(zerhko`c<al0%C>vk627ARSJ$y|av!2htD_>bD>
z;81t}*4M3r;46P?S^y~7<UX)(iRd!<nw?jRa{<p3?|VjEA5K9Of2yjOL)VNxys9nT
zg3NdBN0wC>aQhg%RgSsgs{K;qbl=ZGSZ%UG+E9NIkl7X+yP31EvCJKqDjI7h(D6RM
zylJ%asVAMmrI_VuSD0Vj(GftWOw_DydCCP{pBo-6C<jrB@d#CnKRKb{$Qd%EXAmLL
zsfa8ZB`c4~wAt}E^1u1+%Q8MrmL9m>>Pt&-2$aQ_E(?j#1lOUhO1_$fdzsND&@rhZ
zuJoqscOzG?X|eipjeT)&(_xfJM&Cwlm_L%tQ8l{HP*hPSdFi9oerk0Y`;@29^E%ES
zVNPUiS^?d;UlLaLR@PVKklJCko;AHRJyGSJaj*!;5ea46sLFhf8+1)atdC8n!U4vM
zT+82p7$#nO6wY_nogEk|FGhh`tK2e~Q75T8T`JDZU)%Oe3#O{_%%|q}a0?j77nGmE
z$wmT(YlN8**Yk}UP5XT^N`9$x9eb`G83mq8)AE<o<ibV5e;HdTh4@l5`h&as(|_}p
z!UkGbc$^%w9xV;OG0!+v>X4)8zJ@2&)f+P$aR^$;tC~Y9kd{JeKn`@nI$O2T4ZoMb
zfwY&2`*{DfnMGx_dF4xHTN7yyw%eW~cH+kamyEw*nuvEp8na;|s~@tGpFhx*11UI&
z*JC)YwW_kLyGZYw{9-M9X8GIma;t+;RlSj9HZ+MX*XVv9Uj=~jfdgLQRT{Bu1UD&j
z>T@oufcX=?cWb?dK!1IuSOS5(?cYnsPaZvGwD}zEM2@iMU}wz#1GI<JoihdB-JU<!
z$};!xD!-GnH0R4u)>`jW{hrfsfcEdE*Q7AMLQ$n*!qBOki0oBrzBLZawH~r)tNOR^
zB@KV<KJMX_PB*x-dL}sJT}gdY5>(ulN0ubp`$n<*32bQuuT(>4{!xH|1><wy?;nyR
z7i`N(6A!`Sm3oCz0lYzI5qTmKPsC#<0Qjvb5cYZ&;hwig3tZ+hM4rc9T&BCD+Rr!M
zE%BZ@VXUHq*O$2lM_9akSHd6kXdj(zPR>o+;GdGS`n7`BBh)|Tv|=y2^Y^2NlaS#P
zNB#6vpD)E4rpzsBd$!#0^rGe6XJZan(cCIXLA1cKJcs1Wtm%E4iQOQ7+_S~2>=cFy
zDV%48GO&3-wxxB%y!SFT2^<9ZAp1$(sMxDb0!Y%YozWaFim6Umw?IR6JFgp0G~*|E
zpVXL~og&+kEL7$gn@D{!ihY|GF4>1WwjmZ10JUB@L)^v@bQip%q$i*}F|bbhF!D#g
z0^y@tP>T6^n!`#H!3ZRMPFb(6c<Q3WhHKZ#DeX7Og<M`NHsKkSKoNEnHj)C%tE<md
zKOqB7fR*V6^wt*1g;S)Xgm>V}32)dz1YAo?a%fCqFR>H5*BGYGAEsrq6<{MTY(!c-
zwJ!biLo>((4E`Y(xye3z$NG5gZ<N-v?RUI~a$0tGrQ@BBl38yJ4rFyFbUoVdUSl{K
zrnwsYNpm(1{bb9`ks~PXSiUb0ap+(KJsx8Uh^4!Lwx3=g&Ony#=X@CwiS5^Px*Xr}
zC-drwvoT;wB#z(|+=T-!-eBHWVnkN*ZoL|k#%79PhQM6(uSYO!Y7~rqev=`jZ6KB?
zkIj^XZCgEMK2mjFc0pZ_6jtF7MmfR6{r*a75Pz|MWedl@WdG;4@|7=WQK4|k6DFkp
zD<iCe``SL99uHNc!>7p?5dZ4#Nwf>5-_Ilf$(6zPboRs!RTaorDkyZXMkLEcv^_dz
zI3>5vmDPIm9F+C~{K@|N{<8wFy=B`A&OI@~1w@`3gU<K%(^7JT#4XEjeqf7xsi=#p
ze$0T@UQ%Pr=rD+PkgtBWhi1=wkElUQKSUzPwP7mg27tnn2k%XUKr+fu^s$6!0Nv0j
zF2S15Wq!*+KH+FWXFoFQ#?il-?Tn2uUPG$Zm1&!O<4SO-@no^sTh65C@LGbDggk%U
z3z9f9ja^8!59~7?!;NQf7*Iv1p`Xv#a?;m-HhPgcJurGvGab;{d_nzf3ZDKOiL3s<
z69!#RpjTYXaLTti_r)q=g#qmE#zFCPT5jb(HGAUL(o$8v`+c*_;-<<lf2(`hP;x<&
zaZ(|vwfbZS<)8Z%qVX$KZ}4)sl6T?4X*5ksx?~3lom7afJ%ex^6$}+0ln+CQ<*ySQ
z@BR|{h<8(9k_ySmHbU7!VFOyYobVuPt7f{0RoQSv-+~Q(NR!@}MV~Mh@s00cq+qc@
zS@j-3-C{LsVHe>cR?m6nX7y?=rf(sv^nGd(cNUolo9-eo1JkJ-!U&4f1n3vZW>}#C
zb6Ze7>`c|aydlUwaiabCpMV2uPOnxA*Aqh=anwrmEL(Uq1*`Bjyk53jO-8AMN>#)8
zU`v+^Ou?OUYQY**dK~YT?k!43tiEy%GYLbaM)KO%7q!qaO}{LQhg$+OT)$F**SFm4
z$o>!o`QY|BZX?6g_6|EqjJk;S1NW2>ZJMsYUdfnUfi%};N1T-Qf*ob7d3EV&{B>&u
zuaDH(eIrG@%C9+i!kcn=a$Ii3pI*g^^HQN(aw{O=0F%E>pJx*m(RDYtW3UPIQjnps
z>|min#THp%e*4goA99!aZ{5$Ay2&eNMxkXcQJd>Z*Jl3#`%Pas5{L`zln`|JpRDD4
z<zKWziYjU9s@mtEsVwp;C2DR0!d-X(>y2{M!tMCl)6J;vBD)AmjbA`qq2p5a*yv>_
z#U4=SY#}brMGKR!#5wpI_FLxHxw~%2?oNLn3N_x;B;sB*x~u;lZg*}jT~6@4tiNp0
ziJn)Bu@99Df7N7g`0Hp;cGl1`0#z}ux9~{xuhe&ZFF1WHUO2y6YCSQnE^+Y}y7bJw
zcqf|okd`?4JWQuT9@k^VQk1^mG@<+|DVHwBYd?6=cH~!N`8LEj-9OFiO$qU^LmA@Q
zt7~7KnB;oI+1^|r@9l#538-tXmu;D+`ur`f#mY{oxm8Rb*<cgD9EDn-@xc>XS6&t3
zpy^Y860GsOVRo9vDEyIs0_I6nozte_g4-LQ0aCdDW<jJH4;O?1`En&Zz-}TJKGUrr
z=*{QmVI6ClzssV^3Wt|l*QPF~=6`;@9+|sS{Z%eZZCpnWfDSE50^qO(`AzS!<v~w1
zS*vKG?Zv{CpVjITOmvl3@f0RN9{V^Q%_N-GIv#PdQm}38hhlpDvh-I7g%vV$SAHA@
zM*5jIj{spw0e=PNVCM3#`zGJwx4zl=&T_`LEtev84o0k2ep`FW2~)z8eT|#7-!``n
zGT9>g%+llvg>FX$$Whv}79S<E;P1VsplA!bMgn2iK01RJ<XdV+zD#wrWZOg~`YNz>
z>X=SDi(l5P@D|4#-MWd37ZiK<g_QO<`rQpdivc--E&HAa@1_Cim22}$NC(C2Dr)s#
zmkDK7a77u%kizrjL*=lqg>7Uz559Z-?wDznbR|-y)eF47=V23&{;`Pbyc2E$U?73+
zC2StRdu<Db<FSD&xizCa__~IA2_F4r{)m7(D;0H~d&Wbz$iRx(p~^`sR+Bx?&##WZ
z)tI2>ANKke+!PhsYN@C+lkx`Q6jSf%5scVhjtFs7KIc#@IYE}ADU)@xRs#DCMyoyn
zCO_+o6(|jA%%xP~G-f&DMZ6mtVi*TikNDWqT`?Bo$Xjm76^{v~oCQGi59BnJ*{z@*
z_gNaM*c)H>0DYZXANsG*7#b`M+-?Z+MV+8z2}7424#0s9i9WyfgDaTRJlgM+X6w+B
z<j{HkwXpeXZBchx)D0R2U+mg79-cX=8ikC>*7<D1=Bvt8JcDJID&NPcBve!W_C~we
zduGFvOT)HDp_DFlgCvWweEq|=9K3piLw4GMiN7tl?pe8!g4#=73ZOitD5oAgr|q{Y
zRPZQ;Lf|-}$QPPgHaG-ocsfw9FTyX!>S%}`T8=Eg;cKw4Z{S#=s7%=0hskFF>}@Yj
z@b17H0Q!O<VXKOj6CIg`7fy{%P}wAm5U)sMInA+-{9#-OLr}ujo?iBxmHypY5Xm1_
z^`RnEVRpqjqGmO2B~FqI4ic*JeG&kvC|674V}KWJ`Mw;~l0q_8H8S>&71lj_S^_Bw
zspu)ovMzpDm}^#_<lQ|9l)tXATb<u7ngZ0dIC?^!x}0*<`G;zC*ZfXa_#H%hJv@>j
z!j>WlrtcCj2nH6a9|(5CL_>b2rxNaxBiU$C6?d1$U(PeQ_MMO7kM;Nu2IA?%G?<qv
zcdm6|U;Ap?kSh^d4F_=$`E;LL8`^o5_RoI+AbOOs2WTrcPRSYF0hjgJkuUsyJSfXl
zZG7o{dz<q<0qC={@andU>y>*PN$FQ#9A$}h$8vPACkMp6(y)!py0YL;#%qUky$X3e
zur`z|{JZOXSNx)nF-)a^-pBc<7(OOZ7{0oTNyv>K|4d%(@YGfz1%L%?Z(K)@+LInG
zERdb!+jor5?Y=|^a%QwIfoG?Xul^2PV7U0?Ppr7L+>S0SExv|fO+Ipo=gKHUtOfP*
zjmfCyPh!_21urPh0Ce4RTE)y;y3n~+$%57$U{k7uAwQ8pIJ@Qc#ii-YbB+W+yj)8x
z-$)D|A=Z)tRzN+ci~Z^J=5Hm<^a1!waS3`tL^Ivsv3T${Hd(>%f2H2cde8^Ex9mk<
zt{Q4kuYW_j?YH&ws?;6Tns}>^aDhJ1!U`w21(ns>P__)@-z33WyY2)am7xG3*_~_~
zk}2*_0HL;qn08Z`&({to_Qg`g*qa98sN!t@PdA|FY0%ghVM#io77GwaAsEW@6TvC}
zBucx#=>f0tN4l>~`JddXRsgtxm;IykgGZBXiVDm<`;zOQd$g#g(OPh@AH0yp*zlJ1
z9hz}dr^cf6EfPXga$N-}WFX=9^G!AKONDx<$6&v1@eSDd)4HG84Kel@))IeMQ{%PJ
z;YP`{AG|xg&Oiaq3B&$ytUWEs-lW^#RQ|(PTcM-uC{@Vnjzf`rvB#HUVO)rB#N9<j
z_K%1;BCp`c1H*vTyqgtH9yknwR8kV9P4O|FdeoA2@<>qF+Va`4*E-hTDM0zqd`moS
zk3M@yuFri$Lo6M$wzm`g{D}k@T<Y{JSOpmJEj5>@YVP@frQ-F~FH-~MDc##Ewad;w
z#}9sUrS(0ur^d9Z8U<P&-A=7|h)B6t7#-<Zm1s+han*l;C3=79Fe7dYnexTt6s>*u
z782$U>uml)$uE(7%<{EEI|s>59MM-$VAql}tkdg{IF35%jfm*Zkl#Zk^cgd1guM~4
zDmSPKyda|xTD;(^CAm2u&$d?JHA7ycWi}9wff24ftlT9C>*usI^*S$a`H=MYxf7qV
zlgzOV4lT~CLF$-YsmN9S$)s#gSvm0U88)c&-6qX8BW6tF4{nirS#FoW^VLyFLwiZ(
zD4;8#{oQqkd9B9I6ApTu43ntV!en<qZ)!A<#jEqBs3kqGs__+7;+1-Dn_`SKYb0!c
z()OCUqgBAYp>uCS=9*@uv*)cvrh88r!=u{m{TEze{v<!VwRmvw$H1(p-G<x-)$F#V
zVs{fE*Ywy2g092G)MhtcKK;76;tq0YV|kZ$2DtA&;afRgXp7=++#LOfY}aT(IXzyn
zTYR0=&?)II<-yaA?~?(h`c4cLJ2-!-s|)weZCgPqDm08jWNu$ID&4r_Ql?FxY>Im2
z6@<+#E63Hf+~JZ3``FpKiZAH#@z=!}*m|3rb@>J7nS*m!f1X6k3-4ee9INp6{kU2W
zM(l0XmNxM!r-Xut+uQ$CZ}Sj_a3<t?tp>qk4+P588sPXnu$y~`g>Q5Rn`|f3sPZe_
z@lO;lUjSufX%=SjgymZ+!BIQfc<Lbqn$if}uF6Vrcnb-gDQ!yGfTW<yiTr?XzDqi@
z_vJYF4Jgf4AF9<dvkbpB)%|-6x!u4qD6lbZ_n>|9`XlsOR>B~W%j}bgR5`Ebk#9iG
z5JX!`%1A)cKR-FcZ4?56$rj4g7>9ed)Q@WTjJVb1$E^PX$@M8{{Hjf8N@x4?P<Y^X
zS)b)507r+Prg{dFW~%=zGKtD17bSTlXF;pi>_hyQ^896PZP_)2JNan5g_if_n&m^N
zskf@?3w)BsZSHH94w`OeCWPRNg$f}+b;gu^3Lw?VtisG#&mGA(t9iUMo}Ms)l{B+b
zF)5^AYd>1?xQ11zWj%x5DAvaG!RWq`)=NGHI?9Qw^>o=Qs)|hi-$nHs8%R`SdF>dn
z*{2Ve>Ps}<@m)`_sW-hPYne2;BeL_eq1(KRu`<4qraNj%YRI<q9P!dt;&V-t^G2EX
zewfUEP~P3tF95Xfu0iK<et7>>|B}f%?X^AqQh~5G*(jO_nf*(Z!hH9<{Xy;5Fd&%D
zS53>prk#|nb$tfP@E<Q#_{mt!Dr)n|I850GCbkKrCnpC`Xm3Pq3LJc0_~2Un!EOJY
z+Z2Lq<pw%{(rWMLW;p<tS?vXmEAi#6LuiB9aaPFyVIjYg22cH}T&+M{vT#}_f7-3e
zSeO~Uq!C<K!P+-Vzc0*SgdQgUBP2rHUGWp{Hiq)!)^0L#`Q8MWC7>L(+oyn-Hy@O?
z`Mo9b$T&@t`;EMSIBAk+KZ{66s+de`xw<|&Thga6HKGom8C1?!es$LYw<mj|X3I*f
zxV4)$t2WtcqxCs80slpK!P;1q@2R}oRqg%M$5_CEO$NVvSd^+|sV7gLa+9l0z4D=K
zTf#>*H^6xD$vkRaAsr+UR9a3xb}g;Uw=3FM?3e*2@diJibh`2-5)Sf7e8yGVI=8pv
z^|7w;m4B4RNJwnCk>$fHpT6*!*|>X5`VGmaS0K$P5ua={rdXNM<Mjcd{g-ST6Fl<a
z4pZw?ARHt6+4SY#LULrtL$e7p?wK!omT!8a%T3=+n*JDDyw!E|eT>35_$;wakn8(=
z@?GOW5?4(Qy}l~ah42++LocVk%K{1O{;qp4IH1&lcxASzaP^u?u<qe7Q5P^c1fD`B
zsP-`qT)P8_tI^DviA~JExEY3%G&yljXHopCLNw@%939AWkbMOx=0NI(IkIRhvL`!l
z#rqQZpXsKhbj<xg7%kp-?|AZ~okj6H9F1o-_3BEw#d@vg++_jVhl&^OogLk@2l08x
ztv}T;kdkgy$4))n6m34kwC{U8xmU+EGVIXCnclJ}C)}--r|o=jfo6NvB~X_ZYJ1`L
z5Q46F2n{o3oeG|VY&qBCn2D8w!fYvWotlWG7S21LI`ps<dKLHZ+64;py}YB#aDhPM
ztG|{EDIrwmTT%esKKA&GK3$iv<csJ`fN6&?-x^sE#K>wO42rkwdzI%eM!%2Xn6_iP
z=_U*=UvVks|NAk8t1<mm8Y!igK+j<$6Rc&=(OIivb^~VdqqiYL_<O^DN+M`)K-O;i
zueo_QI6DGG|L!NMBe@cLT(ewnow4xINhRGW^#J6FGZG*2g2W6&1&Eo@j|g*g3@bK8
zST>)Y;TWQe1$+da${c}gS-Ao?UFXJRI>7bVyRpCQ{Kc@x)9<48e>YT*jofz5nkavq
zG6=$CMLJ0Xn(epB?&}tbm<&a(SVZJgW*%4y_UPRGDA=M6J>V+SN01l=y}$L>XHM{K
zuL(+~&EOr-nBxWSK|yJXul#5x)KTTR;>$d;60D@I;w1ici;&^#;_|<Jwn5{J*}=-<
zMwSUi33PpIq)9j>tI<f8rOc>Vef2DWFPl^bDG7p`+Sqc1RmJ?PuM92HHBBRbP2mQA
zbn7*053LMcJ$mZb6!X-;dSXUfr=HbH0#@=Yzs~7BrOQgVF<t1V*REqSwv?-=bw(8y
z^x-TmOAcPt+`rwLZC3Jmxz|WudfC=o0i#Pxx9BdM-LfHgZDVcr!{YDs(B_y6CuGbp
z7^0_EhJ=F)uwJhLf6*98J?wk3>V2OXFRgQlIbpT#_aMxT<P!BsV2*r0S||Ig#l*6P
z5MCg&x8#7^DDQOeYZvr|aPv%-?2$k;>^yYj8{8%(W+GwgEH79&M8=#c!mHB0$*Q-#
ztPeLeIOk|){$^-B*YL$jV#nbqo$2Xo;G?jHQ+^JBeLMUU?a(jm*_%Mu+gP%O5`-RP
z^?t;FW$n;-`@%*-`BDFy6qD=^&-LDR)Li@L=Yv~H_!TOvF1?;cX;cz@90X5rzz=q*
zOsd-u+GTGGmH-Ow;k||QlVi-ny(Q}VQ077P!OMx=MJhL0^koV@cdn{?5m4~j&nYb2
z1_y2qQG1>mrn>O;<Y7L~il-fuqYmCkY06(_aDF_0y{RzG>2Hnq;u3Y(1Sf9U2|K7M
z$>E{%n)FGa1vV2dd7%m_FHdgvQns5k?l<8#cK>z2y)37na5jjEH(}@r+lYETJ+X77
z2facGSlvz+P4b6*)c1TfX#Q#rLmvvVS88}vN^4H=6X8Y^RDUP_^;lD&Qg=f)A4RBj
z8|TgESstP<qKT8JKwRBJ><TLq;>VbU+N74}Ry(XrMzlT}*nG4ZY)DUgu%2a-%i|(T
z6cXQ<{}}RGxFPogX0)<42?<y4Bj+jFUT)0gR;GK|G#XZESyo@W)n7lI6lQd811jDj
zyXl(Nqam^d#Y2ZZRd%RJZ$dLfxgvBo6U$(#*8N=Q&t=e?h>^(x&t-kyTQRb%n)#c`
zj?8e|!fW^HQ)$PG!rqd2%=h>~bEZ^lACPiF<umhf{lO;R@ZiI0vc^b}>u!U`oVkMp
zESMr$brU}#7K032nIN=v8hc9N!w>59Gq2`rc~&g!dHS(`U;9+TiT8i-(s%9?-(Z$U
zCLb=zaLj1*c6N{JX^c{;XZ*58y`ms!Z;}xx<9qqGQqrx$vRZG=2~iOUQepp;P!X6^
z;fNMt|JH6Pxm^vKVsCWQ{JjFvscLh@37J*6Neh3TY&HuuZ^LR6LSCn<Fg67r9Fn4s
z#-vD+01@<1$d6qraJ#$cqnor)fq;({`~J%zUU{381hlg05RFDl`<7rym1GMtSg}?3
z64G(`=Z?=r;KNHEUMcIHqBjFSs#+CrPAp{24c6%4O;YkH9gDMvBkH|E_FwcR48|U~
zDn;T7hrbTt<Qq!vI=Jz@YgiC2Rz3<a!V@WoX|{Y!8@lZ3UFq=L!_^ZPe%B1(Gjk0;
z?5~hMtoKEuoEN%5jgxSb6mt#ejc@blErEs^Q2Cg#8}ssN;v$778&qBT)<;_!^iKt&
zv8|qJNp@@`&IJ$N`gD1%-E94Hl2@?>U4&NzQ(ewYY{>p)qxPu7*XaP0Sa#sSBAW%{
zT96hIElM4d?bBbFo#|6tHA-$GDlAH;DF->?=Fe<^_8KOdtjG%wS;!m9BeT9tsiQHE
z0a^AZD7#`A$_Fwg-f&B>`1JBlGRcFGOix!WT+~QG_P>4%jVs)dFSwc{XE;Ihv>)NC
zJ<zTddQFKZ@=UQ?`MRnYF{t-umRrK)3axb0hnjNoAc%rML`TJ-TB~eD7Y`)$fprIn
zTw<w=%{cN7Z*!<xs;K{PB=-e>xdiohsT3^V^k(>A<WVgBZ}=&^N}JY{!`Ya!-up(n
zL;G!$J9N}&eV(}A^;So?Lccf3Z6A25J*|zxttY)>X}R-eB&cLxtA2xhF8%Ci<}7;S
z_+6QXlKe_q$Mx7ueyW0V%vo6KG~$wNZZgE$jr}rMZj|jShH^L{|7jGNeB10PzMd#7
z7Cfi%>7MEIWiC-~$w~T?buFaJO!2RW&7Bo~=VbLhh5@|KjEV+YjStP5&#!W$Q->fn
z+=;vLC!G@w>|-<vRbf6k>~6S`d~U^_NA&R#pl4K>jt^T^MVL}<uu|o@HEuhf%X>@!
zx2k8Z+1AzH$RyJ5)biN(j(_Rr40L7btpOr*Vdfzbof#WI2dWDUn)xU~E%kH#W+Uzd
z<fq-xpGo)2I+7I*>+(?Cmp`Ajoq*I0>M$%FJwVwiJUeUQX17Fnl#)GS24WV`@|kIx
zpPWr4LAs4jY)Q13i<mt!SBtHz!N=iNwBF~gTJLL#=h~CvZc-%y`$KV($n0|}Pn`j>
z-}?=^*pnHigy{i@77xS0ff<lNSJG|@ccAmYO#*P0RRU!z%y1)O5e%jMPwYp)o^rKZ
zbs>Nlg?}Je9ai+2C7mGcho2!<Z6s#FYr@fj*=piBilDE`NA*|yR>rbXTC~v2Ck1Xu
z5s&FhZ~oVSg?4gi`OqMKTN0EB@@`M69Ppzuu$Iu9rWVEjFW<>Y_}}ai!_`5@l`wmd
z#n3aEATV@+JxllLy9r*|pMID_YTuLGe(bd-%moIgnoMxS<ddV1QNE1Z^61jKS6M}0
zL~o{580T1c0?}&nQ0Z-b`(+a8rFNj++?x@x4vv+dlXAcekMSghIC~fOH=vb0qL<>v
zXf8<Pt_==HYZ68&WO_oTbuY0^u45w3*|{{rHKS4h<n_K^Bg88KJU>5S#d5unS0cds
z*@XoLACm5jFjQFD5E`0t9<{{k4`R1qcjR^U=x#K#XA!J`<fx(DSg4kCwJA<j%G056
z!ZKJDV)4^N#g}bfFF?)_)8qT(NDs2j`Z+DXP&9K;pF`(MhYYcb!BjreK+J#|K6`^o
z>${x_hpIFoBMbBLa+X)$jBL3sdu3+NqFnLL+dg}2)#LSH=$;3bkociSCr*>e55t)g
zmPnS5`P-Hu)D9Ku_fLN%CNHi)6(B!}${3B_i-JbAWpYrP`PPiYkHrmMN+jLxeb)H0
z$xJ`DkiRS`!hzuV(sWdQ9u{TgI>1hR8K$K9xNWTuY5i|R6Z~c<m2AHlT@bt8{~;m2
zU?T?hmFy5U0+@nOdRR{1D?;u^KiWU~dD7L2|JeN`!&abx<bLr@vAMaYnyZXH6=68d
zdm&ean&al-^>6rnHcz6IbH^QW&UpSJM!H_8F+BhL(Gxnnd{TVTh*=nkD8THuBw9gt
zV0YIAd#n`Htd|2PGbJf-MW`XmVD~r;eh@`SY*orj$U(^ZcDJ0YIJ8FNc@x#%t8g1Z
zD6U5%MP7TLZ=T<Wvw@Pjv$O+}6u?VHM<t$9(Gm2>!pF}DU+7k}^fu0`ouoc;Mz+8e
z2Yx!J`+ai~SVv#6j9y3&TdjWCi(_)-Y>FN_K}qlt@-8d4*n^ge3T?T5?wes;vVtIn
zr3-yoKHvU3r_ntW82_cvD(?de&AjQ|J~}m=u|+3mqiuehcDxFyzHcc8djw<Iw|1Iw
zuZMYlRR&nuwGM7>Id}Y$v~G+H1yTR`Q6rAOPt0Kz{PmU7j+f>SJvanQYo0w%bDrkX
z6-dQcjb5^-CIcxbGfGIus;0l6-ejY(hJI42@cGIqw?J05_C-WKbky4Iv~3leT@G_H
z+z>59s0n8_LJ;<y4Q_%dO{M;=okZ&#F?<V?N-WD@fqU6qMV=FllP8zcoBOmMV-aZ;
zYg|@vE_7gG2Wj=x-;7k=Chiho@F*QK6ka(G&kV7bl?Pl`3A`43cU4u&l!BL4upN)!
zXIHuo?N)r-yEu30JpBp+MD@R^whjpeQ7T^!6-m3T5aswv^L8LytT)>KMnoxxj%k|$
z8RNYTbUKc*=?{%93Gh?&)$F64-K?<4($r9@qysx#`Ney$#SU|fa~-(QuxrdrV{*ue
zoZH4*mBwsNxEGF(qA~9p<Z)rRtz;AmzSkbR{fs~5GXQG6)~u>H`C+-`>zVKP^E1}$
z=ta(aIgt~>${ADK7mMAO3qyT-=Q^|xF2a4qWblT3m49v%e{WjY>iaJ{Eu<vbB}h@I
z@m8lmR8>7c-ZOsPIfFV%H^7%Hx9T7N5UFq%s5WJ2CK|~=VH#F$s~T?;tU7@YaoT$=
z*%9gxrPxc-H+?{MFBJTo3yf>k3=!IXJ~MEekFBCPfFz)H+9N?E>cwQC?*Yj-SfnOr
zJy+$;3)hb?c0^jkx=$X*6)TqZkIx6lQm;Sv7D-bYsS3KCM#JqO-K5>D{9m8>$`|RK
z3L*$+Ton{hflYmWxJMmACGvcyW~|89Jc$QisXSGPg`^~+$SLZl9P<pe*ZOpoZ2@}3
z$8_brl6OxP)`|0dp;g6B)8U9&M=bzZ2$Z?h6RZqP&_)XV-fgF97QJ*^(3>;@Mre#)
ziez`L5`wZ5w(!gKoaltf;O(?=*UI-Il@Z5b4vmCyE;Tj{zliSABc#xyVV{4ykhM-G
z<M{O_l%<5XeSCf`mWz&A4GD8IUM-a-uLw&`(AzF!4QBTRy*j;m*X9C8v!WC18!4mw
zD0~~shE?*lza#2~n~vPO0U4%0!rKjpn{^Am9EFu4e~9|l{JCqo-VRZ^xYAyaJ{7n#
z>b@hLHaM6pcR1WM#;|bdH=_au-v^U$38_Uik-N8&?;PQE<F0D^#r8c->yE#?K&E#i
z<gkEtW5wY+^0@YWh1alE4jf6PBhuM8=jQVnkZ#Z@jFXFAM^!np^o&y8rMoJL>Ne_m
zp0SyUh9z@-zwxK*@%15gL5k$W4V4AGWIw8EZ7oG1rTS#Wi6sNztsBx|#5P1~8YVDR
z;s&^*I}94o^5s3-)#UVh&94xM8AJ9=1S^ehw+l&BOENJl930pdZ9_H?M`UbOwv5^y
zBP-*6)Lr~TB8=s}RsowvYM<-8Wn1X-Knhm@szDES(ZLtM0o~&twW=B2bXF(m-rz0)
z#0S_<5n5Y4r{3W&<{&@L`uL;dCpi`VOd0^Xz~|{a6B|rlx@&pjkAKpO7Ib6t>2EAP
zkZw$ne@0_q8i-Y)9|=?<5U*ZF?GjWSYb8+Vv07%1q^CKrX1u|yw&`oJIUsO$3uk)L
z`Kv7~2!mmAIf!va0fC?!59t0t=e1`iD~7X$eo~4s&JM)r;h&-G8%JMa)sK^2uI4Ug
zY$WG;+%X>i!!6!{xVxvk(w4kImkgHZ@N)#XYRM~BjvIljM=%e>M4-^-QW6IAny7Xe
z6SPU8c%S}}?_Ha}lLcwSOj+-Vdsim=6i*S=X~mZBEomXdKeqAq-UTug7{1!S#aP|&
z*3#W#6B*$|i{PC{wy8P**rgEre|NtKrokvG%`Nsov67;+UmejS1bl`Ye{)^Zw<0D@
z6P=EhDy@sxCd4uYKbP4rw2aX25f3+kch%nN(U<nUgQ<ig`QMOI32B<RPG>r@gm>V&
zp#0hw(}{f%OCNqyk-AoTN95fk3vW(aE@_%2k5Y(K&Qyfz+eIRvVXiP01E8I0+Ao?Z
zO?WE{Zs-=5X^!Yi-^O;k;O?cDLfVgR8T-^rIOTy7zWNNx6c&cn2xe^PNM35lEV-L;
zWC72v4z5gJqii$%v7yL8+4qeSAs7DYKmQ<NhVO<SlKPcWvV2t-_~qWxQ_i=BGFW^s
z@GbhH1CI+B$y5wIGd%v~XaE9*j;R8IJwxOKNWiG1Z2zXyx+{P>3CqsaoX<rhbG426
zt%0PO_U2>P^~v|$1q_uxTUR0O75s6(TwgaW=OvdhuQ=e~wnalIH+pllDC|o~k^-C|
z5VvG0QJi3>sZF;{3Rc1cU8Hm!HG^0ai>RBKzke>L`%t=)o=);iHcopls>{|Dw_eR7
z0t);Ohbt1`Tz#?2Yl(ol)7C8n1>g5<-K{RMoVpqPBfE#-v-jz1KRAxqPd(j^Q#-xf
zKNqwCKBXyZ9{hp_y!H`Wgyy~4kkC$3W;fJq$ohJce)U&%1?zVj?x2#dR?Vxb+v{|}
zOJ`RqDiG14?Ec3)S}=7d`=S*J2@BG(YGyiXDWo(Z@_&{ImreY!M`haNwxOaxR8(Q^
z_Xj01R-nU~nn=r{j+n|lC{}p=qe0(c$8~;swHU04{6Sm=E|$);wM2dylw>m)tyohg
z4Eq#5TuQao(hs<&5BjiUjj4B7c-v4tBH+D2x3J(+nv!Z>KeT`};!^GxUxSSArwgI0
zfTmrw&Bq?;(>@fJpvVVGY(mU>eM&sph&=N!WZwb|Rn;Fg^y{c^<knZCVqCst$#abn
zdy~w!MoH~GPGw-wd;Q@*!5ZJ5Cx!dpkb=<tbIZbRL+R-WTH%#$6XetC=?rzZ&~i!3
zEI^zNme|`@Z3JZX=rvINs;+own>t&e_D;}w{GEw;J8Mb6de-;NJ)Ut@9k+7xlDMtV
z9%zFX>p-m-Nw7#g0&X3}8q9fhoZ^eAhoQ!W{DVnF2;L&Ov&S~@WV?)|wa9j9gVNci
z?aV_*0_MFtkNkh<boQ;mM|Xy^4Y8hKk2L*@@WI<RlaP}M7JnjsP71@M(1kNo#dI~L
z?qFEXh#8rIXtHy&^F6JMzQY8xc84|e`b<Pnn5^}dHj=#n>t2=zt^WdS>ZV{Kvh5P4
zjZVlm8|X%%TU3Ka<YC+RC*acT^$b75ijL_gGcIfM#QJ1GhY!}<U$(JdL0LAuX_z3`
zL66#tcfQQGI`R$!R~+m=$rdoaHCT^EPjPoO^)q=X65Zx*4VR!7aurfjev_guZg1mE
z^-GciM;&Y$tchASvV9Mem#mrs=SQqO+6@l4Zx7Cv$bsss_@4DY(UF=KxRU|^l`PNf
z?LUKUNqmP%W;#Y)ahRA_-ZJd*q1-w1A0n3rrFfTso#m)m-R=_Cg9q8AOP(0x>PjYL
z8W-g4PUqi6F4PbpK<7!m`NOJOYvLUPKqg0`cg$fgY3yO;tEtkAK6*-*#*5AR`CO*S
zEzkS1NM$_T;=2^$jke$MQtY=X3MNyMCbdZKNZy6wpC0;ln868h!Ymkjrn4seD8aK$
z7TF&Jt=8*w$Q7){`Y)${xr7_#oVdU4cu&ojUWgecTjyKGo6<>3UhwGgGHh}`I_wuo
z=r&`Sdpq%S$2a{_RFTJA1M8d<+53LGP@!mY#Y)XO)_>*)U+>wCmC7OBXOAwhjO43=
z_E5RPnA>A^QTE%vY0t*Skk0E|TM73+Ef99yEcfK+|Fl~9Wu1!f-p*fa6*$liCC$Py
z>J8ou%Usxc)9)JvKk}mc+*u;ICz1(SoDVQ9%Fj%t=smCYe{Mx-a47wI^R(4<?KDR5
zbg5Q`o*;PZbPoErYh0IJOhk%l_06m^%y5rn$p~QJC}x8^wnZZ5G5A0O#eMDWi+ock
zY@j|ZG0;fztmRoNGj>m$w`&l7-9gms{cy6*uC3x)Mv|zZ#>vTw;Dw)rH6jP?shI%i
z2L0WiB`n+%8TCh-DJ}$v5C5nKG_G<QbSFq|FS5Qz-<d(@jmUx8K-yBT$ro6+p9*i{
z#lr$5%dr%`f$QjjxW7YS_7*<uZwJBjtpO*FF5#q+6xI+EasEb4cZ55IQM!BeoNEwJ
z<fX4*kz{Vnw~aoh1#&g~{?-yfCOh%dJ?O!}xNZbozdq`*0@HHTIn~NrQwi8FkpxSh
zx$o!uI2bu{uD;YfFSmH!XC%z#g2VS-W;uhHRTUx7jOq=%m=oD5OE)go1?3J%i~H$u
zlRo(v$I3}K50L4+Ebu-N?2dkw!$Lm?RK9x2#rl}O^B1C7L)jV-qW)q7N!xEj#eJi8
z$QRNc-ULx^+_0kW>;Jpzj~kG<QC%dJfmmkx@K9Zg5ZNA1M%?>8Fhr+|(IFZC(+k*B
zYHu|B@FN;bE1l+yW3HZ(MD|v`&P1+9Adiz6H`|BD|JZVT(eA%KdE;no=f{W&NA~r|
zmIAnM=5@`0c$>{PIbq`U$I)uP`{X`)ZSB6RTKs4F;1W<H#-K=`Xb!X3lsC8Br$CV=
z9(2AN0tg|*^yfPkR8HW6CB4fYpEVw37#=Lp4Q0bMjfDqDwOkO(h5+pM!d44a%N&~7
zn6Z0$rkq~-S<9zy+TrLJF@bq+8}hw8>6zaAyd(K7ydcn1YeLQIDgbLUzm%2Ewkf^T
z1J_A>kTxQjP3oLO3e2;`8-?#Usl4`-xU|hHC~hE3WQ$E28u>nqGZ4&52xLVHzlF*G
zQ8_KhV1-(#^MOVGA|FjKwbd)MmdUdn=c-t-&Iy{020n&a@LMI}74hN49jccM#`)+l
z;YW&$66N|BsdvP{u*il}E}KI(<PE`oOIL@y|IQtMdKQfjh{bnJOyZvP98f#%`ffqs
zHhsbyv9iJZz1y*&dzWDd8Pqv>qQ<{~Ad<6Ps$-#p`tK!{&zY^6(5jnxa2B_l?Y}9e
zXF7F8U7C;n4Q3c8fD5a(+jkRd(xD~>Y9ArSuVmI2+Up_hsxV>$K<`l&to2T$ll9Pd
zkzK}kagDMA#|ok>Y*SazA3kQ`JgCLHkV;`po7ppR9kkml-qx4Fmy@y$B}5dL8WR3S
ze?i+VkwLS)d`(4Zv^wE7da10ZxxK1DKdw-{Y2s#@Jfi2|PptBD@N=UIlTpu>2lcjD
zzTiRiB9X&}pPd~xHBar{hli^j|E2|H`HsffS(2l~^1i<mMj<xOWhWH(JYL^KUj4Ie
znp04+168m}nDEP06Bho*X4UC+A5dP@xnmg~Zn@BMF*xVeQg89}Bk@@;fedg))`+wL
zVWfFg`dr>v#IK!Yov}O{QiDgux0+b5;U;f}|BtA%jB5IS<30j{2&i;-w=@zeI7WkX
zhcHHMAV{Z_bZyiqrMqEtiA+W*9Rr3m2!eD7zkC1tesDkAIeYLuXJ_YpKcDM*zhAHX
zhA9-EHiM8C%jMtPx*_o9NWL*Kd$i3JP_(&zxNQAUyqZfv<X6IXB)F$tV#;G}iEp1o
zD@p3;7M#^4JRP5DHX=TixIwVI^upTav1aN=@$`2+1Z@|CX1WPAoN$eOffUX^pxR>b
z%rHKRf_f8drMtH$>Pfb(@2C^rB*%-@>SU{NuabdI-<a82>E$<Bz2XU4gr!W+5;-Z9
zv@5ll+Kye&qvC8<Tl{;n6<_{rvAGW4e{0|zhWz|v-UB|0Z!oAq@bO30HmG&w{cYB?
z(f*olV34_fHEtOo*1D<wRl32p4;OYVga^u`W&l;0Rst|Yxr+jns0vk<wAE8@c(b3s
zm~K1T`-r-y=M`kcB~-?K$~#ux^ZY?ysp<7Q(rQ^jfBQF6`>tPO;=daME!&33?1XLd
zj6b5oCKl#fZNW#enlCFFL`Sh9N0O}i!iwa|(Ew04OFb?hmC<Z)ExN^kZ*xxB2N^4w
zdkf5Il8OtISyo-R9EWZtJNF22g}u#92Iu~GgFEAcjC+}ljXBfvcN#N)(=4pdrvw-6
zElG!b)aTQk@jJR}^8bpPFMFd1+sd7<LJ|C)(rwz6S{mIQP@FyT^O{h<=ERrg5E+iq
zC>!B9x$sB(4eo#Hj-eb--qC3<`nOz1(I-ZunR3l4TpYqDL6<{i$K(%a_erW0#Ou7m
zB1458Dq<h(ydRS0E|Jsy^zd8fip$N~deI(}TjS(@)^?K=U)a?+xJBorf25|9v|22&
zA+BzJyo}SdY|pKEhj+L6XG40G+dJg8Mn!J?@pGmw4O3AUp4GUL6^RA(*{8w*$7$}<
zR6_4Pk5E=g9UI7ub@^Jkg9<GoILi9|;lH7}N~VBv8sj9&6b&*J#cmFq*d~}K2nc5Y
z1f+LQ3ZYiw-BGlA<f2u5M+rZF(Ur7S(iDTc*18#W_9OVdXq}GVw_ogjrd|?A%v@2!
zP*=DgyM}A)FLnK~KWa4f9S2U6%Jp79{SOb;kLWTU2O}A@b2}4@N1OjOcpfqtc~5ta
zIz>`<KH70~o~JIUlR<ZaGepAa)n>Z?h9TWtQ|n39R4B2YO4kGIo!`OJjN}WqzY2A@
zjCsfvKIGbR<{!A~nEZ$z5aVs}ChZU~?X}jgu!(xSQdZRs?vzKJ=~El8tT*XqaM<X!
zq&+CztXq(WtfF`-D@=R!)^>Z}+r;uBY!&ViyhlXS<&oQ=3NO@3kMDTT&{=Qq%_n8%
z^d|np`$pyWDAfG#<~8a#)GO{a<^hl24$)X7+&R42ZY+m54@N}dgrsyVR<H&?1SPoF
zi=>KT!0HNfwA?RelVchGA_CY2n`Oaq@E*GpdJ_ME6`0KORuQM}?7wCnUtJ^KF^R`7
zMvHhXu*4tL3vNNq%oE;UcjboT+J4{yb`$uzx*zF~Y~f^&*dR<oPZZYhPQgHQ!!SeL
zsTjwXg{^@r5%@?KNd9TB>E`gZIRVPVHSTLH>lLVGLPfI<k94U?@L@Nh*3rl3UJ^Qf
zH7&eY38L9F{L^saCn`xEauCGZ{j!N|7tID@1AD6v8maa!k|6d&jKn~L=?^)4<)j?a
z+S@gvt{q;YruMPPw`P~4T(5htsX%|WorkBfLY$_Uhrm>k3-r%iE{^DrVi5WgvMxDt
zs%p+J!#a`dHVbLEY+!#9oZWKixQWNZtv_iX4TpZlwf@ZU=Kv~4y#Soc5~APC`IO;d
z^rZQ7vT=PTPU0}66D0gQK)ZeHfL6l-^{wCvr`Cq(HvkqJaa9FQlN0u_w3^{?OU(0O
zO5C7eSYcb8wuwu6hZlzwkOcj&Uf68OZZVej;{SsMR?-@{Kx*gy!)XjE&(#0K^fBDV
zS?X*fMxKsOO6F~Rn5Vzwv3z>{uc>S`P?o968I|vuq5!$Y?e0fAp^a6>&93{;NQov`
zSG6m7YqoL0{NI$~aQ?Q7`>P?T#z_5;Vp^o?`AgqhaHFxxL1sZs#JGSD!h$zF&1BH|
z!fzGVrT-wSIW{7I)BMqymWR|!t*wTOx%w!RsMMK~@ZN@HMCcy7kxvbeP?RK&w=EyS
z5)KA4UK+W)u_|hEwOX7Ic;5_d`vZV!rI<)2{o!3c?6viEfLr-D6&izzd{=gaTTQ&$
z7=9B{rf)V`W>Mc?x6v?vbA`(X#?Pu!cUx{euava(?rRkHP0`f0@2)4=kSSVfaK8}%
zsn(A6R5j#4t#ijfpQbZp7msd1g=Ut|XK%fqkv~Qys9^VU^>=yPgvT7oB}q*354&@=
zn1ypR6j#T~xZ~(N?v)zO*?<_Rb1BsOi*JA|h9@&eTt})qMO_}2zP-RC=7*sZ^oae9
zX*M<|%=$TYuevo^^I41;6XY;(ugyOXPA3uPsz_a6GMyLI6-{dOT~O0z%8oFH&Lg4N
zvoCJUnlNtf+CRCN2`#aV#|?=x?Ydi|CCgv`!)xGkJ=3a~o%ON7u>H9FN}}->hk0>)
zc;Q^>ns9eoE_|;o>!Gp~=wLl~L(dJbNTvm9le`k4Vd+9ht=fD#h1q{mdRdaRpPvUe
ze~nEy;^G(1-}v$QFY1L0k-V%SAYZMt6O;P&H`xdOpJ%^B!g37!Y7IP&GRkWxeOx&p
zAav&V!|h_Gu!yO3+fweKjFZ3yw<ZlQ))WIyQ#}WF4<|`}+W@+JgiZd4xZ{vzYsd$E
ztFDLfB(KwpE-d`ItC|aIghv0v<KbQm?95>7x*Xa8<OHbmR7K&OHddB?KhfMx<1^`H
zs11Wn-75`$P!`Rmg};_2#fqnX&2!y*f^s4X^^xDB(9KNATha;lc(5cb0{^go+vN_m
z*FH@oo;qluzJ1v}Z56r`$$YTHL-{(|08z>#=<7PIRg`hv6qr&q9^>#tj>O>BTlBQL
zv$m=M23)_j`<kRZA*T7qf$=UJy`Q8XZwqac@pPk?<;}s-3Qd&Rw=ajPd^TaFUHGMd
za_c|s7XwR6l2)zFQldF!U8@?3ywH7ww&RO$T+Kbqzs$ObFy8J0(}#U-M+xecEsPpu
z>pzc0vB49DB`#F~V6M_;Q)#EX6>eAmYAFz9ht_8;_o20UL3`x=W1;PEPQ5;cY?Ou>
zcg|wcO^`9O-wn7xN>?`X(Xzds|IydGJZ{(zxNFALGjkG~w5`Q%-`+O?;8OClIUL(|
zXwqIVE?0!}p()YKI<EUBl}TQ^?|t{C7xSl`|L+u|Y1&*>uE}cc!6mA`aU+OMgU?=G
z3xgurMr)9T$yP3}uYQo6h!!1Z(41t&jJC~7&HkceT^K0)BVqq%ZC^+!!Bw7?KjDFC
z_`np8u{P(EO3@s*yz06k#Zpx|&-kT<!yie1p*0NwOO*Pc6<~oZ=80U1t43cMNMn+A
zed(pd+uC>=eGb7sW8eD29Pi`tPUrT{FEvD>bBdeu>YO?Ma-IwGk~v(|bjm`VXN;ik
zx8UjrdRYTr*Io<d(4504!-Q(qIuvU@`VmXt4sUK5T4AzCepVyU=EZG$D{XXJ;Z$=E
z&WbeXuSUw{UEi#FE)G>-3tenAR-b|vAxRluym#rzs20SVDYiVZeR;J3%n`>c4l9ol
z85T`=umIf=EQgJZKYJveW^!t_;iRf_A{?LBeUxDz*R5|{(f-lqs@hTlV=1J26B~A=
zbUidzv+72)Dq{U1UE|%`16pO%h7URRpTpyd)Ky$Pbrz#-bWG9uaSZ7OUN5XEFR8Ig
z`FD*P;#aJPPwo}$i^7p(5`HZDnODCcmgbHWh0<)Lb_=VI?I}T;-y>0$;IuEOw~jPe
z*UNDYLv(AWV}B3gXvmb_l!Zxaz(IMR`?OaYJgL%c)an`1P_;PgFTChIyNYrVTI{rR
z(#)JW9Ni#iRKK!K8o3vJ&Rr7d(C4@#uPCUt**CAG8)mFkl9JQAnE#oBAUO`+3mPXZ
zxQvuaRls%Wek!gFvX?XDQv$@gQg_+t2^xAA9`o#wZ)#W!r-sgFkJ3fT`D({t>6&?W
zB)-umJO{n$C0{6^CuvrqkZ|j@;IuvtV8@1IBmmV%sRYJta*sSL*UoTm=x69Oh1J(j
zK-L5kCa+J!+5O*NJ=^Ggy_nQC))ztYDma=djmU8mo!LMgj^TDLP}p3Mv>^<C?~Hpc
zzibeQw?7F-D;x!3k=o>kkyP*Jo^mtUeO!EI4ySg!d(PK@K@sW{+dEuQmB@EL5i~LK
z?z#a_lY+nYXIQnk(BJ&B*O~xY$~eXY>LD$>LX>3BsUeTUvmlhJuWvV}R7lMpnptk(
z7mN6fL}j2SsUo@Tfc@cays>#TNx08|y8lHU(wH1Lc|86%<ZoJ};Qc#E>3$T>B=mnT
z`VZhitFP%LWN3lIaRL1izaMi3yyvuveNN@@8p6PY`z=tkN<_kB+v=jXcEA9PgtA{6
z)05cT9SPj~TZrr3|J>G`G9u4J8UIHac%uv8=KV{9v+J&=z5Q2EfB)oFIp5D|s=}E7
zA-2g}R^j4bTbrY2Je%#g_bV^54s&dE|D_NFM|OXn7W;WEN_;L-1AkOk`}>s@mT}|$
zJ9V!AE<$A3Pk3=0EAr*;ATTXlL?>|ElCGCRr?Xjb)gE8paev{}jzEC4$^3w@JH$W+
z+X>xawQ7pvuC2dM&fKc=;F!QdcCs?9tYs}2)XI6^ruXO=jP{-1IB5$PyTOMiTimoO
zUAwkvYPT5DKMMgGGz3J}tzEijTr7(zD?zn|mD@jU438qMJe&0cE%>TM^ELJ&gTsDH
z6Hg1i^`2AFni2glyHa<pl{96f;h8dEFoi-xmoUx!@6bbJ;6J-r=4HjzIWKP=b0D5N
zGFA>8ekDr$(D?P6fy^nBH!K3QKcPZ>dAS-ZYr9(swjBmjl>Z7%Og45{I)k2{rbT5Z
zO#i|^y0Kf&<b7d&knMwir#gfz9q&Uz%dmq2o@0o~S=JIq`ot_vuUDp$v{l)ve9T3T
zlrPKF+iY@cunHrju<!rjJyhAuayh)bH|NKcPL}+}qa@c)&QuWjb4xEEDJXOp_5<CD
z>Q~`Cun9wo2`)+NYL29bU*|}Z`4ac}Nn{DLyR_vqil^wb!rNGSnmPirwb*l)ChI6O
z$6fes^p0|@iYkn(-$pqq<RERnLvS}T6}ZM^iwfs`2+xd8+v%-+rngl$BPC7hl)~!V
zb`!ksjx0B)df-j9LrBCMR!KU{_mu5{u73|ZbrewTRf-<DtC#m`vZG%j|HIov$vYfI
zW%M{QAkSo(?6o^LP-blcj#MTm+ycBdGv>%YCYuc=1^V(kENwP22QR(Wi{Umon{^k?
zL$$l>U8Z?U86g$<rB%WO2-WD1M?v@3@vR20^L<@_I*ppKT^AwEhQS#{I<l&SH;HC|
zj07ztrAN{*l@<V=Dh^+Ymm4>MEE`lkOL$<9>7&C6*c(NdEvoDz4TK!4@67~}rIXy)
zcPuOdR#)4XYezY{KZVQYf!9r(#C`J;8|Er&JFpD~<5Y?THs<w)UR55~{=H9#J?Ahh
zyJ?tqDb&JF{o~WOX?&*)1&7}HSp$Z9R^L9Rl(->X6q1;bvt*PK=Kzp<8D{hJ3rwAB
z&cpyNL|#)RDIzSIBn^Hvt2L!>#k8GMMmMx&M=2bqHfud`rJ$QVG{ss70ucKgsU^-B
zcf&GW<LO6u_tCyK!C(u0f5`$Jc$EanxW@a|p!K3A7e!EENn)I`pRRVV3T85PKsGT%
zM)2~+;bN<~2ySLcqMFM>peD6#*%R5`eW6bQD=&974Qq^_l{D4=vY#Wb$xa?~dFN^F
zt)E^R=`rKo#&l>xgA)bF{Z$rALL^EBlBxxY49OO|ObzRbbF@@wVUZ>qO{JNm)AU2V
z`4-S1T&kA~`jd=AJ^9Cze+A-sE<#eZRSs1LQ}WgW7{VO;R^NB=`auNl=pCtA33C(Y
znk{`n+2tcd)#yUWRBLl^>$#Qc`pQYdUasq1FA1?eyNn3VQ#~@L`L|KphY3{sHVX;m
z&OWwRlF3*KxxG7;Ffb=;1#3u{HMo5i)my5u>Im}U9T}`N`m<%GvfmWlVb$WfbRE2H
z;k9UBiIYXUN+g*kyGaWMFu80!pr3G|5bD_B-2+X}X(jf@HH)5qGF~t!({|tt(n=w6
ztI5$(qO;cVaR}PKK+Rv!SB**)K5O8A1@SA=>0U|S2gxJT+VaL#g{Zs9b_BF~F(w)|
z@lvz7m6V@#Y(40Q-VSXSBu#Vh!)E@rA8%LoFu6Cx$GspCc=%8NBkWwOk8sU5uMqY}
z?J>F(T@%F%*<r=MFEdJUG4*Nm{>I4s<1*UqpbU%OWEUl6VQ{~YbRxwo`-B|lM!E~G
zP(ebx;^U0MAjU4_Ht{n#2|m!)NSN`7fVKFv*HYWbQPPDBtSCalC^qjf@>tp)SZEK}
z?rHSkUVwSL@1z{_?@6#BYtwuD!D>AVrdMWY8C4Kkru9N9<7W|3I?<JEn3aol&MLXr
z+1XJjS*65~`E*&Aw9wWdo*n#KQQimhG8mI%AXroL?@li%lT7V9MrryZ{NHfe+yU#(
zgXWV3YDLckbD0cMt)4jJ%s^S%RjtyJD&Gwn?cLAt7Dt90M<xY&xlIE%{&9spq;$}q
zO&<&NTf<X;%TkjIS5?y5zxj1*_4Ahswfty@+S0%I7DH^2H`k%%v$|R)0X9mf#HsU<
zb^|WBkl$UyPgoxWPlDZdgNVEPR}Q|__$2;3ggyetV?PQfdbj3Fy71hekuOh}U+y$!
zm_RuokMWA*a=2o_lD&I;Q5MkNVnlj`@Qn}eecl8r4;<(n&ST<sB3g}Q_4*0B>J}S#
zIGYWMOr&RU!{^u(;~fu<Jntv(y6CT9W@3x5%J}(-h&+PRG?<~Z*pBuI*-r&ydjR@t
zte6Nz#4#k2-L5qpP8=h|<uwPg_%a$q0ODUzdJ^|_ETUaP#zHF2Dr(^wO50LQsFuM=
z{oFxFZoDkk14}_ZR)hHbBA1Y*p4<6VrYVTQ8W-HRZdxQ#kMkQl2;ae%;U@K61FVwa
z%6Oa-a{!<tF2o0keepp!e24Ri1a2kJ^uK96C-*8q>wlAb$DAdZ2SujrWHG?8@TUKF
zjTiFod8WVs<HM;LAi1~%LE<m_*pWEIUpU_F8qfxpoIr5dtvtiIDF22N(c&fvX-wQI
zDhBI97vubGMPm1uTCrdCb>1n3`|8fkYmWa6UoN(u{XiO-ALn!BeklLf{kBB^=N?<X
z!;QWLu)T1a{_nGK{z6>S&~KPTb2urIU2EVmA78|)(h;YveG2+KkIxl}R>f#mW5JBV
z#x!~+omyqgUZw!@Oxz4<;KHA(d9b*HeWHPrE|6Hl7r$5Bkg8NCi%HYu(HitO8_qLD
z9>HTSsps{0s(b`3Tw7+}gWVECRUKaa(fQZzBIC#xbg)5;&O9kd^tn;tdM3*}5?Lhx
zoGwf$34z?=UmxX^97k?lXg@pW7u#C8-Qq31CKmeo<Nk4Ak3^{BsIZYyrbfB9Yx?F`
zgvl-24@q@@(*wd-O|I#4@_XC4Cg0gI2KN!^5fU2NSr>1%zc5#Yg$j+9Mec8pJ`LLb
zjZg4*;5Y4zPH)MV8wTob59Fpygtr_Gdk?<0u5QHzNyk3h{sD_-guUVo4AIS7j}n%-
zZ%eeu@pTu`>RTR*1&VX)Z(^nCk;fucn=#d#vYth%Jb#goWsbgI^DYhI$D)$w$M2eh
z4Soh?j#f2S2wM=ChF)?1W}@TWW9MmZ)C22bUbKbIH+B77EphJop^tl})j`e%?mqI4
zK1J6xCwuP5T>)%W%@UCas|?i<z`0`DXie#VNk?c%L##bBVS#Es^`{WQ7D^NN<Gf7G
z%mYuLTbAIL5y6dCy$sEOUL1=}R0pEum8&!ohuU65!jeP>GF^+fTcwe;-JzfVX2{c`
z>ET5i&S)(ElsWU?ssT^c1YC=d3DZkuRQqtm*lvJFZjz<ie1pH8yI$3WXLvO&dPKqN
zX_(tU7ZH3!S(~%C>2fjCwL_k7Xf|alwQ&Q-mGT@$FRgzPH>>$1Q~LDK+<tdfsy0Q}
zn1#|wkbm|T>}dSO-@EDKv*`!;9d2P|s+kQYpCsG$;Hj^yrh|)%d=`GsBTJ`S^XL#!
z{`$h^5(si&?QZoJ7ROK>IjEb}H<+wVHxKm3MMzvIq3<pvvr(mtEF%hjXj4_1jMp)4
zODfTco;q3}%<d}OLBFcNcPQWy#`jD@3SFekQtdi6RMo7}S!OSR0VJuc{)0xq{8Qzs
zHd#2JdCr1&J*Vz-l+uSuQX^~#l5ahswseSO!AX~cg!B+4*Itk)PC;S<<?ss_k-sS@
zsO4dDWyNY6zV(i4$xx&M(3*Vr>RL7EEOD)6v^~seHT;DoT29ieo07_>@bgFx%M>3$
zX9%^pe=bR0cAtmvbfuNS^?1k__7dZb9k_Uy6?i-zjkR>Dxo!EQTQXoN+DpYe9Z@9)
z4mz7QoDJFzOmG1!H_-dUj8Y8$zcVALCN6y4qAS^Pxfw`j69OMVo;%j8nQ1No)HZRk
zf4Qh&j+@z*_VMY6Tj=ax9wN-e(TWWvI4e@aCl^E4PX`QS*<TOb;)^FNtg?mtGe7ZQ
zbs$R#-P}zLE*p=A=qT5_=P=uS+=aSx>fGOrc@(NM)QWX1(SS!kTvQS~zFv&|dlWm;
z#@HKxb{(HiUR91-qm0qJsIlDo<n)z4UH%<Ilt+vt_hyW_1b%OdaW`a3D<x9nmlP+^
zCi30P+|`u7{nQjX4$$74|7Z1iwYjKB6@4Pgo1n2R*b3HP2K$met~%&`2(HvHtN3f8
z&nWW+OIB1;K!3KL%89%bBI;1gX)~9i6v;4EmCg;_iqIqJxsbrogWYwv&u;CiL0KHY
zgeDz^8i1SV$j`k8$o5{g7nw5!Qs%x(jD(yTOR($vMN5Ag9-g#&0oB&H3rS-w&iZ3<
zF4MZ?36mrti~!RUv&{lw!LK~xd4=z~+K2>}oTrQ`X_(45<=cGUduKkrEe1eYZC<Y6
zSn<-r_B`KB((15-m|&@qJrG9I;>(oc=U&$(qamB@S@}55T$&ZM0sXBpHA%~x56}*D
z_la~fg%hCd^bCCfD+H(y<9DCN>tL965h{Vs%L$GCL(5I>Q|Kuv)NlA{h&Z%1!#9Cz
zq}uZC*n>Y;5}awec`)Z9_~zKqxDy*c@NTU#p_GC4V{O2=hvV_P*DiEDA0nkSX!n0D
z`?)b&O2Go?KNd5I9tTKI>gH=#+{$6q%xqP^NvmS*55L#sQ$i{xl%lPx8oz_vdlRac
zvaZ*9$%m$qRx-5u?a{qgRp!9u_S+P+k0XN*Z!TH-oo7(<^)goAMZk-$H3Zxbm*jh2
z0{BC3PK0wI1<h&4!A!t4abAMtWAss<l}@D=C^u0N%V_-hg7-hm)N%(27t>!1d=w=w
z;_z@FP)0ok(929doO{?5dSw=!H%GfbJWYgKazFQuWju{05$coEC-A2b@f{gfw2T?S
zgKrRi%iV534P%2@z10O9I8EkG7^Uql$e!Dg|AqH|L6a&UMqfRC_k-7|JLmNw(t^`u
zxhCl-I<0FBPz#~~Q}sRoS|0-t^&Bl)NR|in_~IWIgm&2N{-$wjb}&lcf;|YIB5{p-
zx&DghXgby^sL0q9Txj5@nrHV)c<VcGnnL?Mp=~dSfP+-2A%N4XNb(RTiyod$BRC)F
zVz*`R#=WC)={*V||5On>V0Ub;bps^E0iP!mz`UuPjZ5Qc+DQEGA`cvEEVh$>Sc3%R
zN$@W`Xu@R(C|+c~?s#SgK=&bWi-0^SU}ry)F7&;F5knd=lLWY#TThWe761y`R>zI>
zh2xHK(*krS7mzCVZT#JHeXmKDjbm#U0``>IiR!)W3pK+vb`OF2X$7&e6o~V0))-CZ
z*S9-nH*b?uC%uiCoK*NYwHIYoR7^l=4dBEq1g8WJLM$9B!3CBADA<on-Z-Ai!}QYo
zg?m4oQ&z11VA4=b^wuk{5!6505Uw=#zn0f>o^p_ymY_jyx#~g>Q_7!+{fdH_+YOdg
z%&1hgcQbjf*imVulyBt=T%Iewa*`gp8)KEe|2Bt+F)Vis%Yehu-|H>c;&6Z!mHUj=
zd&^@L<=3*hD+*Fvr0G0dnhT~*(zNRt49HU|3JwD<(=msQ7v*R}0r^`m?P5V67d7ar
zq`Ogz#$xI8lElpF**xS067zWTf`-3ss->5_4ec3VKjr^sCbjRd%X)HfXD>oe=gUR{
zI8U%dJ^41S&wa7vTHM?{kw*I?qgz{Zj@ehxdFnpo*`t<uw&{ngFV}UX)2@A3tYv*_
zj}vP*moyCdliP1zXa2%Xue-syRt<?>HR7B=Zcf*Lmb)gXmW-mMzme8VpckcR%x8#I
zfH6!<%h5G)i&ozE<o?RCJYE`MC*lM+2OJvbvk4*-%W*=NY^G*os*fI49eJVK$EE7|
zipt)Xg0+X~tdLO`jOcR*83A5##`;%~Oao%>mE3aHJZ$!f?@=!mw90u!s6;`Z%ish@
zTz3Oo))2bJcSRrCZZBH4MU)&vg+$nsj!N64&NE3cEhtM_S|h(#b>Pe)6YHm=AFb8E
zmpexh`5Vnv)|NzXMWO^=o+lbqrKMxMxwjrg(w9OmdlyUf4v7<ZB^)^elV#m{4sET*
zDt{$X1}+?+-KED}R>p00sAe1Y)!)2TkTs2><4Hm!SW`X2R`!%)SQn>Z{t6mGf@Nvb
zTo|0xu^gUn(MOb|F|ANtd%G2Vk>jbSDjB6fNRV=x2lMhx6iWvKtEWYbe;~!=c1wu`
zwL8m5X^N2~E5$}F#0J)SYI=Iel}67RiM1MUl#I?=VNePL-pv`0GSQGJ3hMRlqHi6U
z60G;s*3i}7>I<~4cNd4bz0vGOq-dmSEQnSU2fN2GIhxCH?q1aX)e$7tDt4>j$221y
z9XG9T3{S0EYJMJ3lh380SF@d+^yL9&98IjHGc7be=i^@YRRb+Bq<6a3-FTlZ(sVH+
zCiSyHBp@~Q&rv{Zo#`em6=W#_*yY46+N>oftDzKyOfOMxo(AsBv&trYspUG;yTjEQ
zZ9eZ&-GzvqnQo}vIPV7+|4q4NEj*6G!K6$Ie0N^dn@;cCxKyuDX|O%;eqB(pVlv?l
z_m(g{1Wbrf*+D$Gx?nm%`l{(biTzwvig;#6*Xlkl{=W&kV-fiVXBWDjqqyJHw}OwJ
zVjCTpw${yVkj3NNA{}UtcVqLs4V79_&An?MJA1#nEyCB^hpy!tZR8fBrCl9bb=BPC
z1!s<cr&ef8`mhqyo%}-$`zkmeHP6ds2ft6Qa_HjX)9-JsL+*}ET)5MuG-iyv%cLy@
zyBDH0Ey~ma>w_oe$XyYZ=B&9o2gFVoX_NRPE{n|`^@koTFJE?iUM{dG;-tuUHseQo
zs+lt}F7ulYxGf%|+l8aECTlnqKTgm1s38_YuwUFO*BmB1sqQ*urcj68;qz}?HYOj{
zf7@5j`@vm_G9f^E56|^NPt-%66_p)Fka=~4s}i4C+1e4hGq$h(z)_HYPQ4*b;CEi}
zHSLv0&uOW>wtxd7qGUfP8v0B|dytUTzux6v$_k!wafBs-(%yM5Co6_D)|mk}7=>fX
zO)mX5!WcBY_sCp_rlvSp62(p81=1NUYHg@^wCywF8d#i^{%zD0yJe{w0!U?6F3>HG
z9_-<?RMN64On2fMW<%SlN6*sy#{DmF*(I5Ziblani>ADidamySL3F^s9(m(R4C|?}
zITlYaOdVCg0y301by_HMGpEjlD|K_1+7td2f04*(wXmy^d@2)X6sOj<qI>s7rtUO<
zauu8kU{TJ@X-jk_(Un2`@|t^&p$bJB3qiMFsNp{k*M3e>u(F+=p%RXtzmh((vH?v^
zj6{!SAKsRUEuktCwOr`GH^X`7CO@)Bd>;f<dOK6*XgqZlAD9&M_UT$lVKxnRQOwmx
z0UY>U-xT-{@FH3n1BDyuhieei-PB^TM##xzhW77#W(ka`)*g4qb2?&46!ZJR$uWl2
z9MklQ+tYC=Rp)g7V-izb3o35w?F^uFV@31ECy8{UGQRfHp};2`4w0NJ7u~Th5znP@
zfB=BV)qD00+|52}r5-Y#Cb9xTP+g97r`k;t=8SwwW9kM6QOP~?9oR&WhF&W^^FV-}
zSOCBcMLYw~Y#gsR-}|#$MJX8D;a!g**~_tloTdUCCFH+~`aQ!L)=y=a_!7;UUq1he
zXI(R}R+0NbB<J;t<MD&<qWI9{E9Y37>mu1{3E)M4cun|bjvGEpA`_nP4m<fVd$$Zn
zD&sQkqxqHLh}&N$Py+Y>LE8&E{N;()bQ8d&)VCmfVM{vFFT(qjix`;_9viNomXV~=
z2v7}mH{I|TH>9b(9t7uAQl~+EtzhZGNu&Qaz-Pi;{@Fdb<RoOFCtPM=^&K9o|Gj5%
z*dD@COOTp$_y2bEBJsd}Z+Zl7i4e^sBqW1Jy1~gu%5F>l%uYFtauYcGKPM#1W&gbL
z?b>LuD3OLK=M$Y{f3BB{`DtxlxTjdjE7_E9?x8Ph97wd(lkZ*)1}5KWq+Q5<XbDC1
zwWnymBrYUh*bg~v33;Rzz*R{9?F-)De3%wMXA20E#HAHQ<HtWnH9+_!>?tL=b%D6X
zrq%@R98&PV;|kcEO7uN-m@ZZu)bP1F;nwnVRg$bm*uu$@`t-i{4{iVLm4S;pW`AUd
zF{5U#+dIS55d$N?;EoJ=s4<~eZSGblB=%bp=XQtbJDS8CaS}~7QF#m7`m4>Q(vf4m
zp#SjtN2n42B0c)OPbM@A%=wXR5sdqUMf_4n{=lrZLeuYIKmH|pE2)2Le7xiKAD$Rd
z&R`KTL+ye0Ufyj0R;HNek0X62ftIY9dfMLqOp*R2wFFQx*U7>jWqp6DQ{NF$y{8ur
z-<i{F;^Ry`WNmloEmAgNoBN-5G>bW=AvUir5hW+_P(`FyVYT1#a$A9<DQefm+px@<
zlOj$9@6M|$j5h$yR3VQ*RV*ajH6+60jc?<1D{N}KT~aj8>AVD+T@@W8d}sNJvX`s1
zkh_OS>Z)5wTw-E#mIvT_SRJ^-JySr@a;<p4J&aT-rFFmT+ld%lmy7%tOBPKk^EdwM
zdg_usD2g;MCRE5Z2)uO?p|fHg>@5yEC(tkdU|%m;vS%pRUBC1Bj+!ysq||0B25HP+
z-JAnmPLoJ{$H;R&HMi1ib-HQP0VUkwDk(d&$o0U8if@fHc))}y9)bk0VP#Ue4ecw^
zZ{(Ix;tu?+-9b15U>;krYB`(Nu?K>Gs9ZapM5CIR$3R0d!h;nL#@|>pxwJg%Jxm`T
zBiqwrq9avH_B$y*^evQ5!>A0qgNv$dQo1a?fTd@fjCO&shA0<Lld1Y0$M5mLz26;H
znRlwp)v%-~GrG+7EY>UoAqiQVCwg_S%2rcLraRG7EDR$eXKADYi<0%b<2_^{FaFeg
z|9Wnd$MG^xPj>ujtX^%#0H+^g(6qg!|0iZJ98EiSABt!_n;L-w`?glQ_^$wLMy|Q9
zE+=eHrc&#byPNtBTxGPUm}l=-PTlr$ecp+cot&87RhEe-M(rdnhY;rKr1yR{fTL|M
zfZJu`Sca5q{~FRWF7QiQZWSU-R@e_|+<%Md5@K8UxFTcvDl8`JdB%{h)lAP*9XBAh
zs4ng8RcJ1sauX*uar*2XOUf>L%3!b}>t&aqRaPl+dcxV4xwq#p*^1vs0Y8UIgr-Zn
zfoGKDs*5#QBsf^J4K@6ld&Pd9{n_2_krQzKxqD`gA(+Kt=yp*S>dQS+FdMReiad}n
z9vy!izLHYA3Qd_8?M&_tpR4e2wgzp#(iJPR$<bX1?oIqW>K)yd*uyVaEHo8Ag-(u9
ztK@y3;OV^>5SmEKZ(B-RrT%^J%uxVh%j8!xc=n5hGc?NW(bG>+xhZ8veF&Kw&>eRs
z*?pdm+U05=Zq|kK=iGVRE8q}YDS9_yYYxXBuKa`f{=-Y;H2M$kuvl*RQ%<c7eD<@_
ze|RM~wx?{T%#mlNYR3d?4pNi65~f4Yg70Vh#}jF1F2*r;m4uX%b&xyf@mHEq>pW~x
zO94qJTb+8AmiBP#`u*bPDIayZVj;dQAE?uG2Id_`{1wo%>LHtcQ()RW?FRq<d{)9g
z=tzS^DSPau(oJ>T9~_$JbnlK_+|?7b7u!Kh4jpogpzTo~{aROMgUWj~Gs(_spH%+q
z0r;rJ1n200W=jsU-R1)|=2q{oZr|59@EIj}a_d!~Rw|zVSc<m%_aq;!p22CJ?xF53
z;BZSxi_(Uzq+^6iqug@L-VkW3g6J%BGorAa*;4R{u>yoMeUo#y@h?f=92(!mHiATf
zT0&nNm^IWOy+nN>q2p8WN3$eG^}RKL9#JckPXd!t`;Lu1mAX%R&D?lPYC-Ky)BDzK
zE7-lr_%78UYfh%bDwE_}9K2}4W!fkSt11p+%;c7%3amj=_%v;Jst-4=wvWcUPCBW7
zOuWR2)@WxPgN?adN1DN$lq%CX<Jhx!ou=%xj+g$=H5A2r6K7h1`n4C$uJ_mWLIgg*
zxa!c!(>^R8eX0FKR$)6+*$<>5)KFCVlLoI1tWL(*zUoC`&5r7WYOQBe#Ph<6;%(GM
zzDgfDJC_n099PMoRYG90Ax+WHT@2hu^{udj0JH}*>zFjDp~aFfMtPL9%ay0j)%`Ip
z_@T+&sRg<vGx~ub`|9pCjY6S6oA$+1ZJX`uPB$Lix`~(q%R>xt5pspH(dfzT<+y49
z7^WpWX*L>d(Np!;Em^;|h7N*H8d5ivC<`<CCKX%z!8D*++t-MPUsk}^Uv{9hijMZg
zF%fQLI%>A*lWyc)5#HnF8KJ{5H+mo*C%E*_ff`f!Fh{GIHucZt2p7$?xB<zCNq2{W
zh17f^t(wk2t$^PMxN6)lVhvzU`D;1Ik<UyviH4SPn-b)4_FX4mVD)jdECVmb^uD2H
zv5~%{Yi{-krwezdMR+EmHVz#x&|4V!8p)XT9xz*sPf?DBnodBQ7<}hHyz0@j`(%$c
z^?#kvHb_e-N5nkywk0_g-}(DS0Crj=R$zxX>UFy;fYL@%i9&iKv6GYlBXME$n8|K7
z?uDWtca6d+0tBu)2B~?};WyDZG24yY5dF@JOFhC!EONU)?pL`r0S_Y-CX**Q-#0#4
z%x#b{#let>lo6AZYp-wVA#dEbpW<mnr{z_|o`#oz8+dD&A8=XQp<zqyW@qG2v`H>?
zP8Ha_z=@|B4`Z+oh>4W>FnO{04O|S*z+OaN4WxBf6g9G5AWAGb6^|F*z+)K#Wo%wt
zWHds0Q-+=#fsW)UPQIp8{xr@o5DD(b@tpuntK?=iG_2)LQu*ZhGTJ}i2K@r1iY~N)
zBaZThCGGp=zM}D084iAez~gD=&BAX*@pZW&k4Ef{ab!YVvLg2R<o}H$a$h{>%30;c
z<q|Xb7f6~UG*<!i;*$+{R=9?Q7%u&Q+l}zxS`kUnsS4Ywc&(|kOpl4^@$7~rxt~Yf
zB&i60c91GsrR18O{wG_@c?0A0-<p`bae3867<d!*@c8c4+RXKb?$81XuAT?VK%bH{
z%8aZx4gDlvKgE87@I(^B-UE^%r9j;|15^>9r}PwZj51{58~FcQxoAz$;ldUyV^R0n
zE_1LLY1)pPo&Bp{*NVRd7c{v6+%DL+o6RPPC{3{?@u;FoD(xOik2d-x+*{PsU~?tx
z6_2h2Bi#5uyabiY0jc~F+T4wI<=RDRT0w#1BZmpTxhG&5xHVOqE}wzHtgmSb5-g}=
zAnkgjs%fBp&Or|gw`@&xFb(9Lb+mhhN$(-`BC+;7L(TG{y%LZIe2;)KUqlbx8Tcl4
zsRFv66y*8H+_cp#gRLCD6HRTIXp?X~LglvCJ75Jy1j-ysgNl01r_&K?=}=#paak8T
zqicB}*Pedcs)EPArmT*4b|rINveoAFBh{yn)18YkxiUnqj^V+hei1jVCP#NMrCb9~
zZ<9EQ%CA`P0IDg^JYjvGH=Ps6Sz2uz6sC8Lnz5zOYkEgAu&@eEUtO&3*)^)|{bV17
zT$I)M5APCTO26|?=GqbDJv9Lpr(8L^-OlYC5QG`k&_uQBnyQ)xSIr4$tZ<mC<I)wr
z_htjC-qNC(igA<s>JLR#|7_=NP)~`wUue(l*RaDGvCxM2kPCM)7dVxUlIjd3Mj9h)
z^*&zXySO~>O{j&8H38DTdU=6I4+m|0RN-CJ-4~o8{YN@IBX97nwvW(||G2Sy74fLk
zm)&#G3E1K??$|RHA|R`wUmuht{8Z~#XIgur`Yf@+B^Gy8A4PY;ipzJ~VixD`vE^N-
zjI@z<6KOXPki+ikVoKB=(`%e=Zj0~MN%ibL*GEltj|b{}NbmiQ_d}F%(QhaE8_&>%
z!ndWhiPlOI+z%`cY<$*RT0}c}v+??cKxr^7^d#?+%c3WoC710%LS6ja%g1r`Z<Mn9
z4>72Byhjgpx;dG=(GRTGoose#x;95D0v#-`aW`{}-lN=7oh<2V$FHI#g1c6fLn+h^
z6dAxR+IpS&4|E~$B(0_A<&|8vM3~K!3b;#<zBU(s;@M0B6K>Me{wpCE1e{_RTSp#j
z?|mz!K4Y}6tN1t>k+H^H2?0IO&E1Z4^&fe`)TUKPKj!_AXy$Tcg<<1QQ;WL&@aZ^Y
z7KDRg)PERc@8NzZ*mildVRA4%o5b=8;NUR0HX`9=&gYY<sTW!rWy`d-QU$7XJg5h3
zIL}&SS*^kKgwrc)vcEuqHbL8+R}4Y#V@hkl32!jBpx??#iO$IxYxRCSF(I%_<wOJp
zyT9LQq)ChpF0q;Srjk>6<<R;>E1^+0Q?p|PlD!uYi}SZ2gf5C{HycoI)0Ea{az$%y
z9qE9_vrL;`)1Yx(haruj)uXjpZDa@;iIQBCr)PiD-7B5vjgmz36X_|-LlPMcG{@xq
zowr1@G{I>lP@-nGFS-5onv4f*pZH$gkLx+Oj`fw7tA8sTGSYPddX~m!b-Ld=$?lE_
zE$dAe<TNSAO9%zUUwp{mUMVJbER7eGh?~(tm<mi#Us&01gmLFMumO^sFYdFoyw6^Y
zNEKG+8&oYXHEWnKRIi&v33B})nRtRhKGu;3^N?7zj2NpJo4njAGuKVa9y`aek>k$M
zmL(d7(rMeAwTL)XEpXA<xEz3f&_#?$pw(9ZQ&c40)24IR^e<y)eVjGTw54<q3PmTi
zxJp3(F`11eatva^dUZvcbfqfO-<0WG*A)FYnY%oUfC^KWti}uJ%TM?HW&0r(8~=9`
zCdEI?T5*`0G@IjBemBC|Z&7g(FtGJ6KHy6>%5~`M(8|P>zY&@*mDGgf<3MNYHY5RW
zcxsECs;2#Vp-%ji7~zzP^tX;(NTaKJ@>3|tyWjP8(ov3g>8H%nfDY3>w>!0czP$9O
zB&%HG6NCu9u}%iX)QEx!I(R+9K&rw?$+$(6-v$dY{*o*~k%(-^ddNmz3aaU9sn3=n
zv01^hHh~OMT|(X!d2_QFCMR@owQ{+dsOmyggX=Xgp(JTm@d3gg)HC-o_S;cMuPbVr
zH+H;l7R!C@JY6P^toY&@89c3RIxrK*kM-F4WGGZa4>IY`QuDY*b?t_+Ct+A$K}>wd
zmn;S7Zvr%#4G!9x)(Nu$gYxW-YdR&VPDMBTV1ChsI!OEUMb*>X;o9a38EETY#MjB{
zgd?s;fRw)AX}I=ofwvVGi?AvwW)D#g9*s?V0<Dw4G`pTD$)QxJr`z0y_a>({L~!(*
zzVVYvUhfnh_)e&T#4G%-wR!8Ih&C7fzyIOQJKAa%Z+*^JU2V8}#s0Ox<~o=Ai7kxB
zL$c`IcL<>v5x}DTLBWE=Q#nNvp<Y5#EH9wXYJ4@8jpf*lP>)_57}C(23Y!BDLSi5}
zwQ;?5j&WUf_sUJQCmtml1SrQ;-{J6nL3F^_)~0gZ`^AUX=rxTGvVbR;$#8<+D?AO1
z!vjt9uyPF!#>6y0urMIcX-i-p41Y3<u+wtCX4GDM`{^?BnEI*0uZ4)MB0f^C5N)I~
zmXu>7Sa=llcjzcORgr<>^StCWYh?1(0uN}jNs76h5@;cK{A9eEH;EuTb5NpxK^8z7
zSLC&TD8_rjj5q&hQ#rNRZm`+T`P)L5<1l|yV6#hXole8MPhZDzwTRSDw7lqOCJA1P
z;aIC!F%<3V>Is}jg59D&ZeT4b669DDo6#Mc(S`#dFUYuXfdPvQpxVs`skst(t!Y$g
z3_<U_0k}=cD?-vVoDB*>lj^%T?ELbIB#)j5j}&PN@ZJ4K_syp*MK?usu_93Nv$9iL
z>6NV}(>ZATj_mx+UQsh=^DI4(DEr$}$QgfWAu~OvK@$}&dV#<lU*x5d!X;R}Bp{En
z^3X&;oaHHC>=_79`r<JeeNF=>OU`>Pey?rIB2L5bu^;o26GBZbTC;nfQSWZjPH5u-
z?_T}ugS;|fRfDOW%h2$DRR!vk7Z|>>!<a8_wtninus?U{`?V7v)FHgGW?QNC>c{=Z
z<=WV@4JI1jkb1#GrA8k!?hW;MQlIayriE){(|m-eKOyy%Q>1r#!4lqavKt&MTOrRg
z^DlO!{<JS>iX~+{&oaA)i5w`fE@sfpMXbo+!6uayEV_HKH|X10fZM+aph&)w#-g8I
zubYVVnzZpf`4>`|BU3co=So_rN);m_E~3!$AKu8=QXgD&e~BaSiNimItL~{Cnjuj2
zP5lKoL-|{zro0dHFYdE59g8x{N3K!=)=3%YZez;)dQUL}OF*JpkPQ$gK~!8{!}t`M
z<Jy*@B`VcgRwqG-L_V0!e115sy+qlnh1kCu%;{n!Z^c*{+CcaF7BR%DDIEDpMw4P%
zWXWPj7OlrWQqIhz1t}N8PSf=|&SbbIseKg!0EP9`j}9+~tl(PLS42zNN&sv|piVt}
zcXK)*6Q!4|6(=IyR4Ze1C9-2z*=xeaPrsX0-{XPvLAd}{89V>OTbyDMsHI5bs5R~N
zwn@J+lfHpg%b)d9W?5%7<h{V$+suuBpA0*vIg$6?TU=e0>{j9*3Xm#(_$m44r>b}K
zacwN!8~VxmtyT>#((LO7n?2PxprWcAzTx!dbX~u$ag5m_@=#FuA1Qy_!a?u-0(yng
zqAnrbUc`s}k2ylq=0VG<PAS}``d2yp20Ufy!Gktlpn?+{8YFgm`2W?ShI(pt`x-@h
znEbiOGUGq5dSv0J3K_NRWMWww*igO{<W4=R()Vo<z1)y$_HJ+)nHC%#;M91x+IHt<
z?x@+;thqaBfJyP=+fij!AlU<or?u#^Bz8>z8O#qR@f~w(XHVMOe82t<6x2`Gie2fs
ze6E|_>4T$}m*o(ryZtsdsA)Es)6CLND0Q$@@V{TE^!(QKVn_?6qE!^T+Owx%S6TY=
zA7hE;tHaUwjkY`oI?T^5t%1}%Pf7=0v8^Gi7J-gxuv!woE4Sfjf*TogNfCD7-Rma0
z!R6HkS6!oMg@Ko4YOcR}7cHs|#+4el95u63JXnK4M*|%JA&Dib=Pt^HJ%cyDhPv4k
z+SGrPfhcMGJc0u(CQBBJ71Co-s$Jjnn>;l*+{m~Xe%B(@&*>~zjarVu)#T~o^SiK9
zY+vx8?SeMS={8?C+Z5k)d1$UkEBWJ|Cew^Phns81yEXfDY-_}iO2VTKhdaqq{@GgC
zDg!V(cB%A!aeOII5Xn(0NY?s({@Oe}PB<v(H~e?w+Ub9Ia-E+h<p<+p(>Q+~`RLqk
zWI{WkIb>$bnJJ-)j@<K`6I-c?flzMmu0I)>km&^5Naq+bAjY%-G*`pYYFvNvW^!-A
zWm5G@GO>qnbL>98`14)Omt99TJ@*N(&!r+mX74?JsL8w%NKNqR{fvDvA_A0+e0=Kv
zFQp2nXv6x}jCFd*3muEA1v(gI8V0n$0;uyc@u@l=O1mfYI@T|z8)A5+Nlc>sMRs}>
z1YLM1(xe9B8%3}vN1<^7oh<^AmoR~lqdX?<y;2A;aN5pq8@Gp<XSSf^5GiJKY3|Cd
zN?K5G<KWI{q6ibY2zCObmB3tZZuc+&{!-aNgcfbmyTpRL`m#})8brNMHW#&+YJO6l
zA=FR??y^??^9E_-(T9<!55Z5Wwnupt%<FiV9qLzU36OouoY>j#_0+x4U7|n2qg6N#
z7yusP#ERj-HIK3Slro{r-KhyJb`{aCa?jc}*&!zt%<hbM?BcIra3<3Lcp?g4dGGb+
z;rUn8p_%$rhF8MqhR}fO4lj#BQ?=U%ICHVvN)*3q2@>0wJgw4P2-R6ZUXU5KI$FQw
zXdGPm3kUTv$Pc>mK#zMRMbBIWjhrUcUuvEvdWoCSSifmd+^&x892At_exx42T<~BD
zSjL;(HP2w&tce_v2Ar#h)d`43%ghntIavOYo?v2fu_V?H%^Z2MSVe8*dQjVCKf4L7
zr6%9S1dGfT8wcTUst{VM!KL$Y2f@)Klze_}J>3z){YE-DInD2j?@`WFv*x7A&<0K&
zDV-n>+_Wj$tA1R9A?)CCm5L`H?`k{PZf-6K&Ui$b%h2_qO|DCQ&pm@p>`rY)H%6@_
z{WmB?u-T;I-{rqI`>HqZBTleaFNr>6J{=Pw?6Z>R*7`MO-@v|Q$MRd!{wV{6*gW}1
z=ngM;M^+PAE`Gpjv&aG>xc$ka%R%nPru0uDPC#$`P$2$!AlaKdR(0;#Po$f%JHhmk
zJ47eIu6}tQ++D&ABQgBx7{@qPz>!_ME0P)!oXn4CdYV8N(0^d{1qq&DxqDU4_YK8%
zI7VqA`pak$`E|ec8ekL!&^37zofd!eX&};!f#+{(8H8kwtq!0|*x+_K3XE*sB#-Gr
zIq|;9OEPBSi^NF~Jh0DCUe`HKJ&{l$f7dMhoB#y;`c3R8k|$D>z^;QNT5dR=FX%-n
zyK^<ON`#&Rhx&66L~vMwiZ2VFXB6N@Rugg2A90LhMKMspK#-g!;>Q5GaLPIWfjLFp
zDQ@dBtn*Pub5)ywL&mW|GL0gQ!A%mRb>5$pf;%3q#s1Il@VX6YL7nJnMn#O_Y$h!E
zDYW(+Q$k!{eC1?Y-Dh~$ew(}FAEH>A^|2W{dG$1AH}{R<w)tL6F@11GKF(&;YC%Nf
zPoeai13cEG%+-spIj7oWa{RN2GLJ+OL|6vUyuj_l)_}wM&BD0$B|!k-rX!0R*}owc
zzx(~_XfJQ;x3aIvhb+|hE4#kn?fX*igtYOiiQ;c>Ui4MMr$n{mg4IlHQHYq|Iv2FV
zf$)HwyHn<pTT;^CZD?wU7cN=)zpm$w%IE_3Q5f{}hR`xzgV~W!Lp|w}aG?P4G4@4<
zs7}|Ha@;WkUc*}TFGYhzX}as)Bik*ZC+EE}n)#;rT}4Lpg`#IZb-;xSPw#m1UUFB~
z)~f=;+PAzNPWIfHMliTO#-sP$lI<hwim0?6SDP(ngrWNQ73kaj13~xtYMec4zGIs3
zuV46nrvv``BCENb>4(9MKX(+p24`HNGNwL`LPA<0oYP+yabe3|aFo0~aplos@GqB|
z!f0>hg$C7^9WUgSha{bKYvN3{_-(E%T<qM}E0^d;D@)@kT-tu5?BtZJP^TMw?LS-p
zQ8x0HR4BmsWtcW-*+IQj5LO0o73X=-w;qohS}IyBz9_32g}=;K-%HHD?!2|DM5UE#
z1QYV=%yP^G8XaW`J>OZ7mJSyZ9d8fb?(n?ih+<<kjo6spmzbjC3Yox(!9^{BSp)ie
z=LLBY>WT>%*{90rC191o?%{uUZuDQ@i0Fmz;^LfpY$_fSV4R_HrM&4Tgf&l4Ttm;U
z`=Ezj4fn0CxXB15r650(8BRO&##QfOBqvCwP8NfOV0l6}Oup>$$Vdt@O<(sSc(OPW
z5=`QgYl}`o6Vj2DYtwFSTo6aTN@bso^RaDMv*>DzAteTAoAbLZ(W^x?pQkv_XRvHk
zm2!Q|L1!yR^H@aFLPMTv3hEdzG?kL62_I-_LOk1KD|5@ZjT4=kv;gVeM8R8`*Uz2D
zY<a$eW3&?m4e{GMKfDqFE!Ss08@%gO5<$(0@%55h^#324&N{5=_YK=vC?X&r-7vaa
zKtU8n59t^sj2=u{S}BPU8zrrDH={vBVx){_!juq3NO$<|`@X;TUycJf;P`BNp6kBu
z^OOS*x+FKci0dg;{lOZ;EYmW#jNA57yTlAT=;7}!3+mvVl6Ta()5CS4_Q6VfZ}3@n
z0ajl-R_;~`*FWfkd=tm)zCp>H%$KeRElhVxeOKN)9}>qu$)6d9G?mhug}hz4?gR24
zP)2m~yWK6!GC_$PEF}pd3t#t3Om9B^EEwv?r&&81Jea}zI$ySH_9oCa+1#N5UPfz_
zS@akNBex$;am(0!P9+Y}Xl2XXI=^YHQcb9}Pm0jI?^~;RVqY4^C>7sd0h-N>Z3P36
zzA+J;oPFHX;o}ErbYYe(EvCoZj-k>)rKCCJ((!raXC1pi9eCs<15Njx3x^d^k?6wn
zfRIxwu`cx|<tsKjw+$`co1iZ3gQ8Xo7w%;@x9m0{Ixs-w7wM?2?z(<2?C0K-jP%iL
zJH83pPRq;Xd`u%%ksD39R(Ze0_mMt_jF!SZ>H@7vK0lb$BJv=7ZAb@NVEo1?Nk_22
z<aPR`VH{y76DpgjIZe}6@*`px@-hBoq%jRB{TZ=2DWZ<m!ZNxoNPjBC&`|L$K0lbu
z_v8L61_UK<Ss1KK;1wPePw08i!^%fH{4eYTd-L_%KI4-di+KvhY1SFt3}|4A%E_9;
z0Zuy^x(SpmOQjPel+$!!Q$r>|W&^A3?2Ar5wENLL7F@MQdVW#ld?dcFlDRF1VKerm
z>WX1_Dxy;w=%P1I)iUfL{maj49t}8{&>Wg@^?ZR##O}r})}y+;tU=2{RnHan-vMSa
zDwdxCfv7Xt-Bf#ir=pG{<0{P|<-pl+sb6Ut*0#nOhB>d4<3M|5)MjI0e>wQy7KK_+
z`I-)?%D`=J(OwS_O0$?OAih1$&nLu7m!W~bqtMZi;Cvg7e`>quu2*TMRoQZUXY{N`
zCRb0@T>YCGjB@!O(deO$->RY8kD=e*X#KS6dJV<|hLXa0SU0mBE%y9jnIwz6F*nAn
zSx$O&p&2<iy*a%?Tb>ChgFvQC#cB~_Yj$kDssjGorQSv<1x!^*BMY$A<WUZi=91Tz
z<yJ1~fl^m>0iMP&{(R`gN6rHL@WO-`UZ(v_=b{K_yO*y@2i3WLGq;$ldtIh@2PEJ`
zwrlLR;$;gBmu}&Y88GN~6mopcEM!uZc-BPoS1gLo-B{^<)h~*4)tFwttaoVD#Y{6p
zlflh><r4R|&S&&JSvT0yucia@9kX;!$V$3@L~b*juD0{O9S-_1e#YgCJ71;%A9lN(
zUxMv*DpN%xw_I*3vIg4q8|}FPP~g354aOMn?Y*6&9_8u#_vh4pU*z39AZ!1VZB|*2
zi`rBJA-Z(-;E+9jsqNMvQ3LhY&lXldV+hbj4Pt85HV3gTLM2m>4gIm<gB!W^cg8iY
zl4-?WiDDY*;hUf-Juv}Hl^9FcFQE5~y!PT(@O4(TpU{0(O4w-&5gN*xzz7!n`bSj8
zrIhzWiuX%>=iXaqu7mFV`-BW60#<XG#}e_6XfITIRPq;Pu_KfZW2}iX%OL8gU?(+8
z|K+Ql<#O^h=IuK!n`4M34q8e;Fb#^JzC}{C?}Q6eVKcr+{~C(%ykC3r70i=+_W0SG
zJ(cTP2s6w-4|$(BTCs&vw#%G-*B@~YmtFaxm&}Hw{afEh=GH7iaT7xFRbS%QJn{7*
zyJm*)LvGxgUiYYt;pE_ZrXq7PhM-43o~=HZ&8WK%RaDQ(R_}TDc-rA*kL$<Bka-Im
zVPa*Vm*8!*)38ly)RGvZY*2=oh_cPuAfm<BOQel<V`+|oFoOw%!~^JdQe$$u&h9`~
zA!GB8$?Y?xcah#?b8h~(vcA5yb)p-aqpTt2;59Atza8;zl!K}F4Y1=bTeTUd;qn2y
z0q3Jp%KDH8PX|B1I3I~b&mja+^HDEaDP{d1$~;2UGJW<HBeI6ZhI39dutNy}NiM+{
zAww9z0Q)EjRoMgfUc1kJyJ_#A?@BmL#PQ?{ob?G#NG|UEBZ@e7KyCpzPj+)wjxcE>
zGQq5g-@Fys2#(-lirn+9#3Fhjc9hR6nY*hTLc=dWcc}({IH<6Sab=AWh!-|J$zw$S
z{kmbU>@|8A|NU99)wIFiO7R;5Zy=YB{J|=mY7AFFi(iN?*6$ySUI51=&T<=DH<jEP
z;*K4KYvsyOt)r-u|7Jh`4^f>W!D&f2wh%CtTbQzAo?2<5>Bw&A&HEfx!Kd=x1rcuo
z^o%eg?S2alR6#SwF+i+`Lu3r!`&s6j*1#m3yfURP++KO~bZC8=F}7JJ8HfmPuxH-E
z$v<uR#hg_aLEoz>(sDx}EEjvVzvI~zCo{LDOu}k+7fWZk-us)`=$;OG5w(U5j1n%4
zkrj^AN+sKV1sT$j4k2l4$j|?}lu3blqwmLwY#zdga|$Di6`uH3d!|X)zmb^$d>Yb&
zs|A$gIm(Or2FNV9ayYIDb@oC<+)&_<S1Oz4HnRYV-K^Ioz)UwV?o8hM!cqgT*QIwO
zTCIyooo4VzwW&ddf#e}F&&b^_B%Cu7ix<@KtREi01EPvV2=8_9ZVhRKRos_<L=E__
zIK${g7*77xWZV#XT2;fq{@de5>Z$q_mr)P6XVRWt{6PN23qc0sqDzJjRccMsH&YQa
zCr0o5F8{0mx+baxumXevb2Lc0X=aXJe!el&c+Y7{^i1_Xl1I^a=+AND$*M>8PjZnN
z?w#49Jim}ZKv^O#Po%qF_f97Gp6=oTtz?@w=H7IxaZewi9AN7U&v-lQ#Ty^0bRqBD
zDbd)FElNOBNSmSfTX@4m3XrDr0A+pqM8dtDu63j8*+dwX40l6k2M+Gm7oOGT_)5!C
z!Z^!#0468hJ^_@fH?*&!^`NOrr0<rpujsQ$(}$-4M=EYSV6n1$d9N6NCJ(-?(4^+N
z-uFnO6kg5H3^PbFdKjt;#aod*U6Yc~a4qL6m<yUAk;2pX&rL<w!!&BsrbF{k1}0&q
zzU?d?sci+WEt+*k7SJc=z#PQxZZpNv5DA#($JcPFi5L23rzKKFue<tCIjJ0No6=nc
zo|6KVpEYNx#f^*`m#VsR9rg$8bPFMw9N_2heB?$-pyVbznzVFh<zjjIS*#Ja4N!}?
zfL8RcTjr(5<lVG%`#~^L3$s7k9OgVbC<+V3^?V}X`&!}mw6bg|MsPepeq;Kv(bV(S
z6Wk+SsJXw>pYKg^!iBOcm+Lw+Z>72QRmY1fo(=_cEMfof2Y+Ga{nedHKczwHi_2q3
ztK9=()>fmBh)wuTZRJsYn}}9bRRQV}`nn&d<$<C7;Km-{?Cqs&U7h{&4vNfv{*Jrg
z%A})}4tI4{XB;=kjCmWBLhQ%zz$<X}eC0t;XM?^LS*^T>9fJYS`EGN-fqmAV4!Go(
z)dzJlinqI`Lsh$gJb|iAOV$!?E}oN?3YTte%#gRZh2`fw&498rOp@%*x>$<QZBXZ6
z8IUqZQ?FVFg@YP`jRki|j(CeSfGVROyPjarC7oE|)@i!#OR{~cRg8Lms?S;UkG88e
z{}CzJ2cx9nNk>ShM<4C259(Lqped|W{``0k9u-K`w7=9SpMm#CkkD#oK$x0Kxht>U
z%e_<z^d@i+i<<pNy7YM857zLUwK96NpyBAk!MZpX-Y4iZuF6sr=x&UkANBCG<|+6h
zPsuq@!grAnm^+`|2ruDI&b!X$iILh@+x)Erlt2U^^xL+>f34gxY>Q{L!^-JmvaPiD
zQmiDjLzqe!XyoME@4JkF6t($KSx$DCN&5Z?_p4wnw6X&Ft-gCaYc@ltf?DRl(Ka?r
zPjuD0UyMP?D{t!)oC<{Y2#(vk7H>=1F=^e+((5EMe{dAwl{#4VOfWM=JWyWW%#y`!
zGLjb1TCuZ2PHOXua^onY&W7m%Xxy^jH=!vVVJOcckg7blqIb0)*rQSM7byQWtUf~L
z$}0ugh?tCh_li7)i_B^BH`e9iv_Ey<snt>So;rl0u4^8EBs6YdXjPV1#VR1d`5wLL
z3I{Hc%%q=f7wcN%t<coY95(eP2HLmozdCsIb=F8cnmSSU`qr{MOmCY&(C$0G`+nO9
z7x^0hs`H_i^Q~lW0H(>-7qDi3DL#r-F#*&i>o>Yi)?Ex_>zjVo+%p17<7NHSP}VJr
z&CpKHA0#F|Aj27wMaXDB5fRJPlHmo-S{hjVu6?I52-6UJH`shtuc)SGRw|9=-i(l?
zK>|`#p?bz%mn_pbS7qH{0>|8}T~!#1D;q2_Gi@wLalv@^>w=oRezW1D48Bfon6_yN
z#eh>Yi@eR>Q9|^QT2<}#@o|g!<vP?Y)AJUvSPipM%!G%=qhnU;_;Cgk;MokZ#1u5%
z>$<k2ph!4>=eDl)aR_*whN-8q=0PcL?+23`mbP-M|H#6<>2$6cUNz06mH$oEXmPjG
zCZnZzd^EJwhmBQLM&3i`>bFdY=(Rs?M*26`+R$bLf58es3pFp!U48bL<ENysugZ`(
z6cdt>38o2S&1U;8`Q&|i!Q>7bj$AOG$MS)v<u#$P%$2n_;LHP}3#+%^8+UeKenyjY
zB%iB2ejnxPC|g8;;18Vk#?6TZc*<b@c-2jaV2#ftI5oQL%av=Zu2$JoGob%<8c5ll
zdh#FTM$@4nF&zwdVDuzUp|@Ob@sdYly)tZGuANIIamx}S7B+??nI%zy7h36t99?&r
zmK-XR?=eR_{nl$!Y%JBA{jg_o{dQgDC#tiNdRe9nN<jqs;18R*+w<>;dn_WEIJm9G
zh(`a)C9}84+Bul_Bw54X&zuj&pfi=-@6JJ(WK>^trjs$=CnX+*WmnNK-h2}KJczkh
z`5}r}Cn@ag>klez_eM4K!n$@YTcG*ZG>!i}R)YDD(6>(p!#Sm4h}-|=BULsks{|e>
zL68v{PA=zh`>nkLVgHq?`NJKQ4ni=4L%*JVFa|sre(T=#!36Z{zDbDKJnL@Mm1xCu
zN!3Q8;`zqWO|JgPhX%a2B3;4Wn)er7pGB{E3Xs@Dmi*3c7Lg|eil+KRN%2(T(F3%9
znj2+5L5iV5_0=HXa)h36=W$P0?<yQzmjj7oxrV2%w2Iv(;|M+aT>oH0lLfWRP^Q@o
z^XS}u|FtcU%3EWd@~-o~_IY9m;cxl>+6^wqi!KR5v*rJOCG?HFg!j)l5IIYC(9K6t
zjXs9go?7>`iWrLeYKCT96tj`OqkGFObHks3Izz&A?#D;y08uhR<$Wl=GxPbaXqVt6
z0_45NbcQAAh*ob>INYD`h)Va4kp#FjtG6EIjO%f^)EtgE9_8cJyLFzYvcliIc2uMU
z!Rk^v1yj?RXQB!3dzzSpu`%!0kw&p5bpbEXTmrGgS8Hq_G60CuX~R3fs92vYv#BrO
zwOcvLHEfaURY`?rx2D#`>KB?cL^UO{2N?a*QnFY+QxZgZnMna%(Rgsd@*1<#VkDQj
z=NfBHOv#ncUo7a!CO55xMH1YSuaEj3rkc3T;v?Sh!){k{v+|a_ciM?xqRh7E2xl$Y
zs%p0yBjam(xmNJf6EiS*_wTdiiUggGHAd<Z3&WI?o&KkC#cOQ7@VZI2KB9eWkZt_H
z&mD+g3<=ezdk<_DnoVhQ_BSRX+!D&T-+G<&FY>#kWbUxpttesg;%->ZbanR=qc?y`
z6f^AoMV|3DMm+(sp34r{843g-La#kzc}qHKlLz~TRwigTrJ^}ct}XeLS9xNxw2pIg
zI&c>!k$fd-dG%Oh`e|wze<ysZKo1oes0H`?M+C!A64l_-1m<|?il+Jpce}kr!;4K?
zpG5BX2#?c4SI=bw#OPYcPdaSZwDI2CNU;b@OHKRUqdu%N7sGYjmnA)0PybQNXOJ1G
ztK+M|%~WyrzU8py9ucV%2a&2J4Q~1XfIOH@cKu0>Y%*vqed2S(Tnv?tP`m9<&YcDj
zvd*5~sw`Rxxp1;B$uQ8YIx4G?EK>85eRD~a*6UhQsp@_){4jRAVOd_t-D02>=YAei
zD38eXxP+$HU_NE;Am*Esr8-RqK!~Qz{${(wHUQ~XANjcGEca%w%!ZP=us%;H&0cpe
zN6w_b+HAtMlD^c1R0umjcx?D*<+tPCpt$3LiPvWdd4<zF3Mk)`fny}(+B=OBd$az9
zF`NMq+g>t8F%#?i@wmWje*ETw-w|X4X2Z&hT8XDAkObg6Sd{)=(~=A*$fX`r>PTFN
z;<pP{*e5#xKecu<LaOc@zr(8ZRx$AP8^=_*Sj{(Y6;6lRSsy;!6YYN~u0aj+UA!qK
zoz>;1Ppt6&62e==@p8*C5XX0cR*3F4dd5_;Pifm9ELYRjpWfwQ)nQ=8(7Xkd<(t<(
z%T3qP1nPHAl^pj4Z1!A~eUZ=CbenhKWvH1sS`xxZ`#K%3!2THhCY+el%kW)&V^0Tj
z)V;R@dKsiLiZ+3!U^bn;jym24ndF5om#@16(NCMBo-MESN%`WxS1;*_;HpjBx0?ld
z0zRucyPwF?0i1=Pn6zk3P#GUnd$&dTky#pL(ZM%~^cqys3S;vtx<IKW7$+Z4K~YkE
zuv$>i4pSyR&4gMzIvrK0^-Qt<klJrsncwVy)Bf(oscBWHU8BDh46d6@;0k{WV!HXm
zjn|7*d;jMUB=W-?TG%~=ar5hD&<C~g9py$pi8H9;!4;p)7>kK7n@_w>($Wag=FG(1
z#Kl)K8BD5=N}EvFVa0aV#C0T4@%2%=hocH<Vc4@f0lqwGyI7u<4m=~FSo55}vK}?r
z_iOS=wllS2T8pNX3O@jLGUhIxrk94Cpo(a-t0o^C^GLC9<zY5-E>;P6J2e$YSj!Bw
zDe_mrp_QKi#&!9@vrVg2HAp}22*57*n*!aKxMAF1Le2q_OxG7rDWG3;Nv=EHPv<s{
zA7)aT=)NM9u9n9yOl~6o5fQFFn14i@HD5zF4VBo=68n#GVBsn9df*7icuBs?_o?_x
zSf1d9VppiirY3cyi6fitOnUiGAGPyI!iLND7hq98y#Fy+0qn2NBNlBWTYJ){Na)TC
zru*<Svor`A3*?*$h_tHU(>inKn<D1cpo>ZwPwT3geAtE`X>oX@jnEm`Uiemv%5?C^
z28ai$)+RRjeXGiOH*JQ|OSwosw$7jzhKw2MOZm_~ivJt+b6Np#Dc}Jwdv`VH-&4H!
zuyMCQQ@Fa=3j3}PTp@4e^gaJkBc|Q@yh{!`tx&<5xk(-HvJ|@;r&0cpApO!S{=z_x
zXAW7<zGJ*Vr$;}t>(<gWZ_p-HEy!daN;!$^b%f_N=GWUe49Ukn2ism1n4>jVq#9YV
zYrcC%f5oA0c*M4~h)c@bPes15(^iPnW|Z~q+MU`M>X4jrQ7rMEw@bsXe32|wf1I=#
zU}s_qU|}8@xw;uvo@bM4Xq6mrc#IYS7TBA&D?jcK{94hsQT*+ikDvkUPgVfRc$1Sb
zGr#lf_+|3-ARR!iRIG)@$k#VUW?2gvgjS0z=}joe14;F!n*}4Pm<;tEM{eH9>Zn{u
zRuVqjM&EExp^gxVvOO8@Cm~GNE*4#=&?kwRv+z;hpzm~DOvG9s+Cji>I7Dk~jFDFv
zOEJjgE_xRr+_XYmS;}a+2Yv1ERw~8>9-aGCQr47xr3(_1^Snkt#VdtlELpOTlWUdm
z&v{xLo#73_TsKfma%Hzs9XME+F|T1ZLsv39I895q-iM>+v+q%M{sOut6K{KK`Mjro
zXLWbfMj1<1%VT-dVxuSJH#pHgW8Ve;$dJVC%6XsT(P)z3I%!?a-L-iq+Ri;2JBhwp
z{jOFbwa3zb)LH#{?NqAgAO3(C5{M_AwZ!Ci!#VC#d=(q|s_+Zg6Z9UOJ{UemHg}sK
zN~!vavAKQ=feMwTAAf%(iY9)<H}3>rWpB4pPGumh0L}Hpsw@HHGdd7a^{^gV-7#ma
zeCZ`8Z|EmtIu$4bgDWS`0ei<Vgw&sa1|h^d5NIRt>)ZJEw^vnKIb~Q7wGho!<&1g~
z!U_?SVBLfumi-aamjavUqae}W*RW_e`d9ym05@g}tnf~4JaJ;h9_|hy3VOt7Bk_+2
zEj#X5f(PF93rX;ia62FTFc_c2Br%xrlv3i`AOU8c=FGgh8;y|rc5h5Y*H4n4Z>|1W
zPU!bGG1p?r&{xkIE=mV>(5xag16OJ$F;36a2w#aA|CK0T;ZoD(;r?IN^Pds*e?~Hn
z5>6`WcX1bXeO=KZx+a8ugyEynknAfOhK=Ju<$<G;U(O$|yTSj6?u%mb!j?w~pSaWi
z9fAjr9DlVmgkq1$wY!0KeIL=AzVW{=4EC5!gQ~xbFe`zqNaK|1zVoo?gpcEtiZqMr
z-Bh>kJK<AaRc%==D6kxox(nW-#cFi(X3z2(6bs;2vKRJ7dD85Q^(Zq~0^WelA{C`h
zBwY)=TU8@W=gf;vPsHN%Q5JcsfD#5VAFJWtyBVux=A^~7Z$KGT=DJrELHLLpRYjRG
zC~e%bh3_|{8`8V`p6+#j!WW-*%ZmQT@y{uEa`H|2LB}jzxpS=shObLf%INM_h!j^N
zJY@@aGd(orw!w|tj<g;V;H6m+#W`~`AoJunlJ~ay&_nf|fpfx^if@jjuBp7(1g8|f
z-3dpY^`NC$TOPp%Ib4|^T7;dAYWyQozVrCm2^LnZubX=0+(fr0QkvhtJxAQtTGI?o
zZAzRg>q#+_;3`I}pZ|7S#RU1fq#Cw)YHrhI76pO)#_b=e*j8V<-T0&B>}w-3-3N4y
zOe#}ZHk8tyWcGAf$>^E~MpY!{3JqwbK5@h6=@L@*sc!g{-|uUsZ*4fHLq$fCl9FN>
z6WYkuTuNQ!&Dz~|#kT`*z*?F+E3#~^gmy~v%H5+45e&H5F@9*&VA{svFmbL*ytH30
zVRYw`-t$y1UFI-=9&Me$z-V7r;dc3}F!PrSwsG$6hZLT)9m`!=Rtc+}`%k)lzh^Z2
zOQ|eJx*|8^VbYf_RqvVCca&R<Nt<qVfA+_^>|TTDbX>ftkD}mA0C)2ZxY{eE?WbPe
zv<~+ZuMo*n^F;;9UDM&&taw%WYokcE1Fe0_*GXfRqp^!PC|i}FP6vHR<r4J`yL_kK
z(J?PAiXc!Km-EaD<PWI#e?wh1g*=w8Z|}Tf&}oja*a>Vj(RD?-wF?q@A(c;m14mk)
zt{1CXG)hdeqps{+YMtLe7S$!<7>U7${>t5GQFk*(trC9L0=saZOUPfJ@61a-DBm#V
z8Mtc7c<H6V8uV6LPK>i$1~=D@=K^91vNTfZedvdsaG{60VH(S0Lt0t|jmuJ+w@M<R
z-84B|jN9LXYq0Yka1~p8bJw4_688`2dC4iZ>om*VYcXdHFpasrDjsX8HrapLJjtXt
z4q2NdPZiUfF8-T0$uI3gnNk!dPWJk!K;|EjV0V{sb>>%8l^XC>i9oBA@pX5X*qDL5
zOztqLvt304D}`K^2cOK5<T<pw8JQmPyxn$n6aHI~ayF6p+$Z>H!lhA2Yq_rI=Va?C
zMr)HA?Sh~zh3%U;nS!Qpt@ZjM`foH`#$BfEOX9wpT*EiWF6%F8#o(Frbm;KdEtezh
zYZcq?afMxfyp4*V<>GEeeWvVbR~4%Wk1vfi)$$sni09^G;(*5XUDO%BcPb+sIW(ZA
zYZ!>pzhnQWXmj-B@iC+Cd~UZ<62HeoL1{LliiYk@G)sUr<#Sicn6dQ4S($6^+<M(v
zgZLl+h<+y97pc1H1p1GvO^1yoAdb0|2o^UVcRi=hU6-*sWLrZy5??1BP6g~sA5WZ=
z7J8QDY~AB00qK-t?%2WdYd*7tuJf0Px<#0_=OTpIs>#|4^uGD(kz_(R=xQwkgWs;v
z4x7#E#|9Kl1@k(39A<!f3_7T3xraRUJFL%E1iSb!-lZAdr#h!XU4NXrVd&DKBX2wq
zRIzC_&g3YoS;~Z}1;tav4^Q2J1x*HhOg6P2o0In~#|Gse>b_xq#6B?<U!O=$7==l!
z4ctNVe$Uai)hdlX@DG`5>3@lIPYC4^j}wV$SU(w)IQr=cA7VM<o+^A?Ezk{J(YZ_;
zb6?$gmI+m#s?g#4-l30tw+*93QPvR~%XP?xS5)$|T;x&jD^X73dzTeCgH4aP3JO7*
zFZLX)>5sb)9Ie9`s8bP|x?1bBQ=UV@P2uYBFZxLZICx6wW@i13*_RBeA%mvn1N^6s
zNiwQ3>(@?myGQ*ymP~P7@V+mps)6xqhMS1e@^=&F3~mfeP3@-N$577RbDLb+t#rYA
zak@gYfLvnAw5UzXSXbzhFzJCFsty~_wop6leMF1b>;5?<+t9BefSyEppgrKl62Y%t
z?QZ|7-C$*Os<tmp4e5U>wYZIx88pzC8+A98rPJ`Uo1C1i6SBONOHb9VKAfL?Axobf
zgF4QHAqNE}#W(8ppPGAwtUE1Y8s(h@U6)38Sv9CbUh?x9zVSWChAijri89!uq3%QN
zHBjn4s!46Ij(7tUhputk_|B>JF7-c*qt;VtIt{%E2Bx$oYYSo3D7tL}#vIeZZ8zk(
zVAK0}@=uYG_0{L{V43TADH|$TYQKJWS-?d5etk5i?&-o7w6M%<{ux)<lOj=EzeAE4
z7=0w`v0RrRHa3^PMtiJ29VqJ2=Q2^MnSl{!C^8-(p3f@CZ+cbu3D`N**I|YcMW2iI
zf9j68(z<6?1N$+e@iTE9x9PcloLktXduQ80mkol;LpijQMeg>~5*3*QrKwT|b-kS=
zGC)y$W49PA)LfklyL501BmJrYhOy^^y*M4Vgb@_sa5VK!lf94=yi4JaRdj|;I`ma+
zPdia}mwl4-3Xid83g;oUxCVj(D9U8B-1g+vgnz7~BKsBd8cFtUIOu!CcEX!_UQXzY
z-`5B2u!m9~g7@mAcY+UDG9c}%voA=of>bafifV_XT0hSxS_r7<?d>@kUBK=rWC-5;
zL9cr6N95rSg8IE#?c)yg9ksy0LCeFW_p2Pk{?G2b@lWP?X7hbKTPR#0pZl#9kxbxj
zGMAD?w4^*uvLz(`PBJGU=6NvNNf4|8v=C{qAfZP)?-Bmw@gK|@INdr{Uvx1?Yn+er
z>(O*4GbhQW&NFuIaavc?zPOY#V}j8WD!$b!gmra-z5OMr*=qs{_Ki1ig=diNHm@K7
z;DUTaSmYL5;UpZgG{<sSKbq7CKj*`B+tFWmX7$JfD@9$&7EP4-yed-)kzB-IKeJ17
z824lNspHQey{b-!C9QuGO*j<IQ8J{d3}-)3Y<>9D=SJ|$#6P0#+#9l=D@w*<SgD|Y
zMA29JFanC*`?6yl8ES}|D@~*@sF>PpYfFY)uBYjH`p#4S$Dknq-3XBu6Q_Ld?~6s<
z;rqKN@5CZGXUYYok3MYUPc!C&h1wxLiC>QrM{02LHAP|bEEG{2Y6*zbQBfdi0b%Rz
zeSi(yE1CQ>#A}6R!0LfK7l$O_giX-lN<u()NpKui6kC-|`kGZg5uKNgyMmwuXTz5I
z#vJw8*X2{rvhT2P0A@(8c;zGV3H>)uJApEaoo;+=@9M;CXyQ7$ppw{=WD$DcP(%}&
zl&K{?dq}`9kr#cp2kj6*0R25}@l7Ah^am3#-V--gvY_-2-wYEV;iYxF5S=vqsE5}#
z8RW*+RO-<T`#>#hr{9*ul1mueNL5f)j!_<Py(xyp>j&@#0<#MOhY)N3h~!50-CRcb
z+PqoIg!tmq{koa#A}U8L?S*Oh#=wXvwsVhnvThuj3VF#a8huBA4?ls~<{Cj6y2xoR
z(NUC0pFT8wg@%>QQwCWgC}<Ha@A9n@8_J-n3Uw6G@C39O2iX4Qz`~8qV<r1BQi+Cu
z?~@@ed2)P<D_nn(JwM=`Vg|Yae(3e*;c@LM0YVw=IDF)}^|`Ebpjm~c7?-Q=+t6xf
zY)?v2%NL&qO-0Ksm13zBcHA1`?J>C6Z({JA2IFL-y1du5$&9x5v!@I;*11F7!(J@4
zw1ri-_R~w{&P&*08C2GAIdJYxQQf}|t!n7|-@nBJGNl7&<uU@g@{Ss)jydZG)BBIP
z`6=<lKkK{Z6K7NGcI0lFnW?9@e7L9O>o-BM_2mtTn?srzqhq?;{?>CDF0qx3<-8ek
zadEyq68t=!FPN&@&zPN^?^HBl1jJ0s$~RBl9*y&!3oKWi0Okg>?L_UQ4q8(5uRlNr
ztdL97^~UranI3N~V&ED?v{`}{4DBA?dnD}z#`WtRcHK{I<~1$ZU1J42q%Anuv!gUm
zDls!U2JtgM!_ej?954UXA!TZ}GtbSAnI2NJQ{=R@{kY#n*kGcX1fv~2nqprp#`;k1
zRMKbd$X<ijGG(}g{&PP;5i1+1U@Rc-5ToFz7iSR#(tvWAsjAc%Itt3#o_cDCy1Yz5
zjpfL>lTR+N6D~YsuE<fwccv5ADGHD$Q>q47tpVsF-BO>}F7%1=jmM_wjb(*c87T9s
z#&cTd6w~V8jk>$z47`~W2H*{<TL*)g5SpOYGQ;{M9T_Lh2-5ld>O%w5m%9X}0sQN1
z>B8u)#M<xzTKZH2ZYeemP&gOGNsB^D-<%pKlj00*X|z__P5E#O#wek+DMT5+dl5=R
zsv}uvITmgP)Pq|#8&%b>R!JJ}a!q31%aH2W;1z&!iv=J{t>+|9CQBP|G-=L7G<`eX
z5~Z<dQyIn}4|qB=r<o~>7XP?TKiJ6rNx=@7`PuVikM2{Z#PBh4VI@zQx(I`|qk-1u
z^c^J7HzJ#htAy{b|1RNWQZ1tHM@wzTS6GEwu?lGsbS@OhYjWBU(zDl9XwnU$UPHKC
zm$El3##&t@?q)7!l<AZiNCjZ<S>uzO(=eD^>1c@)u8qoR=dkSs-YGB@*Y6xq)$X)#
zEMJ43ESYBdAa9$!@4q-CIhtcwU@V^U!i>F{%7UReo)OvZRP~l{DY<C@B%d*c#;O7g
zC*^3DMhOrFAsPIU7SecBB)7CK0p&*H%@Nke6z4HF_$5wop%lvtb})1^1q$-J0x{)7
zV*P0Oa)zr0<)RWht%l4n!kD9>lMox7OWA=Fu4Z>XK1uytTtDP6^yYI~&b&!zv4t~O
zwr?`2lVANwk@3`{<k%q+`9!+pWJ>p_>fD3ik%0<)tB4+nGI~zsWeL?*yn-W1-*o4v
z6e~s^D$~tm7Lh+cbLX|<_f<}CcAt~D`f=99!X7>R(y<~*t(x-61s@+aCF?AaWcQeg
zV~nKRQRhYJf9d<G(kC6cQ@`}_=v3X&F@C5d<lW>sZ)Xa2-&jgh$+m2w>a0ufHbg{g
zIaBh-%N1^T37Nh~tqY_LEP<NuHt3Y(jv*61-_GoOem>Gy*!PT^9bTjjQ2}5Gc8zM6
z3<a4q13%ksj?}gW4V)?nz&=Da3HC7Mm$T2*Jc*Kkf67d&332KZ0P4Hi`jBW=#t-+O
z9Vh!u=qHnC>T2i5wOxA}V$_Yy)LsjxWhMQUO{YE3GreLIpQe3Qw%I<gYH5!W;^KGC
zyzq1GRbcLXWH?m?+`^sz*&#*i<CzVN0y{qO==ts>f<%zsRDu3ORywFwQhw{(m)Xn7
zbF&B~>Ec<`I6kf@!mxs;H0MR`sK>fQ^0Kp|xlkFvuIrj0h|3+ibW%qgYg+zohctAu
zP`hCSdR>(Twu0=E^V{UT=QY<#4i2JBpM2V(`e^6HvbvqFjjz^z@LX)+^ybW?N>YOt
z5mQ$Bw|(MjKegNud?xQ=cJI6YTtA|;fbf!MnhwCxO>YYSoRw1wKOdMv>&~Qf=_)jQ
zYM&Sss+MTr;Jo?+7Eb03+<%X!#*)_>vYcEv_<X3~c3VW#DZdk|4QY9{p?{C8c4jbj
zXizkSKzU8?f<*hTXXn-{XFT2whms`!6oWe7JMUm3@)AW$SP~{;7r!{V?RbWPegX|L
zEJ^K$QPGS#BYpnV*=?_r`^otdJU5Jcvww=dOz^}HaTm|0Wl~Z4_B8N^kG`>?n7z$~
zdg+e+qn<0&tNy{2<ITd}o7(oTAAEc3m9f;y8RVnyk!OSNdy@Wui8r`+(uN^j^Ln?V
z9%&cZA1L?h@|gOK)c>${mte{K$yop90}>X>@Zj0u2feyG#t!`-wBzsnQbsTyaJnC`
zd*)NPj%>jBQ$M5*s)w}500}ErXhPH^-hWQn05F7P6mCTAcDW34FQ4_Cv0tt#e;}t-
zsewL__>WZoSeS6Qqmr>l)V6V&Jgk>dx~n<!M43Ru<qm|rc=!X1V0-mD^ifKAD5VG4
z#@e`A$<faS@rI3;l=$xvTx!r0zS8m+R_vp}^~9q|War-oY4^Eo=9zBa!x9rRrvslG
zr7sg$<JNbQ<4UCwVp2i4=qs&d`K3>VT))<zc@Y98;>|?Pj_UJu8<2g{e@7q$lf*oW
zyd`;!8G#K+x|^9;!hc<w=%}^Vb?tmzkXste>%}!Yd1jTUb?~BFB!CP5Qptrg=MbXH
z@R<8I1oQ~w@RjiKK8Pg$O9&M&9Ja@2CDv(B8p(%k);INX>6^;FE<c{Tr#ojP&V+5t
zaawh&-w2Iw_M$yiI^FZqk9v>4thRDmbqF-~9_L#iu0t$MHI!5IIt`iI*LPPoMGYM3
zbp)(*SOBkq+_C<rqvo&!rPAz1GsS!#Zt-xI%03vjz=(G9gfh-}(Dx;7mZ!qhJ}8IB
zkk;TRK;^mCrR`+#w!W^DOe{#`)ZAaJILk`4@97Vp2SmeuU4$v|7T?btE&<&yaW#-u
zK3b2Xv6QT3fk`5*^pUBvsywA)`$pWa=_PZT9OLL*lOXFhBby~^RcE+U3R=^ueE-5D
z!&#>6wW#NvsbSaGPU}~@@#a!^hkn3A%wJEd;}i6XkVT#r@zmFV$SO;+84Yiel8AV`
zfguW6^vTio)ETlUmfJ`HnrII0a5O&KdSp-;QSJqoE=so?p?&@hZ=a(2kLWK#Anmlt
zfu~KZOp#C(f=m^rR;VSeE^o@Yo?rbCs30%6T>=f7!CrO`KUwH2y1FsNrBQtUrWgfF
z@IM(Do(eNTxuzIX^WJQV#~iGbHa1msNv2!}Zm(bD_^RCS>LUociNJmT^i9yTP4BVg
zJFY*+QWqIq%^xMlK*lJ$Eab5CvHg>%FDzv(?Y<*;tW7eq`>*OCYvK6N!Ra%bG}R6h
z#Dj{h7X=50SzU75$oPr2x&Zf9&g#p(-%marz_6;$=|g7f4(&OoHyLSXYn2QsSw0gP
zHM;NWtCwUCaWolvyl8_tO>L-sWv%+bjHb+WY0<|I#K>)t=EaK@_eN1HE8oPAx*Xc}
z+&K><6zm-)F)8PSPl`A>Iy|bJGAf5DC$>~=M2n~G`1U4D@)b^VJ%15zG~9WL>vHD7
zeWUZU6*^7Uxv>)}{8U)IB$QhSme0I&1J~qLo33gUoovWn9RvovYS_(+({GHd2CEKv
zL;%%DyR+n6#Zzka417b<uhni6{t@lp|J_Zt|KrN^>|-?|MGCK_0Bik}SY9Ih#AB!6
z&|>E)BiYBU`Ih1XG9$Hy&vkP2xw00JWOwc&n6(ly{uuJD$#VKm`_bsZN};p4!V41p
zNX@MXLAVKH`@CmiCRMPUnhGRb^L*zcZSE^LQhXBMh2-{x`P{tg`~Bm<`3aCIP^Q0G
z!Hd9UHmBPgiWAAxk$#l;8>BWIp2({a@&~Lj!Sb|{X_K=<aW|9!W9rhu!a2QK22#;<
z1@+F~%%^3$e3J(35~nmy9R<WAun+p4nespV)v{rtN2xW{5x9aLko(f+8l(3Ku;W8}
z?fI^4-Y^eUPX$knArm=4?pV;f<jkY4?(4wJH{+OuS8JI6;0IvbymXCvQxT>3VI3OI
z)}C*Rg(4!=?ErZBNnqIDz&{mn)k&y)<aRZJpK^*@;5!-i*GNm@xtXmFhTt0LulAf)
zl<GTXo$Gyw<mOof=xS>Ky<~JM!v9N*l7WT$eKB6aVwx*Ut(P7VIciM&O7~QeU^+|+
z)C~Nq`k?<Ac>FTCri@M{tUI#28@gZ;HH2SP*P5h9`|YZBju)Hs^K{GeP4ZK2X}Fj>
zjqJNpdc#uXW9HJ0DW_SB7G3H`rXVx7<aqV3Qdvta<m-hKdZ-m@Oj6AYeiY||CM<zr
z@?rRWv&Q@xhKcsk(CwxlL1^QWlx;bwrAO-*(LcS#D}up)Gs@Lx0Y~Ci`&HGuIsb^D
zS$Y$mAhUZ@1o!YdqS7<%9=LFrmx__wLrLp&WJr{yZecZyYN=BmblSO^KZ7RN?h1?l
zVE$ZpWn?bA?AlBwql{Kf%OEv`=3%7@ml~;3rA+>O@O;ko()dl!Xp^B!NIGFTLf)_X
zONqAoV)|1}5s@+P$!at)Z@J3E>bE7#lNNj`y=Fj(;8ABCsJ5N8eB|32>on^6ayQG!
zkpD<;@*!f%FK#>(I?$#|dV4K%qd@OAIokcIm3JanI_*A43X~k2F3aB(;V#JY@|AGm
z#Q3bV4ijq5*^J8BuzGqoqvLf)k*Z8sc=2efXCPWtNkIUWF9Ur|xN>)UsKW5kTuhH|
zTJWpG@v9gep{D7>?d7z01%)u}A6IDAj$0^EV5(vqT=gfdNEP<-jb#<m?{5dqEqtbH
z99qQOX2Jc37|xma^4U#Bb+SWRfDnMsZ6$Cx%%5XYnVY*4wj$IO->ffLQKl@6EIUCH
zoV4w>J%3&W$5k4j&|1ll6OiUPUl)IOLz(<kc7Bt8eATQ}M%a82&WtlXFwtI0Eysp1
zH^yu1`<gxP<o;q6`U>#y`Nwhz#>LgH^BQ!AWaSIhpK8XJ^ag0raEn4!M!@w!|HiIf
zcn^m5^Pa4gXCI?Zku4=;;i4I9M_cb^HUC59LCjB4IF*N^VbxlS+H3HuQ!EQ)!6<)}
zJWMZ^N(m+h{5x7E4ZUKsCg>8&Q_a@~X62sGUrkQ=$(}p%;v_8#$ifLddW+F7>L1}k
zm2A)mS%v+KiN_0meMt`L^O;{bK%SxOB*A+Sl5kEdr*DJQxcm7Ofg>@p8MlpgKSEqj
zhqm9oy!^FUPt+xOmo)nG&s9m;RR)otHV5jd&wgD!5Fu={by0Z(8c%v4dJ0(%F!XH-
zJL2R!S(rq(26FyX<`N8|)l^FAC9hl#6fJFrxYvB^v!9s5D->C@jWJ<D_JN65igX2l
zCUSFEb*N;xJp(Cadz$1ncANPy+}@_SX^pUT2GDw6i{q5-dX&RpfTD$nj)9+$elV*i
z5O99}QOS;&mjQMMMYA~)w$XE~-V(_F439<OLgAc`7|8E3&}2OnY<QC|qvBeb%w_qf
zdT^eymaxB0tBFP<>KSG3y+_l8t=<ZUP@wLTpNBn~AZQJEc6kOxYT3bq2{{x|n^&)1
zCoEY#`uSWgAd_qJO7@#~2=lL$?gQ<UfRNY^EVEbzN)-C>;!fI!$eHQL49cNFPlBXq
z@WU%ou7Q1}=p%U<^4523>$xz4O)iQwsi5&_!X9??);2mKh++Box&QdDX;!0-WoP!U
z41({72;UixU}<9`ojLn>-9kX3CHK3trmPpL{n!X$z~I~35}FCbuWntu9rG~zxY>j6
zrSDnbM;Mh)wme@y9ENkH8<b5(9cJrReDZ?(4jzuLw2A$;KN_RDh!&>o4~>?^<3CQ1
z>0gv!xFT+3YeWEQ_-tH_jEq;nwX@e$`qrnALY$2jze2smDC)nH<Nrp4T$gt>STw9l
zW4!g6CzbN;LzVt|V8&Hn+KR(M2)B#g1UZ5N5MKBB<tw_>GTGLn(Pdus1FO}4L}z-3
z;X0;m36cVCUB)hw%hz2m()E@8atNc!JFv%)qbP$L<jU+ccO<9vGyk58db0xyyx@x<
zwePB2QwVDJq4c9sQIm^})&t|_0<C34Fk=VP<PG5m>0p2b^5q+)S1+}+<Nx>)5cMFf
znN(Sk?!4r@O~Gblm&Y^`Y`2N3p_qXioxrjU0;#J=c%mg>p68cq7gdsad4Q$p5dQQE
zwNF)EamVRLi_HIs4166|0)koexEfKfVlUP%%8i_6H*JwKu_3(0;gcVfzn*wrTYGuy
z9WxnDCo4tUG2FqqUD|Y-d;bWjb`uTH*cbU|Lv*kC{+7=0KO*9vyVBvVd9RyG0kIfb
zMbB%q$^xbGl?bhwt@|Zq0M!=@XFK7?7Ud%cDT|yTp~m@Cx#^$I#@r5j?F4k>Xh?vT
zI{Jyl!s0R(igQ;D{EazGmETkAIhp?MG#~Wvq|S^iu>pM~;Qa-ZTAG&*`AEZlnvw<!
z<>}ih)mlDw{mv!6s5jHal-`4rU&BIm8u*PW(5Q3GA$#|Hjjm0<p`YN-{wVF=?bX8W
zQ`t3XdLDX(F)5AkB0AWx+draQucBtgxSokWrG>n&RB~m0kMQz+@?$PZF&YAEa`6n=
zgZNM2zeTXT+TKI@!FsFZgu%6sW~62=eZQrgIHiLqvxAF{r(FivLEgNSZhU%F4$#yf
zzC8Woo2B}YiV8NavR{8`v^&Lls8IMrcd1s8LoSxvdTt`6V!Cy-v+L8EJ>eaD#UX|E
z(=D*#&6yfm4FKCE%9Dr&SaaZu4=wDa<Ec*}3r*jx(ta+4SO>ku{o*XB5WowW=A~`j
zZR1_^HlVNm9)lBXf^?uXaK6?i%Zh@P-e0D-hRaw}lj6AV^dvG~zIC3NxFhc(b?OV<
z&uA=Yu&{F>RQwCGUBpsNpS>G0GnpUNK-;{Jh%#qUo?AY2k<j>#^$CjxG~3cNtn6eO
z<@cgzi>6imo^TG{muP|xH$`}dcjbVENl{DZlP(z9J)>s7dY-F5NOo<8NLT1j7C;<4
zisuG;n5m=1B-zpIu<2Z;RTivyXJ9AeSDQ4tulUmzy_!lv5f-0Fw$xLNgSP%8GshB`
znv+{m4z-WVZy@So@uP_)5CZn<(HNeE^DsUMj~OU30+cHeOxmYS$wt@uEY=c~ZdFH{
ztCHde6KZI2%_d)&hWPF@x5hGm2Cy6FMKHhCR6*&>*dd+Qs*{awPd2B9V!g9&GU7%b
z<rfdb*GQ-~!QPSXp;@-m<$L^|EmfI-Vo7)`4Y;%Y5J#tBpY>hrN#7(S_xfil1}Z31
zD+Z^B2``Z0Lsw`QCip5)-Dm@DCMo5oi>QQmJu;)OBvAVO4yFA}`8yY0SZ-I6i0ZOV
zM&&~9$)M?ib>`E29YT5+3(r8(_ZJ9fygPs_XMJtqwgymvH1wx}yIS_g2@kH&FZ-zm
zLt`#i-aPCBfmxMgm&y&F2WBLdVR`?E9-Lg*(}@_j%X)_EnalT<mBxz{xoxi(Q-e&n
z3WGI$0KSt}?RAX`xu;ofXrqmd1@{k-mk9FVZk&KFw@J#7oh@Pq*Nqb~DzJ7K<U?34
zn>_ndzR0B)UoKvh%x<Q2@R2UV_a6~aZRv*UbB)qd9C53FRAy>Xee|5d1Zuv*q-xvy
zLwp}{$oq$hEWw_-k-xgvyEafpOI`fFXzpYv=PuG;PL@!cf@!Z%feXEce$B|(7B<=^
znRzRe<P~em;5(AtD!E>+i5Kgs@)z%W7H$o8&9%i9ZgXI_kbZNhR2ob3)R7l+{=;eB
z&ZPq{6;a~ZcIC21&#(Rzrb99d5vZ6V{2p0Wo;DS%!3b^xkuI(0x5<85)Td8FTiee*
zLXB)D#ES#wbUf4P^hNKB%67vKj2gssV&1nCvZnez)n#p|A%QOJX@g9Y!}9V`6xsMI
zzW~b$7?hvWFm>Xs`bEK$6jaB%);D#128_vF_|Db89EMS1M2)_d?Q1T3ZXydkpp3iu
zYDRyn&x>3iBv(c5@SC;Wv92b^0dr&w@>hsu?qK;ACE{Fp)a&lfFYy>oel1>l$b$7J
ztB}IDi~^Q4RfUUrm9&v1A2h5q@l-B)%Zl?sVuedOtiHS5ZOi)&Mmt}Q!G1pL%v!{$
zY7bhXJ{pk@+N-H#1k1U=3+Nm=O~LxK4cdi`B+gFlmIqsE>eycS`vtJ+eWbsv;bHz4
zD)EHkqPa?Q7_W7Np&fuFxcTBuL7|m)am>ik_TL?>)Sl~9)BO;(OSbS$S$=<`afNfG
zjA4;67t8Zr8iz+Pcl)+^Zb+5?$gc_EV9#wn1qbV|5y`tzL(v45ad3mEJiECdQ;(Ex
z!$%c5EE$Qy?};q#KeP93p$r5D(5kvcW9==UPa7KVzLR)~45o9^L5FR1e!T}Q-Hpbj
zlf`FA%u&r|%K$niKAK69Vky3wI%pAkM7-(AWXU`)PC8f^cr3+b<_WAgzU8AeY!I_)
zJ*C`;80koo@SC|p5RZ0KWGryq?^fG=_6DWjNjp@3pn7t5-oT)ggfSThF6dG1;4PSB
zgL1O3&PU#-cL*?KA`^U4lXB9X;}JwP{GM3QvXca<^zZswSJI9L_WAdQ2BmF)&Vzhu
z?W^p6%$RoGlQB@;+lFu;a<Vuy<1?_-3ic`>b1sM~*=hz8&o-g{{hxb@U|A9*zynta
zlEySzWq+Z^!uD^{bme3Oi8|pAa&SS>r-b&?c6qQ{;#J^!$>b5!Nc06I7g59u@;lF6
z(xPD~QI)4=1oNWRx#{aABDy3Jp=cil%0Fi{i5o%EeLh+}+0S*Y#26J#z6XU`FAGf<
zvp(7j$n7YW@^2Ue)IcNI2f^Gh9*Cq+Jc15`_mRhNOonss!);gblt(vAd!N3Q>?@Z1
zBccv}zLbY4%E)L~Qi)a}QefE9;8w4s?k9WbOAlq_D&zAWH9%0~2*s*xz2(gt-bo?6
za`M{Bf5oawRg%@sAiwWEN{JfF3XYjp9c%N<G+>KmY~X(-`2Xw@|7)2OD2xADpVdxy
zi*O|s!LcytO<c->2ePOjfPqp7?pFVgC{1zGtulgoI5<|Vc&bEu@nypKC)s2-A^NCB
zHA8*$JAcB6CckW3npEY2a+;+&F9;l*`m-YR;`SPU3fy^W?@#q~Ww?3%@BBu_Od0db
z8Lt&UJy~fzi^8#@Z9LlQE9AO{C%=XU>SC3n$Igvh4Qb{5YtM((X8))&-D{XQ=_f53
z=F!2M?36O^eUY1$c691R4icEx@HEcJh`X11kA~^*>biT9Y7^CfmPTW@C|UOM^}O{h
zQn(HP{@|OyAT{(`T-$b2^JS#gA5Bih9QOSz|G8&t$<;mqE(!XTqi?GkVC0h$F60HA
zTD@Ocx?0nwG0IH-WZyM}jp$nbWLJf@7PlU0$Ivm-?-=jzB)8hSmp-w7Ry{OJ)5*o}
z)-_|yY*;%ab6r8~6FJ6p_pcrEECR%jgMzZ?`x)2hIz7y^(pF=32FG<G(BEB}|C(%l
z<ZYl>WMlxoCYh@G`^>?8n#N&P#fOWV9VM`t?k*m-lKMfv^2GE#o~N$BV}EWqZq99*
ziag9s!jHTs0oRI=x*k<u@Uv|y@>|X$uto{eVqXn|uiq205S*C7#;2;@aY)e8fs9V+
zqD%g{mG4MXUtm1iK*v5t<_+HmUe9=f5rR*nP13s*&-QV14D5}$y!N;j)tlz9%FZ>H
z+*^F<BrUyL>*%uq(p-&Kdg-=kca_7n=UzHoCea^NpE%id;!Sugg6k8*@EY85(p&*p
zjU2W~Zo)mlbs~}Z$mBXtW7XdJ+;2tx1`f^-3G?v-CcgSYGHjxQIaP|2x_%&4#X?ua
zjFD=yB+n~Q9y=ZVec`bHOc5;vKhRjZr<4YENH4itUirk%NL37vo@7My&DyaF=sqyy
zVr|+p4Dg86w0uw(Eoj|k`#KKzuDG^dJK!y^{3I=}bf#5a@q&&GChTwg0SCY2N8MOf
znA}aa<3#F%zKz7okOTx=P0Lr|(+ym~P^pNn2&hO`BF@FYvUnn?^o4~XB#(LX{h8LC
zqUmB)^H7W4BdY$9Zk_2tuDx9yZU(B*gN6qe)a4TyFF{dC3I^38*hNo=cZ~}U%oc7Q
z^uV{rLmEpO!^wpe^=;n<-`B@I=U^KKw@7nq^EbuMcP~giw>CVp^JzvomuXP9+0kp2
zVl<$71$eV@t^4LxPGh+RJd18L3*3Kpz9qP_y^m%rAW;WIZqEpNqB}Z8_q-FG5zS%o
zKMjoyq;SAuA?I5ysJ)Xu5l#3aV@Mf*nD1|tSxva#$Ctpu>hHeyc7yjScs)5&I|X@`
zcb4{DH8z+H1AEkakcG;RLq}`d&1vXQtLXT=kJ3f|4_W6O&GzGee{E^2rLDb+*4}Eb
zqG|@A_TDpy)ZV;{QoCZuYGdyaVpCLzS$ppp8k^ev<^B2ozTe;XoZlbL;T#-)yqw%T
z@4b(!Ykm{dR-*;7%+KXcuz8*RkFJltX<JOE9ALhitGPfCaFh4=&Ybv9+hBK~w|?q-
zv3!!^nM;1r0>_>G-@P9w1$Y;!oYCux-%A4%t^L2LNb4U?k$~;HmYpx}2%o3j&nMwc
z^LsumJlB!vbDhG8js+xsUnnn`A`Yycyhoi@vh2J~!O)evZx>USCy1oo-b-D2%4xJj
z4><jXF;ZO0>2(?^K}$)NAnN=R^jVnztJrKkfavh(x?P`_s#ci%ojKS#llmz7@>$ku
zDL1(@_{RFvv15nspXM)DnS-2?PAWQpUrqq?Y-R+N{}4xEd?NB9&(Y2dIIwIxGcZB{
z4=6O$7K|HAU_U?_4~6x7_{xr#4CO|&&xlWHOBSf%KX`5Zds-Sn76YvuY(^x1f2^uS
z!)-DhxVf*kU~fQ5_~t7m)zub8%-(6LYNcI7CB?%aL)$)Sqw&{|uf6)Y$ZMZgSpJS3
zcNdqjr$}iUtmO@$EgAAhbSx>LRit<Ne28VLHzv9`iei4!ZGpH}<JhxQk0cIdT*!8P
z(9Y>@`0lt;lIFKyiUFSZm@{sZv(_nw>#KMdR|*`Z9;qQW5r4Dz+a6gWln2@=IX&wi
zQhaiWHnSZI)EiYWQoij~Gg+eD8P3cT;ry=q+s|EFPu@v_#?&?R`WdI=7}`uAHji~X
z!3U+uG*10bpVNVRP0C$&FE!U`jbVX^VPLeTJg1=d8U4V5`x++;w)IotBBu8C+B)Un
z{Wz~B5huVUNI(^M_q0XbRj<axrw3<6=hY;r`-4^c+s^%lJdUG_ztPua1vz6<ho*l=
zPqLK*cNcRD3H4yH&X0Fg1?WrT5~!u@OBSnRz0?eiy#+y&^ze)9`nd8+3t+8&b&n#5
zmR(!4T>p=$o>*A9*{N3HROMw}pm|BQjn}t5#WeX6YUZ89n%^7y#nO<!czLqjfAQ>t
zpW<en{XAxG247~@k#|>nWcb;@Y1<Y`m_qIacp&l18HR0C0Cpj!`h=~}<!5%a_N{Le
z`VVJaaw{Dah`mw2VFdFIf_+(KNQG8Cd&ALWAqgfbCRTpjk5)gUlw&)zeKduTWHRY|
zVFn2sR>^Ts{9v}F7&f$WXqzFmXf>LzdcC4P6Znab@j<O?<Hz?;K2ir}JaBnxJAk6I
zk8(P8p~YuV8x&DVB*i38%|?_LuXFS*z8SU68n6duSZBaGNuC-;ziSBdWOFL++z8`_
zKba3Cp<N2FlnWC?>vkVMrpHZ$fj<d9kB!Y(zX$(l9lJs#GmFQ^^wrW)rc#|_u}a7`
z<)NCy3*&B<=M8}>Z*bF6Er*tODk=vp{}|4HPPspIkwTUjNXMiyV@zfg#$<3`R6X>z
z8ZI(TZS<^+yB#Db%HlF$>%!NUf~(nyDm*n5WOX`Lplq@14~xWYS>NX_7|S&_wxSEX
z7*gWXNMzJpy(267bruY|9Dx+=6x;nyP~6rgdJOIZy1tXH#UHY#ePMPyW9dbI{6ymH
zjvd#rJ9E&q+#L2evw7Z_kai>+<<&e*?f)7(>>H$6l6+VlkwCjFv1RS@n6jEYst;g-
zUg@(uRQizs>Aa(xyofZ=867`a98oSfJ<c(IAsQ5)AYX8JI64-`5YTk`8q+(j`%mky
z`@VhgLFQGOdGuyRiP9&6S(cUW|8)hm+GL;8;|^qpSGjr<#0-RI>=UV#`57{o8_8MO
zwd{ib3MA5o|J}{|Z^;y5#b5N$?W;u&jzf4CyKdpuf1>CJ{h3GxAzr7<t9Wo6z1uYl
zOk_8h8QjCqakK5xit2hYMt_$fmPHO<*&QlW1k;wf9L#48x$AQgw}_||4;1<%4-3I{
zX}|3_m6TYj6Es!+X<*ntwTs+06ybNSp1?tnGiXT;Lbdl;pkWPR@`Y$>bh52~@`8{c
zbSR~X-Hw~E=aRr;>sio=TXebXzW6%A$%`eq?!_wB4`zJ$)QTaw4B*6=g6$9&&7J?J
z-j5~Lz6M`I<1gMk4J#3>vA~VszE-kH{N1Wwr2t6hnV=>UI#C^5*0Fbxp<NIk$HzCP
zKrdb+g7%W?KT!p?lnO`zWCkYh?*sL;Kll!o>4knwoeeYHWO}V=!grb0Vt>Qpn3z1A
ztHu0Eaojs38=(aZXcihsXyT^vVjQB5YH|=eMYDs2__$M&sIPJ}5xfV>ob4d3f}`2|
zz=50YnZr%OMqH}RcRc?m`|)ElH61-1lUX)Zyj@{z<B_Irmq4}2ya!}HunBER=C+uw
z_liB63um`j?=iFceIFH0O+TqI!Oatv4CN!8xRbS2Fwes;zWhv$lWZ_(T5EA|ywcA8
zzJ4ns?0NYVuKD5YB?06hGhwWO7NYbQPae8R&wN<@iOmh#w^|t#hJl%fg0&j2-T)=u
zzLuotIC%E;A*M)5dHi&}&htYgd+1M-ENWI-ovgX&m}1!aEq4_GO3*H#8Zn7_+YC?R
zc8Zh9Im%?pj{bA!3X(}LsdtJsv!Ks2*?^B3)B@M~b23!VQ-XXC&Ih^uSv7g01j3Zn
zRh{An7lM@}ZsWtPxWbFSnoTnd+bx(J0vZ>yZUsui>kVoGww30583MhkuCwny%#TZ!
zDe@Y2)pQxm?0KYdrYg&>X$8*XcQ=^j-TwmUR|HcEHz1uKHkG)vkxIHS^YR`X+@6R*
zSb1&4+30d%zeh_=a=bkN;_H8?SPXOqlf`Ej|CB>rRa@Az@d2u0<s8+24(+Ysi}#m2
zU9)>)*Ui6kVXBI#SGlRmYST0e8Q@i}A*S?5LPOd(^9a#nI;+p2A#7tL#*<8@1q1QU
zY}j$mO6@u=YCw}53zbzzyZM~A+pRbzkw1cSeBzAmXP~{TYW7Trn$!<z8peGAEwJ{d
zVaOPCib%Y1=^k~znVf@t@6EP&9emO@uYfXw1u8s-)jY)3B-I^#Pi0UV(w3hwfCRO!
zqsJa?p6Xg+f^8oGTMx{=J9~C&0Bjm1wX-cH9+?KD;AYM5yUsDk=uc*s*Vh}#CY!U0
zTzyJ{v3<S1nb3~;y><-%FD<C}FJ9x1loFo>^WJtRi__}l%H%kkSH&0G1gmsi`rb=s
zl7#PB%v6>G$x}u}j9e0Nh7-zk^0o$m8*xj4;aBN*v!`QCT*N-lYwehuNP4bwe95Bh
zxYQ?2K#Rr9DE`Bm3NFDSor&n1p^Ba(xzl=;=~C0wI!4wTc`gy`gM?n;kyIn}HO}*n
z!E<fd44D(Lpm#v-cH;V!*}?-VXmoaL9<@Tf#w|i~p)$HO-&NC~$cg_V0a6KA(rR9$
zDA`qW#5QaW)~%fj1dkV~l~5Js?@E4N5FODh+Lf@&E4M`^&KK3yv6#ID55SyK3O#!+
zo@Hm7=(98p$u=yJ#j$GEL?ieJj3pb&VT~{-bevy3M(Y`9sk+6;dahcP2Yi6M*>v5R
z1Dy&09(D%9hiXdLA}pGeufa<BboJ;!dp%M6%6aB8LNBo~Ur>X9csmnDX)Rt-d7>|W
zsbN`y+fbrl1x^nzp%tE74%Gg^mbU+ZrDR9n6oM2q*BCW5OOBG7D+|IU?{HhWf~5dH
z6d`@Oj{FFXh|_4r&RCKU;Usvd2r2o46+O-~sZucWk~qN5JC&H9o4HW92VBO;FjEHY
zqOMoNGnJdGa=5ZwcTFrXup52xf&0XFJ$A~|fp;503vVv$nQ`tt?P)DjnrL>Kb>qfG
zqki+p4jjF9P6e{9??h41rC2vW-b0Nv?~z#ZosQcTN_sj~kMntJcUYo0r8vqAS&e`!
zxH1%K4`;=Bnwrl`qDVXXcbm!YeCu+E*<d364N<h9XZuFq>#(&I|Aad)XXT|S%sUgQ
z94h5Y8M@%?(iCP*FC~^c-&qEX?%K2x6s07<_MKl3yoS(I?FW>CDadQz{XQ4H+A=B}
zw{vVeJ-)3H&TbZSR@qg}W@Whfi+Avd^4KZjS!#OkUdCMKB+H>NY4IH0r2S-GX;tSK
zhe8IO!SI%Y%cQT}`%T0a!a!N&CIRP`@8S#qhGblL?t+gv{VK}s)ZyQ|(ntWK`Ux-N
z;C?qE!x`~z8cByHYd-k=N!ENev!)M_hoV$y7iiCTfZLkNLX#cDf5~(m{4v8H*p8RH
zob{N&gZK1xh&q3#k)Q=q0eCk<8T#vLD+HhLtn0v1t=;5T7|QwwW1%sVZMqRb&YK%p
ztq#iWbJ(X>ex1$2Y9{AhKO_cIqf&?8KSkoric`H?x+F-dH`_9Tn+^vE*>^_UqAMzE
zoubk3%IcOE)-wDLlb%TQ6M~wuvK(O&11J?Jt0@mKX_SBu->xt8_h3VqsK%1u1FL^=
zjgEr$asLQ`hl%=lH-<c)Q@@e3$9LvtVt*}!dah_0^if^C1)KQLKtb)M_Ek9z&G}$r
zLg3_#kd-Aav_#7aA%GH<bIclkUQsfu#_e74+V=4e6MyBy2W}{$3=naNhA((&NF+#*
zKf(-RBR-i%j}Zw|S+PY6GV$M+x{>YDEMKS;IN6rG3Qo#7<z!SYdh7{TB6%R47#*F@
zgMlH~`r?9cCgHs_CXvf^rtgC@MnYNsZj%xu&-OCFiY5=X12xy;4=>z1ZrHCJk|wUw
zil6BEvT|w|7qO`=jxd8azu23=Me&~+K4E88sEUn6(%kzHUc~ewy6RpA06m8D1Q}PW
zG1EGgq$$D1Gb15+r~MK+g9u!jTWI#%OQg~VUdK1g+cySli!#~W-^kML%wmuIJM{Cu
z*lNod7*tH`safc|<OW_IXBF<NkJ9|7mRIb#EjvIrq(qwfnVX@gdzf`*Iu^sPq@~X%
z>^{r}{QtF+Y^?^**F-mI#FZ=uT3dqO8SLEY9$qJ4JgK0#=_7V@gAkYs$?bmw&y{lz
zEu&O3Ryjesa(Z&Tgziuhe&>zN;0(0q%!47L6FD-?bBBD{3g^`>kDV^fgi_%;<}laA
z!c`oeiZ(wPfjdRyIZ60)soS!!6fe5o3JLl?R?d{CoU16*9}3wc>Qyflg`reqDoutG
zx0JW8>=(>+dT&EDj_W*j`^7kl`+_5^xNlzJC_IYIHtwHM4l>J{fy1CYSKgMrjr3Y%
zkP#J&bro`1<FepsJQ08mDq2W#>HdVSu_=n<c$3}6E4jD?v&$1F$PKihN4V-n>=Ans
zs|pBzJYjA?+67p=fFKp}LBGD_WHJQ0A)WZeNS&$RfANH=QBskigA9z6FTvMetl>%`
z40h(%OQ12o*%BMo@<BTeC~g<dCD-Mru|~Tp@U5_5^g?s2tJZ$-WquOw&p!)kdV?AM
zXvQ`DGUZkXutZ`f8&-whbv*4tJ}=wmxY9{#%qtIKTY8otZ%7*Cz8clNjLEh+-Pc%3
zEZ7m74dRcu@a@*kP=$S83_8)i@?uNc2?5|jShAgFGPxRpwhFU(&bLBE7<uw)@3qom
zj0xpWEHC!$wgSQ`Xq1Vd2RA9+syeQ;jEqiUnyR>{=t6MQ(y-IV)~nt+<$Jb!IIUlj
zkL%A23%IgmCTy|Z9BalI-*poxsA@HOUS&S~4flZQ>3>#AV-W~5R(7rqSoiWlh?sDj
z%4^*hNXS`M-%fQZcq_20v<CfxE^G3>vL6R5LgTD?9ug6Ms9wui;^yg@jM1$&5!WmP
zwF}SRxn+Qr5A4m^tw-{nHYq6IR0Z;JZwx>!JS4oDYxvbG)#`HO=LOqTT#r`#6s5oY
z#cN_AP2S{XJ+idA%E=DR(s7Ks(bOOU2thw%FRO8DtYO|rwW-qpUxp%-XH1rz_|J#q
zkp#Q`{%H;zS6@@TJ!QC8*Kb^qRSr~p?^V(M)tW556{E2aw#ZfC(^Rlqh=%I5B)|5P
zo?YeAl*ze<lQSIcd@GOjC39e&ucyiJQcj6~{vSkE`_`TcR}SMjxoWI_RrWdVn>IC$
zUCY>-lJdsM3DoBb>yyaq)Ci(D36={Qo@wwk3m1&djwXNW*D*}m95)(rP{>M+3TI3!
z2C|gooq7~vYd5L5oY~m+>c&2cEbp%BwiSr8Pd$zOV#@6FbMelo(qaN72Ab07z<Mzn
z^5T8b%J`@=15j-6caXT@u?1lI;iBRD3!D5v8wrUHjWLOxbX9>Ej-KnB%QPA}es)fj
zR`>KVntgm(&A_1kt|@%3`*ui#P}|Wzq2K9a6V7mAL~<Te&X@M2Lp1~^xV`K)&7^6d
zT0mb(*ehs)z_%+OnWor&pi%Sr!99A=UQ|I7zUDS8wX!=|58;@A!-iq`7Ka+q=*b5l
z86J)DRDB|?jm4uFHH(oXQ^p{24IZbDh3vaq+C7d_srQxh!)^5rl)9t!s!cQDrDhVS
zt29$1JGbGNrm0T=RsY16+mxO*^g8z@5O_s+`poK{ep5Br<<eV3H{P~owDg3Hh4VDO
zJzfr+sKnCExfJTr#lzngxHhsZ1(HSwd~b|EWr$dFURk#jDpic?F~Og>N38ntoQfhN
zNOt`bR9bbaLjtSxUG;dhhQCvV&wWieb_4ZT!CE|#{*vx7aMEO+eMnA%=A<8@+=A6&
zK<Z~iT?PF2_111lHY>W>z97Y@)u-bm);dk$ZvK(Bf|kagnmAuplo&#e0jiE;ANmX+
zKqlweEbjrNRC+}V+k$D7JHbogVHBgU7mMmQs_X1f>o`s_u}ikPv!zJ~2KGk^$*GOf
z&<VF*u*T1BXdafyr!-54hPaCXo76Pb=Cx{!hQ|tiX|WACojbc!Y<A3GXuw(pR@zC-
zkOI5@-LEL4`q1X6_xnsTcb5gROq*Gc-~^sMc)rWR6?X&;Mv@wm-)JMUcNPXQvx+NT
zzk1c3ilvI?i5ejG2bE}y=jm|zm>AMkuXhgQ1mh!0m&{+R3foJn_?czBVQ=0Xn{R@c
zXDi^14P-(o1J{F^R?+<K5V)d15I-;vn1hpK&UDtWmaiyLe-lK|+uM&|#ps^d=uG}W
z!w2)+rEHBWWEb+mt_?#IfpjgB?^SiU<eRpISiA3!mr_~?8hp2PKkqNGtx(h<GmJK3
zuwb=`h|>Lv(hd<;Rzs)g6N@ha=`#FPU^xteNooi`rC*iBKfh*O_NZ+n*FfQt@p1_I
zU{to?S|$p5Eurt?j5I3LnxvbRI0ywwI_UKuJ1#UX)-7M+fO%vH9FMC;3tANaCj<8+
z&e}&9$IUIZtQz)9vMToLu}*w8zU<FI3Ak(?1q8C2Yeg?>m@L!TXXh~*Krq`m88>(5
z^KT<hY+Yv!!CxCPTt9!#4Wwf>^vEEk?V6=IipU_t*<eCPzDp`8+d|%QD11`C@-Oi-
zA_|Ny1G6;q4g#0Vu&wlOzO4h+)P|xCc<jr9uNuAan?x0=rXSTlm9qT#a^SvgqvaUu
ze26&N^NKBRJS^_)liBwxgy$drH~1AD>I@2L%e)UPvABR$9@r!!OFHGn{s#ZMDWb1$
z%{IRi9A&^?NAal-C@3^Tz$dsFaC?Tq89oy<8PpRvTlEM^y!R0A;>~xtBor~h`H$G?
zEExW8{`=oLe`TEiK+E>^5r55pMn|b;F8CW=Yhz^(7|r9G#T{Fi1egBBAibKceIlw9
z3IvDQ0<SleS_WqyoZ>D|1V@PkIAunu^drioSAiObsa%&C1|+5)J!OS3%i)&9ct4d3
z;*7un6u!LclG_sg`Hfg-s%Ax-+so6d;7E7j#NRVB+|snKe}-e*@kZ0QU$rWDfx`s5
z7I-4(xoh8ckR090gra0hF2sdKcKwecYWps;L!x$X6sk(NE1#i!9d^|wW?!D*|7W+A
zvX)8zNI2#G3USgu^!|*`CoZCPFu4XbuyEF0Nrj+<&lehL-3-u8lL48uf}P$m!x@IG
zT_T$=Bu9*7V;7FjyQ;193){Q?iKml&nsG-VU%Y}@t$F-|;0}Xl!Ep1U6~|+Td;cby
z<dWo^=UFmG87k8{Wi!jJ97-8A7f|toD#;W^yIN|G=Q_LB{A*%8b2Ek+>Lv_GlivER
zNo4*{80vo<z7TtBP_mu0z{;0Df3CMr^_Lgiobo=W+JE@mFPp)S`}g|Qqu9LptDm!#
zT5|Ed<ae9lH2<C+CE0saFGtCConq}K?=>d|7+m`40cO1?ylG!gLgk2GL7L13*7<}7
zk5zS-RFidQk;E=jFIq|-oU6iWqk0-=SaO!zpOL!m4jpeyR{dcOZ%zW-b0`pPM(wg}
z)3zYAB%Cy!%qK02uxAEQZ%n3<6;a68W}gSjT2xZa{n}qX`Z^BL^nOowU_axwnn48O
z#Wk>tqxe(sy;8Bu{LcELECmeeVMWrfJWCs9D*oajsa^9`)yRiG1^!Y_{=KEI=R#kQ
zlx)(dXlY*RbE`2s+Btw#fVwwHuk10zPH6*l8=ijugL(rYq!({4tkd<^4ha{bW1G0v
zca@1OK`p;;qr975VLd!buA9e5(6wL6W44=w;HMZ4q-pT;JY*XL(%^!_ba(sxDwea|
z_w-kIuu1C_kwCZA0aO}$?52^Y^tk)PX?!N$Nlmxfr=H*GS$V;Ywdy|zpFNnD#l4rm
z3k7XCb`LaVX05n)@+3Wlf5fU_CsO~c<0tvBIlE3PoW1&~J7Gqtht0G&%~1Npm|Ev%
zO=;kgU%Cj}*t{S=CDjnmi(JNsdPl(bV)(w181)82>g}^zx!>IVUiqS?3w0ni9zRE~
z%t4Lbk9ZgpE;<bkp=gqXhPfB!m_o>?i!_VabmUa$PIC9I++1-d*w^C{9ptgQ+`y^Q
zVaRkSNH6Iu55VFPELI_tN$t5-Q#yJ3@ixSZZ?MB0P-ZA}6y)V^+js7Lce^LRuMad!
zH?Lb7*51{;V4Xt(ueZfGmzy#+IaSL`LK+p5E`dO)T+`u2el|A>)O)2T;e~N#ndx(z
z7b4WV6rt3$geoh`PHorr&?1#0R-$Bd<cEbga_?4h)8pnxC__s3bkGF$wA_~qKg(0C
zC{@tx+8dSsb!jbTx{xE$HAm{av!@jS5}l+i8sN<?5&w%9cc{CFB<9k$*FZ8m9?;&4
z2k~<xm-z~ezor+R1e>fKm)k_Td0S{eH1kR9L&%-GVM(c@1-n-Bqsl+MR3?CmR0o;n
z2yAxVZhvv!T8zIxJ)IMOf(w?>NDuvJw`_R$X!q0*J=L2q=$0I0IGnb9ooNpD3c2F3
zREa^%9vhqw)z3L7vYUQ&#NUtrW8+!|gd4migN>6_2jYvklJxCGj7a>0lvE&>K?&44
zAvG))$4UwM?_d$Hu<IQz6WJ^<?z?T*Ty06kcqqHzorSuDqV-@4ByM9a_@k;iWPUH<
zG2n(QQdjM8!-!~1RR`gRGLHm>hCAi`dZazO|D=yA?j);?v&Y2ZAuH`UB0badDtSL*
ze&yf~>$Z{u2e1(n?ZJS<&+BO7ufaOMbCUYQOtkCvt8xoD1B%&kZR7mra25V)ozst6
z=MBsnV+~bX(aanu&WU5GqX?(=`{POWgl~_2FD*HX!t3I=H)hS=jjKnzC7$==lmxj<
zi`i2TsPSb(RQZ;X4r?gxw`nKrDw;*l(%-`&?FV-VK9SUCf;o8c#aB;M$%XW0OWfoA
z?}vlEq%&WpaoT0-ac~<ctEiQ5#IABgT;s;fi|8~cJ)+HQGkT&5{d10DOxy1SoU&K7
z#`s-olGLLUe+F)<UAe2mkVYQSh|)9<!L{no6K*SLhWExL=MYv;E+NhN(dhzdxp6xw
z9Mdc4WSnNK&?k1HI)0BDk<r@3Z*7!cLeBt_K@$qG7FpvKexJ>@1VyWg2iS1*)MYz=
ze{g9}@pHmQJzI-yC&9Twv9Ta^W<2=`^(=MqNs9(<_JD$8`ZY821ox`idKN9%j-aZu
zIor&E1eE6_Qy+S|kRs&t9O)KoGiFVi8m}25(Nn~#zTNiJgG)-ZRc|FvO;RwIBJ~BE
zMz+A*fOk(_s!<X6?TW32z*rjl@m5~WP^BRH!@Y;vvCy~<7r|5e9J^7CbXcfXk{)u4
z#5CIl9_q$47C)~ATOyerPvR5!tV6NXewhg>x*f8!?Q}z$v<K3-JF755_9=s5<HX#G
zIID6`e3qv?S!s;(Qh{McL@9@o7_L4fuq04>lc89=nCV(885wQi6KM0~;+Y6fYMW4M
zzR2*vGC}_w1^;8F{VowN<RsEFAMp##5{^+BD_10Qd0B<f$6dZfJdc!%B87lbJQ=K|
z^D`O&0b7mB3#QHwAE@%vWm=zu%bU5x>JOK;V@m|<qA`O}N<Amoq`o@jjAHTzKvcAc
z$G}0=Sduos62{!Vu$l=sAUEzQ4U$Jk#|h<&C4QlMh5^3a1pUU+#nb$Xc?OC8(+XcS
zD!e2IM1AD>)5Er;5iRNoR!OQn8wgc%P?=sBX8h8@zasOR?@qUj&~g3jSPdhs%t&<e
zaCQ@tciH^tba2C4%~e6IK_d0)RdShNUmCAv=;4dD7E4N{{l{|Kg7>x#L-5rr*6Dc3
z<`6`2!81}!1`P+H$5QOrPjn69B;quz+xLhbhGjiY>zT4I$1RAm&Lj2Z*um9C<WatF
zd`V3QEW4A0ET5ugpV+ovxTMQYvjIY}h;%EYYWDHyw^jzZ>cLr0(zRONo%jpF$~5k8
z-v(nw1-E%aS(hIw4R(a$V7C6%pf@hte6V2s!GZVkK__hTd;h@BTZd-JeA<=blF$!l
z4Btq!irxzg2GX(EMWEi%=^MW|FckR3cd<iP-C6rcs8&S=C#BW@U0)3&A3XXe^x9Tw
z5}WdwcG-nG<6Z0Ht%i18rJh=mCaB29ySCa_*m))*p1362r}OuTzPxeyw<t&C<=t~%
z4km1Oj{Al8Mf}`dLZv0n><UR*bxpIGq;H=`M5<6t^2-QKW3E&sgcIivvK79!CZ<a0
zeGZ*6rcYNNxYrjt_$A})Du{Y7>z~1XRjye^skM$5yIy?svg*F=`#9szY=DHhH;abu
zzNmqMOD7%EYX~#o1FtP{(pLtYxB}eB2fo`f8Xr5&Exvnvb<2>Zna7wm9>+K$6U+3i
zgyfgcP{yb{*vfh`M>FO|x#)1|i*9p{6ixTk)o0AAOYtSWd4o=cXb?cq^b(4@WxZGW
z4}eqhREDl(+KKfMzY7L)1)1<=-tlawr6+}(EP5<ruCz{1IZ7N<$<Rj@r1IkrH<#z%
z-c^-<4+rK@=ua&iiTT+B|5pzE|4A>$1i(ZRrsYIy1f$2LnKxnz%Og?JE!UWw$0mi9
zIZjhuzqpz3^89*adQHP2mMc5(+~cR#M|wHNNPQFGk5*kk9qP_rxk<_9wKtP<y^wK{
z$6}Oo!fQ*Z@|$Q_pA84@GQ)=>fwt0Uffn>-52g8HihMDS$cP)anA45T@9P|^+zbgy
z&Xfr%nDNf=HnB{qCx#pst_7eJD1)-opUx|wlnqL)I<2c$FK{d(hF5*(JKA_$q*L-Z
zCQ>fy8&|3UHb7r<w4wh{b4)ckBPSA=Y!7g1h=nkYg?rttQnT!6WGUM>EM?B$ytLWr
z;iXH+!(U8c$l@Izk5%qS2r1-yBCf{Nh}JfyQBqXOpV}@B$GuOY#chV4djdmLYFsO&
zynTu=kt|ar5z{BfERlfbI2DPZpy(^wo~hn(8jxL?E+k|X%`yqwj45R!rDid#VoIJ8
zN}x#k0JQpD+#e*7s9|k<Xu|?i@ApahL|BM4&Xa;)TLf-R+NMaEoq|r`h1mmG+*Ymk
zsFWK0qx>@^Zl7X=14Wim6_|(OzNxBSPt~*A8%s>Npu3tgN7UJCiufyF=kJxG$CGt0
z*J^iiih4Y)b*)u@a5WC{8xxvkO;->^xTT>qb#0ER{H66E?KWpQAZEt$yp%hYVD<{`
z<a~|;l6eWWsU%;VG6G-6yQk@Co0%8>#p_z!@~gWYlrUaLC=nHK2Al0OwO$NE$aV5H
z!AiwxowXTtC+<64$J&YU-{Mmi_gBHq%@lO!=K`4i#Fm<Q293#$s#J^PI8G)WX71lE
z7J-e&zMTa?qo^*yNe)15S$8NBX3UmVa|fi$89V)qYTeqT04%-F(hL5J7urStrdX+|
z<u2W-0_ti$eM#(0(KZHKk30UO$P&dx_@>&9J!a|8QH%j@T?E_0C5So0OV(CHO)*@!
z`K$XDM1UYWjM^42g927vFFiqSmX2Rg>IPPA6+=>G!i&ze5j^nb{RUE81&Z|{(rOD`
zqO|s0%4c4Lr&K;Nk9eHE4%=}M<0Sngbk>Gro};jGX%_eJDk!o>u9WCH{sL_9r_De2
zPW>b`@S27~fst{Ux{^-hB`8I>gi}A*8hhT!Yr@B6Xu_?hZo>r+T|~IG!DBZ)V`P|1
zXyaz+7z4-79A37TNh(iyWoI)V3B-bf)SFBg_p+%<GfOOY+H_N&jRKn6gtK#T=T1Hg
zPyx_v))+jYy(qPxBLJQ$u<-to#2ughRl~3J92;>o%i42Ey%!twp-sjlc;*<#E=vaU
zX*b{XNIMF~LgYOET-%__AWYCguhh!&iYP8|sepr6W6w3K7Ts2u1K{+<v#m7nNiJrI
zCyVK57G=g)h^l9r@hKm?eHoyam1V3EH7+QHbuTr}8@B4iaMPegVu?=^bMhCiaE)9c
z3U}fy9LVlo9hiNHydnd+1pABk!>UYJczBbc`1$J$_f{QA|E$a|v_Bz$*Qp8(?d~g@
zRoIAC8!G_ZWpL=rKZz0Et<P%+nHHJ^NBEhLnnQgW!W@LZ%@?BFFtoIr+{DWtrd`HC
z%m)4GfkULWX3JDQ>4m!)`AK~61!WVyGb@;zcGW$GSpcU?r))%YhY#1`5c`ZYN9qBa
zlnCF<M4XhJkdrY_jb~%QXaO=k(ShsB&&1B&y?J4nQ5#MD%gXpRid`*+Z()jmq_Ej(
z#W~MS&Lp_YvmLlZeL{{P8qQ(v=(_Y6Esj#p%Yhgds5X+;j$VwV1B@`$Yi1cYdc3UN
z?4!8Yx1^l-+j>`GX4S#d@KM2U-8dsyH9#XD^?~{Yhr!1KXj_eb8j~HM7&yhZ;6T{P
zZnbONFQGsfF_GC|(N1XYGMHbesV|tM{avazxktDF5UU-{sT;*GP~xLm`BZ(*iKh`(
z*B8QWV~6IF=Lw8;Nm+`L(L!;jtw4>GDt3XKJzLN^Lr1aN0@J5BYYoumhzX*iCQJT?
zuR8mC?Pyj)vQtsyp`>?nNDhVT5PJZZb~0qLu5sQ)dkjCRv(54(C=w+zdBMs$TNl2m
z7TjZj<n*;q)mP#Or3LOScJ?gNMoY?F1|y$3Ow}es2w{3}MQvxYU~!W#%{f_0hyv5&
zdM7!V9p0{2xYURI6z!KXA%lbzf_j;zz_DMGJ=H=nI1tX}YIHyHm4{qAJa;ojO<&BW
z_{@cp{1J8jVTP2R{6ir^L6tb2<tMnAZ`YD+LJ~f>ccs+EIOLH^(SW3+NiB*Kmo_i3
zh9^;zDV#C9`Msx?$z^O_tgs%mU|nL1A#fUwRFy1$v1xwE=?Mm~CdP1}zw(a<mSh_=
z*=}B>{>6(3d_Rnfk)(Jp6E4%fUEakFf(+~iKi@vMG+Nua^RPu<t*G#$)cRj6&I{Qp
zG<4Upwru`L5~EwSRqx=#*9@UHQ3)-(Pf&6G<4bU(=euPGB1d^wA&G}-!?V(l%`6x3
z!~+w#4RIfzVGj`^nQYz5%&$&RcC+x051$&|<BydRjxs}bVUvhN@m5BeEGWWg+3zn6
zo&6w83E8`+P)Sf7xEk@){*ADFi=T*P7&ha@;8%$UY}n=>#k2=uWa1=TtM>1&C9;3J
zu##+amDBWpR6EM>y>B4MkwtJrbG4etcC2JnTdT6*`{NhyhFzGs;5(`?k#{W?fh2Fx
ztr?;Usw+d^f-_i%Gao*lA)e*_3R|cT70Dj_(&z2LqKe~_(=p>Jj4yC@q3AN3To|r&
zXWSRH%|!Hgh98QHSM>>Nyl37-^-Wod+V$cYsO8Dvh<+{WB9A3@v=NJA82NmB&9a=n
zPC0EcLrLq0sJ>az2d?+zT;-Me(r@{h+0)i!#EM2wyC-F5h8c<~^TL&fch4LqQ%m~)
z;uZR`iix>=`SG8~yIl*jr9#C9=>F8ZS-#7Am<!FYG$ZzjTE-EnEAyBf^DkG`u~wvS
z@ZLH!v7|7)RH;x7s&%222|)!j8;tG98s(0^HSfRy1Jnr_CG_g+N1h$O{*S-se@C|B
zF!dMDqnJfD$6uOJ?kXeo%awzMw0L5;qm)CNqM|kvYklK-jq>nCRqP_>v^^HlyXXH0
z`-MTu1NGmLs{flp|L-qDT$sMpq4Y*VF4tvC0_<V5tv~j};f__p7dUDnbt&|RsXjV3
zo0A^;Gw{S=FUhD(C>DqK<0L*mLWdhi|6z8I-KzDdwc9=W_@Lp>aH-&)B6nElXKlT?
zOZCBQE0-)&uAKg8B7MjvcD>7rEw|zF7!tZ?$4O#YL-;%__|3|uK@!ZpJY3SXESb4|
z&kumC9BL8)1yYZXCDDXuzp9%($QEKTbrl!hE5s!P#LET?7kjFUc!)t!ZfAP2`g`nY
z3CE;PM+kb+SE^GcPunhskT4`jg4H@wL$IL9P<o`0&W4k_7W6#@5LLg(=rRF^f=GjY
zw$W0)vG%E6ev?_AEIqvQC32`47c}aX7^|>~M%H?f0;I&&nmPl(TE>-<5<|PtI1Eoa
zzXoMVzVkSMn|1Ggt+(8}xx-EBGIbPp5AHP|Y7rBs`e7mk<&joJEg6&^Txz7U%^x+s
zi#c`lGvB|A3X;!3YCzz2NA#{4Mf!;n&o|EpJG8;|es`AJSG8x%VOiUgDZt$_;I7V&
zg9y4D_tGPz>hD~7#u>yhWBz2$`HxU9H2;k&i5i_|+bY{;Y6O7CTfJV%*vPz+`)WO~
zB13oTp>5GvEiK&||640^_)z1K9$$2E?$?JLyEhZ|6uX`7x^ogJZCg?du4vGkJ2@Ox
zA0Jtf{g}%!w^A^_wJS5GzR)ApU7nJVQov+>ymMKEHO6A(&{MmoQ2%Il7sJduH=i)g
z3Y;l!z&#pP;}Ta<Fkp6vSeBERS_sr1DY-7lOerAM8sy%jypX}~1%Iy-Ti0Kfc$eOS
zrB0&=jZb8m8fd{dKBq}zMIVV~mPUWueNN{C+((#aLZq33??0R~L}^EBV~xzgPfPqH
z`{`W9nLTB5vutQbl)!3_`CPKOxwnR2l3S0d%&A#XM<bvozL>gd@+tz|0#U8xn7La7
zVBNN|)-`u@W*p)%i_&uQ6){}vVlk1d=V*nfEa4z7uCa^PBvFDB!lPXuFK<PMRK-U-
zT%htE<GXMzAz6-!!bEkuhRr3>)+u9yQ60Zpqh+9E%~<BQ16*C|?b|eO5a@B=R)c8E
zNi+mY5kHV!h^Cbv+1qAG)q75-%dajP{-I)S8v_kY2?Z|FFw;vKC)QW7w>$Ax^_8_q
z%9l*UP`6hAw^go&_}wN|wFcu8FXB@QQ#lTCbB>qx$|_f>Wy)Yp9c!oM{I9QW)pzE=
z!kh(gmt;~fvw<P<JTlIzoc`76T7woMab7^Zb_@{9ZjnWMQ?gD)JL*=EZ8H@PALXbj
zS~~G-VIWOwvNYUn&pKlbc09qh^Ks_ST5B%I|N7!zLW^*|cF6hA|MAVOBqXX|V7JEM
zmAH5X0+v9dV??M}81oBMlHH3nI|3nz^;#akz=Yk~nhViBHeeekl$MDgF<bZQ<2{I{
z$RAn*m_JE2(;Nsg_VN!IseE%@&aCx^#Wk~SKVsy@zATj%UC)qcUuwfX&OH~2^_6XS
z%BDBqe3PUNAQkiVZ5?8FS#ZNj$(wQ{y5%JWlGO*L^o*IXD@1ktd^<kU?$wNT(~X8~
z1^%SNF7K|?&>=v=C2x}x1fuxh1O|J-Gv*%OG_;HSnMiko&Ftr8q))Wh2iVrhGb>z*
zJD9Y;(jCogi}(V`ZCsZU3`*T>XwS_`PeuXP2@08Uf#Ei+v0G87UViE#=ZD!v7EO6|
zU!ac<PCLZK%RtFGsRJ{rTmUt%ROb~lb5Go;6CJK6#Pr9lxz{$EOOnLKm>sJeaJ3G&
zh6yjT8R!)7rv*uv`W7PivOwh#tVv-sM5id8q`e&Hf}oeXKilL7bZ=$S=ezxd(>evt
zSa4|ztandAO_BG&YfjxqCnBfk?#<p#IcwEcDTr;r+O&`})`Wn;h${-TOY!<zYOQub
z{|P5%C3};JS0KUF1B9r&dq~u2CQ{9&-PMN{wkRcim~FO4UZJB1XyH@VcB_jiit!P8
z!f6*1ye<FYaht|d<lSYtt+R00U%Xl`+7AAw33Ih-$y!=0y=q`ri>!W01M9#cFW#6T
zy-i}aIM&D_MFSj~R0o*H*rZSl?OB+2W%R&B=@!{^jd;<|7N#Vrz*aQ2Z=ZV+#~R}t
z5~Oa_E9dVZ*|KmQ;7T4v%@^RhZ5z^}U<;-yDxsQT3{dbCcFnS^<Y#@va!?zmuB4zX
zxNu-3K2hyi;Ah8@w;OBDRYjhfG4|G_I+00h7B}UL*4GiEaAs8lfl_~FN^M%KU>T1+
zKVi-k=~jKaPCg+sK6YqJ`p{xyyrB+?$9^GXfMEI~I;!JbgyCC7#K_rhd(y;>&>^3%
zxf$cIW5l6rxEK)~{COF!efOn9e2>7J@~yoB|LNc-Ufg8cOy{zqTyGZRCpMF;6+2s*
zbg{(exAsa`tet-M-Cwm}GeqP9R*r2m^cn9*U0L?Nn+_BuqAjOkp>)fj!)@>$+rHg0
z*#TI%<WxH{7CNYmSk{NJ6-yu7gEP`EWV|`}{7XcRdvF$qucV`-JFy2wd}_x{#Z@g6
z)9yqZ5ofkpzWTitz+`*g*@91F7z*(vkw_(C+{_T9Ax!Qj3QNl3VeA}sYXBp7eVMb$
zw;!nR4?kDv79^Ub2;KZ{yQGdz<07XDu&4RvhbkTrRloOf=Y!AS?_c&gM0EEEEjhBH
zha!H?h}pWn!NsZmb21@CZs4Hbf=b=Cs%!s#g2OuDQs;>e6<m1Y;u5Fxe%bG)j^6oA
zkDq2We9xPw{`&amw-IgAkKw_Map}%XcElTmv(+^O<thQ@3hLX`%DOOIJUZSeGO6>3
zkzT^z-+=m0K-2n}v=5I0u2}7mfrBvoD?F-k5KP>Xj5ZEAn$q{&p(~I~a@Q+C(`MK|
zh*3GM*_e6fg9}B@awi296Qx#^`#T{)CkflYj+l!-WkE8~uLlRYvQt-)vM!#=32%S?
zmy*E*K3{DPt2!c?uM52OnBNH!<M-@D?EG?Zs3#dI5kFn)cmH+}4vR*z_gz%Qr5V|b
zH+ElckDRsn>{>Y{@cI2$Deu4K(En0Lj0>grYgEt(OztG9g@-&<uQ8i*fXb7as%iYh
zNNfjNOtHe|*h}wi5ma`xi}lYG^~RKHkC=_oQK<wLdr2STXuT;rKBz=-bKg%wb*c~H
z%6lF2guaQDnzn`TjDo*-Pf@NpXcOIN;y<5Q7Yi|r^9;P@bif9Pjdsclpom7*^x2>+
z(IB7J<Nj5e{ZDOf*i4@k_@H4e2F1fJPmeO0qSkzlOJwmjZzg5PWR~tA+stLZBT1oM
zEqSEH@TDRZW&N8rWCu1qt++(#Cc_*aZ^A|&oW!qCK)oO8dT?_bTpA_8>0)<8l1%_I
zPF>it+s$tW_}93uMENhO*^FU+w(~CEeY`bttBoySPuO;l&6XAr!El9rt_-QP^Is~M
zO_<z{lMTy|ARtXGZC)9a)tN}JH_i1I;8gwD5?^uGB+hb4;}?gvCEHRR@ErKiLbcAN
zr35c~KT_O(r-xODSKG!<uFX+P;Tjj=0>q9QFa0>W-e_yzl}C$xA|-wwjgN2<qOl!W
z$Yd@>$z*s4L1q10F*Kqi`YCip<Rwmvw4Cd)bPyt5?jRCQ`9fOewpQtd95ZQp31QPV
z()U%7;1Nui21un}?a7x3C2k2({pqmC+owDx&Tyz1P=H&0J*Z`)x?fPwCV$el+y!D^
z11z*_LG$>#0Z7i`uU%&|RyK1FCDd4zwB(+YDz|0brHi*o#}s9!y^gix4KCo<{)6Jp
zFH<Oha1cwP&0Mm;uoTtFmEja#y3NEe3^ab(bRgMff-aSeR2ryIfMHp9io8V_I9)vp
z)OROw<z_Nqo0IbHE~f<<D)Igt_1RnwCZp`o#0tZL(gf2_*A46o8_IRF+SNN*uo3GX
zG0uINoAKiQtPF{H&&ruOpYu((ZT24yp^tT=)8LBl;t}4JHE~5gc{00{PbW@WxNpq<
z;+;jZrHCly8RX)>@qFsf-(fQ{{i5T3-!%1@;%Tn=yuGPKVOtyRYOhN-i4pqrP=BNy
zLTt>xOKln|usWlB-UT&cD`Tea$x7|H^OMpf;P>>XF~ZYeT};)>HMOBU{)_j36DjuY
z0pF%ehbf5dx1EnXTv{8VO;MP*FMVv_8L^;kELP|kLO9nb1qlPPJCgoT+SC3=*l1X=
zS-2LZZo^Tg$!sbkI<OyCqZ!?mN5lg9#@fE_Yb{|tKQ5_6uaR6=Yd8hZKGw#*$~5R_
zz1dc&p7DemNX!YPS`Wuefkwx0AndeK0U-Fvwf!Z^*R^=JO(Xx<)M|W;Q_H3aw`$|w
z+mPpEnoSdHnW@{>3SMSxmg?<8kL8;_WGNa?k?xIm2cr_x*mBfjw@&-aOK16g@aINO
zSbf0cXV#x4w))17u&|$%Eh16+f+Uz7FSFU*qU;l)gs`kzPSJ~`Qj*<lF#sd2&(${V
zMr=Q^($CzjvRLKdUg-|xv$czJF3~s+Lc*WWh%T(ThuRz_`moLub6SYW`eD~0^(gjb
zMcqs6ggVhyZP{uif4sY^D#wKXcWF{dD@wVNM^`rNe!gtbz8dH!;>8VL$gpAjkewZw
zv<YJOe%(0ib18_{s5f+dtlA%>!k1rOfz~Df-4WsPu62dC*Bz#QW*1@<zLGCYb|00z
z_4}8)%W0{9O!f>m)AtvzA34;sHB*-CHacE(Uq?wb4K*KN?k~+L#=s6I1!zqq+Y=dp
zM{sy^bEUbN<YAqs3_N958x_i|yuJdgy8_9M5)hAeUFeS<(KIwfLlO-0$3YD9=XRFQ
z0+aBgJr^;H2Un{a#+@bZu&roI=Qzk~1h{OO0Q><kcOiZznw3&`4nKH%90w?XiKFeI
z>_NuSlk+X{+UT)NFFxE%WqDDEAV<O>He8DX#h2mEJw6d9Yuvz6#eJz*Z!h9G^z9LO
z_OY8g5|n?K^EuY;xh^m*-WMBd0(meJ`u!89&Wo-6Yd>^P6{vt)cho*Nqxc!lIkBCl
z3N4L!LsG%?Y^%s&$9CS3x6ojK@XF4niPP@{+TT|=CkS}LQL$-M<_W@eBu+!=K+?j!
z%bXU9JT1pY-amyW=fivl=zojCnFas>TPesn;YBWM<inh)GN2U|U}LFTPuQqw_|k4e
z2eROrWi3XJ24negFkz!7clm<J%%xiKWlm=vXWfnlY-84W^ub?`mo5$DO@oT_{PvBJ
z(}BidOSe(2@4R2%>hw+%(T?nYI9=i5t=!H;;phc?+N5H76mtkoUrCm8Ll$lAC9&>x
z!6gt~IR!n-eLACIPSM+5#f@5?ipEH~tC}Dj?4q>)vDuvYuw5WMX;4-OdE6c2(Q`;L
z_IzFtqgd$IF(Hdf?@m|{-373n{JhB>e(YXm?eOqRhLlc*xk`ddF7%t~`okkALEcj)
zi|sI;Z$*OdZa;#~6#_PEiHFtE54@0e+xVmdhUD@{+7}QMeWHxo;0_uYVB7P?u`^ND
zh2aGm%R&0DQND|ONfE7fBDCO@{o~we?pE4If<Mqmg}zOk0(y!(eusv@27i+g%2wWw
z@;m=H#^%!JLSG#~`x=r}-u!3}N%R$x@dA?e%C;&HpY|k!nRA`Vw$iqb_b9`X26CSo
zPXhV~^G_)K9y@fj|3k$f4Qac_E7gm>F}aRFY9UL!-<!5HqDnvFqR7r#X})a>0yCM}
zy!(4d67y^c=^nD-!cPBI^zT1!w@b_s*opf0j6+J0hvSGdTm8$?BuTBs=_X4Y41B)e
z@Xy{Zojc9)_-U0Od4fjeVv9qSp%^?8sc@{<WcX0wt?l!NCf$k%<Bg1$&C6uZKD9t>
zBt|C9VV7)y`&AYt)2^<)k8IuwWoDwGJVKdF<{xo_?8m=!@W;5glRF&_sqm<3HTZda
zmEo1Lw0T0_tv2&=nY4O1guIU(r)GxTzikUiaB$;PQS?RAjY9F<qeoenOAY0>%4Q_n
z!~aoPEgQ;HH?oI|BGVUPKEA!EfV*5wN2$GuOl;O4$LC@{1C=#bSBBlM7(5b6L@;HE
zh_xHdo--LX12(wZ%ggqv(pv+l`1b!FZ~7l`lz=spDbn%TLbS}!BSV1T`rv18b9q93
zvraH%uqlmDHD#JsPNuQt-u9#!sF7+)QJ~!2o!@Vbo6G)Imqfp2=dM{sWJ>^7vI^Ms
zwWgqm)lq+mTbaU_3n^rohW1m#K$U`T`<4Z~0BF+KUp$PoMwuIlHb!)r1JIDD<6Ipx
z$?m_&hFzSyX|>^$iR}Wz<ManAB$RqgX3o2l{B|m}J?(>ZrqGy4a&I&zt7Zo??<(C^
zb9!}hSm@<R1{AE9u03S*xP+tiLL;a~OVe^A(TvheNjXTy8FTv)_F*=^ap8l~m9%(I
zYLQn1c<D`V3Ag3iJi=K0%TwN;ZS75^ZA3PmhUi_j(i&#_p6_Ra%5=Z2OSFZ?vo^Kg
zJ(=Zo8<02#%5eB_NE>*|9%WSA&;fc~a=!I=b}4O!dD>B^IMWa-bcwWJ-ty<pi*V-&
z1iEb~f}`w;UIq#F_sv-_{^7E<;lxvooLCYti@pYB<w<?Vd>RV4%aC%)q+sAn3{v^N
zhq<$lPNNuUr%WXy*!SIOQsvo+%rA9rh$RI%*l{#**E)huk56hC1U|YA%gqhud{Kyb
zd`5Td7Oh(unu&Wk@K945Cwca~lCW;S+zbVzW~HfRVo}@9?#SU<y1y^-&8VwyDra#S
zNho}1;$;4BfY}pT-R>MHT6K9FHRoe{#e>1pIo9GXBfiKhw%i5Lp!clfx<QA9(7b&3
zVmf=7X20?bHu}80MmMEqviQ=w*Z3f)R;$q6Gp#O_Z;tJk9^Gx0o*TM|ZAzVLVvMt)
zOh@llUAxGAv`Hk~G@cq|kbmGX{d9z`GyP~e5dhbC>k9V|I^n$V`iu8b?^_J1mpc7H
zFA`F|<2ucS&rIO;T++4uVu^Ef8bn20kz5{f>^F#(ts6`!RT?jd{*c{pXH=>>jz8BI
z%G%%QjU!Y|0j9jQy1Ph)b_LIf>HbJJpw~^<HF>h1jOtV2!s1!^W5Dm3WltQ(t{T4I
zG!^ce8VgT(d_TjtE^U#*U1zoDDST>AYvL+PecQ${>2=kIcJ&%vr{gqg2KZj!vmr%e
z5dZd%(hAtVJIzu3xlR(lyD^Q4_ivj*LGDSuD-S-`W9PU@-efgTNfn+peg~@fo@4c|
zI;M8a25s1|^HMvX<l~sfVOrH&uN$UC$6fqH!sDIIMAysuL$a$4xT~-86X#wLJEfj}
zrVOyR0WZXfLUqg2mDURdz9#EePG_c=6@AiQOe6G?v2+4>pI5#9+{peld?BcBk1;Co
zNv`@+S2>htLTTa7f`icxhM}-AurD8ew1Le@PMO@n(k?pynVwgL7OxB*gMQt-nLpkS
z3$V>hP;|Y8_vcv)wyV$SLA^7~08g2GL>rt+5jiXsj;55H;SS@aNm{Ya4ia^fAx*;t
zG?eP9IC96kKSyCDJT!-y)cvXond<+v6YWgX(}mk0UclW%wc~dsVBRp{P3l!j&rOOL
z;jdr~)7%)vmmBw(D^l})+YZ5}pKR_6rtHzP&&AFGHpc6dl;r4jA%h0iEjFn0!$>a^
zJ3#1*S12v#>;>!mE2?0P8nAzGPF74B?w$ME#78m5G+lo^(hXt4#-KM<UJcjLmBx4L
zWBk*lB29o>k8(Q{fG)24QBh%ndYn|Miujd-Pg+<rP>TF<--$_EZlK_hliEGb^4mA`
z(~Co;h7aqZMT8_1AF0gZ4zG>av&dD~8fYwtUG2>;5L%6OON8&Yl+M(~5#8J5OFgP3
zi4(#D<ILR+d+E@|dELR`jj8*y<+&;r<8RN5iJk3LW&o$D%(Loqu2S;)-9c)V@9H?6
zv9NW`2eAKB*LnZ5`M+;pTcz64Qd?<h>>Zn;G)8Pi?48)7)E-qeYOfe2M$OtIh}xx<
zidiuuh@yy5l$zyz=l%KI-}|TU{SRE%mB*DlUPsRJIG%?lk)0DL=oe~3XMIF}HANfm
zpl!0R*nMc0{glmIr{tQph}?14n4sz!P6rIm1E6}tOb6@rb(}x`{>4j$$*@XRKh7<0
zSp_%s2s^wxm^(TWZ}+fW?|I!0SW?%Qj*hgcvP!=C^v!I?sN9nc8=}pk5#HPL)H9gf
z2w4QO$uABRJQ?Xs`^Y~up`uMN41HzpN@KZygUMBg$@_53t#cE<(jw5xho!#dlLcXM
zRGUIExBSw6$M-{?zCrLBi;+S?{ymRbj&FY%uTu$7H+EoPc2q!kW#B5FgC9bZ`|V)?
zr=m^l6WX<uz5Jk7xbcO6z%MtpPBE9wrne<%VKm-7pn-CKlzU{8$zY=QUN8P$3-Pxh
z%lYyjzQlmmJiX}xhtq9m?P(sM#jbfiI=38*d&Ell`On280S%^5Ifa!mWyl_J<J9@k
zW=Z06?bXZ323eO#LXY!r&?vz%3C`u+x;!WsYKObNi*{a%3xjTdz~mhFQ3<xjEZHM*
zil-Zbn^8EiCMu~h*ako~cpy!|C)a^gJa_&IZs+ZBEBmyT;c2>HS_UH4zW4fVYglxY
zJ??m9?g4ifwSnEfaa70#8MKB#@%SM78)z>w?bo_yPWX~9`!(x@;e9iT29Ghr#~Bo-
zji~3xBMHFeuZV+4^87as$vKXk%qhrlz>zn0pT$VaB<8TRPqYFj!ysluT0VT6tVukb
zAj(-SZI$fKU_7Z>gtYhK5J&Ne4Lh)0YoymY{^|m}VHYh1b-vC^uM#Bv{7)ZJhM87W
zj)Whw8^jf)i=5&p{#6R*Y<IYVhX&`Q)ytQMz2c7iLauEUDM?Hc&#in%0f9LT1z+>0
zaV4?)9n=gpW14JIZCP({Z}Rq^3?DpVR{eqX{B=rNh1HhE<$CPbmThI`Y!|BIhsmsN
z$tWz7)Dr1#`|2bhmEB-Ym?qn@@L;NQ|6h^3Er03@4vE>#bLDBy(pmpP#0Rm-NJ^tR
znoJhVYHHZm^$&IFhp(TkI3LipOFf8Eim2J@HCaoM`wvFxeplRi#I=21y^ckPpM#3c
zgw1f}M}BX|OPwbbv+0AcL{~#1?ogk=zk17s^dOIB0wlxqdP%Yip;VT0?mUwJb4wvD
zpX4w9|BijwxRhHACzGJw_@t}+vVGiI7RGKek|o4%Fv+s{4_ZXu!^n$T(#cmoq?Ai#
zAg4!bZ8PXoo2877<kCi=VAz=86TTHRP!udJIFL1dghtG0>FL@&=R+GNF%E281dV*d
zzgDy6=Yj$GIWmT>cjo7NMufTTA16@BS>j^S4+S_MX`UUV(i2(`<B?H^xAUt-h$<IL
zPI`?f#%ZQW<eXM1efVVS)tMf69qHCUTIh2N7})AQgvi#!$E(>pV7C0qYok9j><>yS
zHs4Vm|83}pbU}+(l3x=oiulO<F#(Qp1LlyT_g=F`T1DC8-YCzXW(t|^XOTqgxko_#
z^y+qQ)~6p2d^<Wp9Y0)=k`tZXk39EV`Ck*p7rbi(#GjywOcaJ+pV{&@2bbzs^cxT0
zrXA5R$;QR}Y@-N(dOvWhFJAPo^|rj^E)?tbsmYn^!<BhR=3EaN=JYql#-Rnsdo3qb
zv2FM$A5vM$*MSa`@TUl520^`++@!@#awhoFVX!Xq#)zU}DJdqv$Vcv7_*`n)a|6zW
zhOmwNHKI&{<asJPH!rKmNzJI5WgFpuZngNkzn8f3JM5u;;s|Pc0X8cl#LPViE1$di
z<lCjZx*4pD+Slal3MS|}m6h!yrRz}g;M;}MUu?u`x7DpOO}JV*oNs9z%WB<y7#bd<
zHu)RW+A!KJ-z(Pj)RCJ$(*P}d6(MFsHQ%jC??Lh7DsW`U-(~c|bZsPDEIM*&v1@$`
zi5ivjZSynuN=)C<FtL5xbCzq>%qV_bapd_Rv-%Z4^bRDbYck9Ax?zgVUDcANl#FAh
z&?}r#2zzS?g%ET*3C4Z1KKxKF<BMe3<cHWey%)1?Qf(YK)Izo&tn*VD&-)O^u9S$1
z7g1kCOz78~e-4A%C_4#zQ7wxPD)zzh=6>tnw5lxE%U#V)i}wAZ_;sm5-r2trUYaP&
zGUzwVF^n_f_|pv$p2w8eaK8{gE0DInJRxBtjRR_jykyl_OANtMa{~}Ib4|RKUJGzq
z2OMH&aBs&RFfYzNDG95`zE~(3x<qO%<~6`oh3-SulcUbpj<z;QrHNsU7QZ@Q?d8xf
z`C3_KwEeZ`_@G{?DAE8KKkK0d<G}kFYU(NyI>vz(gx$m_tx-KN&--i*9SGax(MjJ=
zyh^~nPM%b&K+gWB$*#PLTwSg|TyAsE6VdaZE0^E1s|kQidISaCsn-_rTqiWN$`5b&
zROWv_`IcqB_LXkckxK8yWcAI1y&wz2*?-8W0aGr<30ZV1j@o|@Y9ek9V2qql;+f^3
z+71QBZx6HIC9ML=OK=kT4(jli@E9L+ihV*FFW<rAy5#ng(a9;*{O*Q-$ef?ANNQhB
zrAg7_y%`YiYBbEoPywkHLK+ose`WnWEX_dlwW_%!S(>}$1RW&Lw*>qgX1lLz@t#P=
zUw(3ep8W$|&tdrOik5#vrZ7B4g>CV)hHi*N_XDFL>U%|}W3Qry)gKxoR^Py8q+`&W
zDqO>3u1jVPvZAF2mSBBbN~ao>_a8QsGS?~#4gI3Yt!E_xIqVG)5vZrAnd{;!b-h>Y
zU;s>(6l3Kc5)na2mNhzwl7RhZ@KC3z+4&e$Axo>@CVUdo*6U&7=ZZF~%i?Rw42i_3
zzJ_6>SFkv>c9&26>ja(qmK47_&jw^vv93PPzWw}~K};~jdxwo4^rmhc0}Bu)GW72D
z4O19PXS3}@MT06(TDnJK&vap&vS-1{kr9ikyt{Gz{JGNsw}*XTl>9M^TKC_H7~#B+
zWmcuvEx7VJNr`uTj}ioAxHf-_mNn6{j=5qjL%q%+CC`z0dv&@|Dg2NM^Hki&x3949
zQ;vz{UJr2WM#hEVK+5qAZo+G=RoSwc8J&TR$C_>m<tMu_u-3dg<|fT$);IMb)juLC
zoD&d!BFvwMjMNfeSZWa%I7FYL%e>zMo-u!YvPJN~+O8SXd8d;COw;8NqX))4tFDGd
zC_2O%|6y**zV;mHhwYOOoAREtMVyTKWLk52<w@6ZYiU=`r|7e&S$@hZJBrR+<+Tc~
z#`V@Yl|$S+T^g=1+Nv6!4LL%tvCR|NquTCB9@>KmLA}Hz<|w9s?NAajmQ>tZ^WqCd
zj<LR%3|?yAQ@#(%$)cIPYk|1JS(o_w_QlikKXC7uZ#W*6o$PD98qk$hUz}X*m%g89
z$s?fdl&i4Y#@sbgfgKijkpRcW>^koSOPMiG8YiNF(`B$e;_hy=r+|edLJQ89SeC;m
z8Zz|P#Y7v{uU?(z!Rjqz+~}Xs8kw}phrGGCL7o>ZU%5=Y{Z4PSy3Et`{Aa)@S1DLT
zW-hUNc}8zOz*Ik+6q61YWSNFeTb|yF=bSqAYT*t&`slfLX2llf>o}`@W#j(iAF`&u
z*qsZO!<{P^s^HjXouRov&M(XHuSBo^hJ7UCYz-KUk^r$STVU>S+f6)<D7>qSCI_{3
zJs2RfcesOs!6~UD1-<2N2|L-4XLZ&#l3lt3<Jk9I;&qrGh}J_5pRTy8pw=Bm5GV+!
zg|rN#SKTvz`d}g(+1P0PX#@#U9>2}V5Y-0{97*7^I{wmhPs#VbH&Epb(3_LyCDhql
zE(2O;=Y$Deroi_wfN1V5M?UvpBv*Q`2*0y&otNSoZwMyN-p`WzkLSC!I1M0quhr<x
zEm6R;v!R{HfX`7+G-oq-JmOuLJfib4Y=TH>$g`fg`vPk!Xq_lhLXTwC1}@E!T1@(D
zlcPX%q0l%|>n=&)IC(^pC{fAjk#G$;Zm+a*=a1!B&d$!l2K`BcVO&{Cj^&1-%WUWx
z#eh7kG}#k88yE61j5)7tBFew<X$0+o?^cN2ROrP9eCkiTgFMB!Fd=N2qCte*y>|HV
zfAo$tG&0N%w5CpY(-lX`Y<($Ewi?d)J?>n-d2pibs8Qyv5Rc@E+xk;Hky3RP>p1m?
zsiH1dDbTb|sij{3`$3V+6ygs%Qf~b}PYczUIDHhwY3)_zyUv?*e9n;^TBgx;zJVP4
zaS*}&C^ngFbouIRc;-(|MN?zxs=qvZYqaQvDQ}Jvv}|oRUY7KM|DWOde^F4ENNc^q
z`C^LQnmwzHqYWzE*Y^~TxwyM->CD!(>Je5m^nqsd+jnkImVe+CKxPJqI;~$W5l$p{
zj%jTz<!ee_BzuPK=B_T!C)avlP0P>bxm&qS-O+-J15pXdGiVFg)jAp<-^*em!sE!o
z>p|76VabpswWj<h*RX6w_YZqCi<Uqe-C~Mw{u9>JKI>2iG^KuRH4*C_=Ct<d+u*0k
z%9FHb0O4<@&h#rNi4@nJi8BM+6Yl!~rfnjxv^hOn%(THqJe{or4om(YR?a5khB%*S
z>w78pF!$b_D#5Sh-e{=osI2xwh<MgcO2{XaB$L1;-M*I%Nh8JCF<^|53G}Ojn!Kly
zb>pSf)4I$Od;sJ<xi+?ytMlv&T(`v)vwPofquvK@iqE$e`p`On6}33}8mBefD;1@;
z_u*|aY1qZ{U0rB7<N6F(J9bI^_Ada2s0rr_sh#|IO0pRHrxI5C%3Y0Qj9NrvQKT7;
zC}q;OGy8;&4+%H5wCeyKyEzNhl_PC~PM<gPwA0;@Cx>_%7K~N9l`6;#!x0RaJ%ib2
z{x+;EBT<qm$fA4C(U3y--ZnY;q^_(m8>;D+ZUyUa7wxiBBqDRLxoB0Tu5Z@UaB+*{
zodNFSjIf3gnsU9Mq5PLi5x=E>f*-p9QCk0yF(7=hQAgBLZpqUSWuZDtbL*>iuo5JJ
zj<{Jt%Z1^e(<bc31Of6P=E{6rXwZP*m|M-=1?Qmn(nDr#EHY5=9f7xHXR3P;hEyxQ
z8E^^2VzQ^RbFh=l^H=VKouEE%PNrWHL?ZF6t@N&K1qlaC$WRv`4_C`KYj1UTD#_b%
zlpU@h9f<fRt(H6M*6jv-<$|%X;+Zf(FwaGIPp<T*3X-wN>iy8&62BGnn4?vpkEik%
zp4B($^)jLTy9b$WRj<D=Sj+*PCjp<y9rnH@AL>cPj{2;6Md_rJ-4+s-ia=~Uh2>)9
zi-f>8_l902>Y{h*v&3#`R541&_a7dO`pB@R?yyzlCh7tbfZojH+qbH_XA>5emFuY#
z%4p1u8v*i;l_FNAoq37wPIUs_AgxPYdX!OH4<pu5vp!!{$oda^>dz=RyuVK&C5+_i
ze{EI#z?J91nv{^*x(S$fvNPPIvgPvL%MSTIAXtXxY$?44CD5BEfYsqLXk#O6Mq_3R
zn)`5kR^p)nH&dB$t5&Qw>-`4&O%qV_ncUJ*{?kCE+zwZ2-+%-)IMLc>`Ta^(#Lko#
zT|2V?YX|&qr-Dyr^+7BUytZAT7dLQJG2~T5pyW26Q&U6bknCR0ygF+b*JOt9_R-(&
zg3V@F%*)L8L*G$tKkMhz{DcJzcd9_vZRxrqPWdLM5}tm3#6N9+(vzdZWX8Q-Rm1&*
z<*OFl|7N>>%B!b3Y9lCA*C_O@F8Dz`B-yh1FPlYBFj8iaHgVD~I+!|ZRB~DzZmpPs
zwsyfodP?yUTPiYdeJtb0K(jwFBD@SeKHAC6_vV@S%S(~*;|lK&Of(;{4_`0<P+HU8
z8XOY*tl*Cx_4z-UX(|y0wxT_ri}NDF&@KP9=NV_9jlH0XsTgjjR15j4I3@KR39axJ
zKcj%avr9SM=xoJ*<078wLs(=nANZEOQS&&DXzH$7;gL<YuS^n|1~H@M*Sid@9pYp%
zK?laUr&6FOzc%UYt(Q%UwNe+#=9(lB3uD%ieqSQsANYv0b(?#a%`|Xn!b3W;JxR(}
zQiwi#1x*L{En6L&@<HP<X5$xcRx{;D1pqeciIx<%GJ>gog<UhG>^?nrvhiUA8ai{d
z7#dD8^aLu8Ngz^Q+Fw9-o>XrQVielmd-`dWbY(Fl#rwxdw%U&K2Ct9`Ud|*1<H{GQ
z@&@K7>W8cPvOj%J4u}W?V2yLt_3Pf$LfK`BbzHtM<}&QgLf~7?e$HFoq6(&ytWFsd
z_~%nMRK|9vX<+i9R5sAJboc%iY>mL^f4d1LGLPQ2xgdCZV9j)b1dSmb6k9ew%=m1M
ztS2WrUaGxOU=oxj$N*44_q!7naYVmN^=-g3oBrTd5DcOKF|N6OIZ}zUC}`OczxpIQ
z|5;uugHhf~m8YTj_`<1y|8`lGM>`)zwNq3sseV*wB61Q`*;y|*=It_R>i0rJ$1J&=
z&6)$3Sx^HgG&^_<EuVJE^rk4O;j08X&>3k&p>&-#7ln&o7$&XPF`)4dbwcbP5PgP~
z<({FIx%!M_P5X%@^)GIidgLLs>HPq$^<ouA6KE^Qq;#p7+Gi;%Jw5h}_F;Y~k^U5Z
z!0Nbv>5BS?tXaMdH?%)I>rd1tb=2XCMryFmqb>Up;?LTeV!txB@1dDI;-{nf3<Y<`
z>~Z9Iz|DA)=(!d!s4~LEwVlJ$9Laq$vY8z9`F=NQ_&J1n>93R7Ci|yB4zP72^pO=h
zwojBFT1c`<JAp027xKy@DYs+Jc=}#ar;Ca}=X_%O8p9T00pxY-<~KG(k{_&6Koypt
zZ!O!QSl$qPkN)!5oU`=6;~>mWXa?s756{{Lh06P#cz2Y_9Q5&Q80LD`7~|cn-N>{N
z92*LBj~U4QvvxF2;qoOka*&xcl1XmnqRu6=!`&c7`I+(s28$}lJ-e2lRe)Hi5uBy=
z4s<V}+9VXf<pQZk&L;}WsCe+WAuLX|FKvz~I4wXG9~7SyH&d`b!Mad)|5Xq9ziYBa
z=)GB>O8i|14E9w*@@7W!#AM{;`9}SH0}ayqe2C+@6HFD0<RXcPyErrQ^Cn^re|R%6
z?=6b*UENHP3Bhi8{NbgBVwAJ=ujVu|@0c{(oa<6+Bx_Et$J;O0Z+6wkiQ#SmU4&nO
zrHQm#n@4&D;J>(?qi(6Y-Fce7WD26y8@c5FA)}>93T}VOGCuXkw4z6)Ln+XxN~E?`
zw@oQ*eSJ5VSM0xDAH`nE{eizr#YYm;n<2K-oQkXFyE-(F7!J};gUT%PJc7n-+G@L{
zw4q_SW7c2^g!@?7`a%Kn?tg{U|Kb<=pK_E-l;N$U`#sXD%9Vh{8reSQB^~oeyE|I#
zj?UZB`}Iv{a^(meMSv05zWGVgR9AqTbV1KIRLM=60+Hz-2U-4p!m_Oq3yU&7k+p0&
zQQH^G#GlPlq0(4KzPPCB#LM3`X5WWabH$vgY|H)*P$s=tA>J??Rz>UdSQVLg@FEV-
zPGGg=-ipsew3KX84MLxYX964)4}oRoh@>j`MM)+UZK_ZvVM_s|Cy(ZId-rIVQV;2>
zq;M6X+!PPmx*yx`L5t@Ay8YHpuw?e@G3O{Bwq!5KltWeZk{4xaHKj8G8yt%cMg~xA
zI!3Jp<_4*E4D4UO%;+ptDhp#(l-*lUn8BX{J!{KYw34UN51%kpNCuUn!H(pmZ@tv(
zBn19wwd+{(rXT-a{fDg2o{P%@Rs8Ez>?bA@cA4<Fv2Ky5V8129Q!w#<wMO#&*RzmR
zFe#!VRMpY2pwMcsfTbmtj`wJlRl*P`ZD=XC|4W4WIE#vfTe6z%8%CokwKvwu-g(uN
zJUIwwqtd@2)s~&;#th3^mKP1~IO<Kt7ozaYk@ZGD%YK>K#3{4X2p%BVKz;y9;nT_3
zCivRt$MTS=ZYH{RLVYN|7Z3aqtRrywg@>xt3fr6Lpsd4RhLBur$OM(VbF)5P?wZC|
z#Hc9y@)K`&OSMbcqr?MGB!C{1Ns0F?A)+@{#kvWjN&H<I1!}x}A%%JoR@lm6R#sO9
zApkHM>IPr<7-q$dG#N@TR~ItZaOVqKXNB0z?B)~WD5~aIZI;fe&{<12U$i8~Q?W|f
z8azGwrq=<$57RVgNop2uI33C$BnukSLuPNcvzfOkTLnq(mged4`#VkZy+I*JNT#S1
zHNesST?YmTSf>7$t40b<dUT!g&2KvPypScq<KbTxf^dH`>D+#uv^L+ZX4_YqBakbq
zq_uk@_>1uU*9g6L)SZH!?@Q{xCDC;!*Jwq{W$x}h?<HnmUE`T<*3B%qW?0>PzM;lk
zBS~P@f$<?ViFX#OoAKgIdJ{ft2&e|=qx9y2$1ysr%C3~Lh~89-dmfSJs6Hl2I&M5G
zdQUB1Q5b%V9vL{@%mWkD<7u4_MNq3!nC#?wly&QC+dcTF&Ju9fTfd)*x+$wc0|(kq
zzyfMFEZ$SiMbuN*bUt#&m);x_@I59G0B2hq>PPvNs*~z>DU-5Uy_?+=VIF?<0H$2d
zVR(W+Ts(0t>g6<d@^pNRMTMITy<b`*l5^qr5Bxs0!Ck0rg?2OB-T6e#DY$2I81f10
zqf-2xv1>v*{x|!>r93R=6q`)JN2KrcSagLB)`Ydb%h5{!L4WG#tQv9&X^8k(<grI=
zl|iQ@xQ)FL>!=DnObCDCT75z#D8j@ds3x-)Z6nlVvX-kV!H_m6vh9EVW9xa|=a(DK
zd|81$vU>buz_M)(IrSV&b~MSHAHf0`<zAlBO)8|3u74Fc0A0$#ynIYz2-fRT8<Et~
z$h`Ya^&31O>^=8UUhO9~tCZ{AN<<+>vtN+dmq#Blv^z2vtBB9j9k8SMoC+PyN)G+q
zT+eU#C*E^a>`!L5`dfwCibpG^YU?TjNgys~_NAcYESpzz>swewv9P?|0>@dYyX9ac
z1b;emxFTW_RXiQV@0#f#tc3P5pC~KIVik|_NjmnLESg3W*D^YqpB0S=ylu1HF#pH~
z8O88zR)xf3SLW(ZcHggH<OA2_(_dQ(EK1`E60I(bMmig1+>G_sg|f$E0gFpXu#Z7}
zal19I!iUB6nbcT2^lw_VOx_6JOu3&DeA_hIL8(Mu7>j+lhVPUSsNo@U>J8U;MQqR)
z%%QjNBJxW1rYrra1c~?6F32q8+f@T)2~WflI9T3#M-p3KUN0CWg|>LM+bQ^Ql56UH
zlg!)(BR+_}8Ae_@#l|oPy*hQ!+}uLSmom_kTgq+pxF=8EWZYh6sQm5izTv??K<&5B
zDN@X$Q;ttm)|nBA&DX0a*Q)GHm~qM;;)h)O^8%ycw)b#p-%TLzIn~Vs&u&c;JOcE4
z|8Y}m{mgD&Us;EsPQ~}Sxfg5ul}2VVdh!knHR1DkiuR`o$;gk)#AKVrPC33w88)m4
zyR<mB?0{U&Rj$Q_A+DyM4kzPve9xQtZ-I0dJFF98D~e*=ny9pQsU{dUN_Ad}Sztb)
z8?Eg0Vx^T9<Ydg{E6(j_qOUYw+-{?~w&C$|Cmd8x^)F^}1ZoUVFd`O-|CspOUCry5
zg!YJodqGs*VQzY(V4Ymh@Cd70h(K>H>O-OUK)j$IsD8o8EVt*>{DFvs&@BQ}etZ!D
zsQ93D`RZ;|>{kvkK>V=;V2*!aFV|b1TpO_SQwIy`T)RUNFoNW{%x9C->3&C9D*Y%-
z5<>GR15|@n{Sr2KJ^#XPA4CYt{XmfTofLbII|I5j$I45LWW|5H&i(Rw@aOcNQ*mC@
zS+>evOfF4eUwZx^1q6ht?jIEMQz7Kve2_-oz5xX6tdZ?SQb6N_2a|q4pR?PHF(bKq
z1e3<0%;`va50($z&C;N?XYI6wqFi@;AH&1<gSmYF_3+#m^@z)l!_hVi{U@UZa(}vv
zW6_tT6%4wM7Ak1=Nkq`s?%qnD8#m@EjrOlrTi>BhXC0|^@okj8mF)F+V!Uc5RdanU
z-<j&Xni;srAvgBQ9Ncv<r00c0G&bMi^0d0Obdt_8m>fgu(VI1=mGcI9C<DZ!E2eZ7
zNNppxAt4>svPVNde{Ixr<VCi!%CBQesb%LLl47V2IC3i#Fqh4#Roi?5mO$K(%yOwr
zgw=tJ*Bpf%$(KYj;)EzM71vv=hiEvC1s;Wq=)=F4g<tP<{xmVXq6FAfkB06$i<-_D
znBlU++Q*Fi!EH~eQM+&c(;fFevgCgs-8`B<+KS}Qp|2MxXg*JXY-9Q)3k}0V9_tyY
zj|Erv(|N1gy26<;(NqF4o|c!s)!%Ek)%=-{&f%O+Q>1b%QHEc`IK4%SzWaMKlRoqw
zixRrr$v0Y4IZZjSA#KfWFZ~)4F%?_2mHl)%L(ZRD!@C!Tw1itrz)vHo+2td>wzgI;
zjz=yk_KqnQ!fRptoGsPF^V89n#=4SKq`$uJ1)!yXh7*~l5)xE3mB}QEYRQPlN3$u?
z&T^~!s+}7%-#1U!9i_NJWKqy4$j|g_`|SW9rt|3VE%@z)zy;T8`MDVy%**J}Ms~I&
zQhMs6sBA`te#ZOhIM5couqjY6_6l8RSmc2ew#1gRh!Zn-c>~(&UqK4Zg~~9ulHw%?
znnT?sXDaKseaA0j9)k*t*$a9FGg@&~f+1G}u9MbWr8dP`b`@p6eYcr;Y>M=HEM=+{
z_uZLy=HWh79$Hd#X?uZ>^tba&i(eC3Y~Y8tJKyz;I?)&Hz%XDVm8r*GsuR=ht%}Wt
zxCz0;7m|MlLz3D=bDCBuvasqwRT?}T`G&cb0l3BC%$7nGuk!EG3iO`CZQZL3Zn${k
z0x?HjE5SoA>xeMcEEoKcvAG=5F_c<x?jXaBUrUe&Mn5AD$gC4});7g2^i`PzxCdF#
zr@U`U6yD1($$d(ST03a7>y$u#9^-kn)m#N8iWv$I3Y+-I1|+kYhiOGtA9vT-1+AW}
za#&x_fo>(zo%?h5hMX@i=QosUtz2nlmbaAES1Dc|kE)%1)|);(75c78Uo&?y=DEDI
zpO^=<e_XLW<|W>mt{p%2AamR-W6f4cIor*^@T0mw>D$%<QxGb^?XtVk;D<@6mX+8{
z7p0Nq{rr@mT7+*9uy{cmY@Jr)o-!wUC|O0!;-XtR`YLN53sGn+0%UM!S59NS&qyWB
zCS3uZ-_G-7bQ9SbO*7Yq%&%OA19?^dHouSTpC(1T0Uuii<h9<e2$IpTqc)_AxQ*q*
zqHme?s?4W4OK^`XlV7ka@QFK|u-T#%Eu|Q-z(k)n-$9{;xJqoRU%aEa8WB3H$CKmg
zHQQs~@ioczA)JnZOJb54y+*<;6?DkAV*qPL0{(ShgA$DoP>eGQr80SO#H4{D?nTew
zvYdrTM?9lTzeO_jP*0HuhbcR&DSL5Kl{m+@HWWBuNW4`y%j3Fu;7z^oQ;gc|K&JJ(
zi*Pd}-m{1yDf2@eJHx>DA@NL_;xEF&-`C-A`|{t<n$nB8o<^dzI&lI)q-gQ{koLI9
zy?&|bBWcwarUk1_t0x#W3|7W<lfaj%Z&P_{gmUflgN=S1NcOG(qMo0iiRLO03x27T
zYHp@OQ4uOd2`z0+l^9`jPiusIu8eAyg3ClwuzRvfjWhx~7m_?MFMo72RxB(=c^bB%
zIaHunS{>rbZeNxqzOcXksh4lj;sf~~aaX*T(kD!<B?q`vkBJ}DSzgoI^Wbf!Fmom}
z#gKSqR`KiStCKO|8J&tcWPw<FVaAE5glRm~2;Tplrc~48vInth;?xioU;sG~P&{7r
zN&bfn@+~h&DhyY}T+p|BG7wDvX}L<>0X@zr&5Q<ja_fHY2dH+WWuFl7a=l67KDC?0
z(coeUk|V%CcShQ+K12QpYk)20#65Mcrb(ZdO$m{|SL;VpzH93-Y3$cKH_P~J01+01
zS+KP4h(H@WDijqC-*?k)cCTUf4@pl3!njM$&@cRSAuU!)Tn6zzCWPOskaGsx1)@ls
zW)#a-a^R@-PqwvQUu(LoFFs964RYsc<*`nNEA?u$M8BvP3#gux+?BN?ryUpz-p(*E
z5JjK<sT$()xizCU6oNm&5H#_mTH>#CI<of<6Fe_eGXg==QJj;Z8XTnTy;s2pzc+_5
zG7phWPo}ypk`;~z=FOS-W&%ty{`j_(cqSYlGg)j_5tCR%Zwa6F3oNpA4yszq`zN)s
zc@H?1Gq%VF2^dsT+vc=KeQk7;m2^o?3b=r{Ug?scRGINaQZ-8+elJU5u(GT!$CM?k
zf3)sy;i&ZNyvu;!1f2<SdK4B=_tqo`I1%FQIeUW6lg`Stz8{#_y<{<Nzt|N2(vM;*
z=Fm(|5-YQqbw9lwmZcGvRSGTuB&+Lvn462xJN;W=U-3QRXro(4DeA3$wxnyQHM;}5
z>}wk>{P9bCY4zCc$6y9&nG{zmKd;-!cNU*}#@F}W+R!vv7ifN~l<6r+rO>sJe>g3p
z@tY7YxFX-=?K*tce#m|DXrx;Qd}Th4vR_+QHR>^=vw1nM7JN@87ph7K%loa4m)N(s
zQ(HLrE+2DkoYW~BJQ69I@9B|d!R8dfMRR#bu210EiIgE+zw>n629&kGq<4NB3bxMY
zh_M^=i4?oY&4%WBqNrM2Z|<7ECeImjZsxS!hw3XL*{>G{j>N#nnUw)_1ov9X;5)v^
zFO&(Z5{<k)P(x8!ID{eSmS~R?=#CW>MA8<`C}><@U#{J$m>okdQX-PF2c)&+OwMvk
z6h*GJJog~?i2`$4z{qB#J~P)79`%<{FX4WYPv+J;ua_RPpxrS>=-ho<_d)DUOjay7
zZtM69R1ZLX^5tJB;}bcT_^(mdN!@CVJpZm?>f1+X1%q$gtc~Q1W@+m~L_bVPPcHZz
zuO5<NXq-|7kS%eLdzyQ*4X~M<@>K(MebzA@o@%;s>)d|4h-KrCqL*xZ&XZxDUCYpo
z^iJHHCFJA5B&l>(l1+l`alq$DZ$~c5>(ctm3DGR1&T*A-^-?xP@gzIL!odLz4LyE_
zylbq3Jv55m@eQP1wH)bsZeMlef}U2_wg2=Obo%eSL_;qXyfxJMa6@13;as_M2j_j7
zyC>+9Dz=i4?pLKGkO9@I_||##owZ@Vu(Q!Tr01*8crRSWzvk%wuE9Y6%f9LVN}5Oi
G;QkNJ7tYiG

literal 0
HcmV?d00001


From fcd85296f964c5965936039dfea7ed221d5bf68a Mon Sep 17 00:00:00 2001
From: Nayana Bidari <nybidari@google.com>
Date: Tue, 1 Sep 2020 09:52:52 -0700
Subject: [PATCH 125/211] Automated rollback of changelist 328350576

PiperOrigin-RevId: 329526153
---
 pkg/sentry/socket/netstack/netstack.go       |  45 +---
 pkg/sentry/socket/unix/transport/unix.go     |  10 +-
 pkg/tcpip/tcpip.go                           |  13 -
 pkg/tcpip/transport/tcp/endpoint.go          |  33 ---
 test/packetimpact/dut/posix_server.cc        |   9 -
 test/packetimpact/proto/posix_server.proto   |  11 -
 test/packetimpact/testbench/dut.go           |  42 ---
 test/packetimpact/tests/BUILD                |  10 -
 test/packetimpact/tests/tcp_linger_test.go   | 253 -------------------
 test/syscalls/linux/socket_inet_loopback.cc  |   3 +
 test/syscalls/linux/socket_ip_tcp_generic.cc | 119 ---------
 test/syscalls/linux/socket_ip_udp_generic.cc |  30 ---
 12 files changed, 8 insertions(+), 570 deletions(-)
 delete mode 100644 test/packetimpact/tests/tcp_linger_test.go

diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 0bf21f7d8e..36c17d1baa 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -482,35 +482,8 @@ func (s *socketOpsCommon) fetchReadView() *syserr.Error {
 }
 
 // Release implements fs.FileOperations.Release.
-func (s *socketOpsCommon) Release(ctx context.Context) {
-	e, ch := waiter.NewChannelEntry(nil)
-	s.EventRegister(&e, waiter.EventHUp|waiter.EventErr)
-	defer s.EventUnregister(&e)
-
+func (s *socketOpsCommon) Release(context.Context) {
 	s.Endpoint.Close()
-
-	// SO_LINGER option is valid only for TCP. For other socket types
-	// return after endpoint close.
-	if family, skType, _ := s.Type(); skType != linux.SOCK_STREAM || (family != linux.AF_INET && family != linux.AF_INET6) {
-		return
-	}
-
-	var v tcpip.LingerOption
-	if err := s.Endpoint.GetSockOpt(&v); err != nil {
-		return
-	}
-
-	// The case for zero timeout is handled in tcp endpoint close function.
-	// Close is blocked until either:
-	// 1. The endpoint state is not in any of the states: FIN-WAIT1,
-	// CLOSING and LAST_ACK.
-	// 2. Timeout is reached.
-	if v.Enabled && v.Timeout != 0 {
-		t := kernel.TaskFromContext(ctx)
-		start := t.Kernel().MonotonicClock().Now()
-		deadline := start.Add(v.Timeout)
-		t.BlockWithDeadline(ch, true, deadline)
-	}
 }
 
 // Read implements fs.FileOperations.Read.
@@ -1184,16 +1157,7 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam
 			return nil, syserr.ErrInvalidArgument
 		}
 
-		var v tcpip.LingerOption
-		var linger linux.Linger
-		if err := ep.GetSockOpt(&v); err != nil {
-			return &linger, nil
-		}
-
-		if v.Enabled {
-			linger.OnOff = 1
-		}
-		linger.Linger = int32(v.Timeout.Seconds())
+		linger := linux.Linger{}
 		return &linger, nil
 
 	case linux.SO_SNDTIMEO:
@@ -1922,10 +1886,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
 			socket.SetSockOptEmitUnimplementedEvent(t, name)
 		}
 
-		return syserr.TranslateNetstackError(
-			ep.SetSockOpt(&tcpip.LingerOption{
-				Enabled: v.OnOff != 0,
-				Timeout: time.Second * time.Duration(v.Linger)}))
+		return nil
 
 	case linux.SO_DETACH_FILTER:
 		// optval is ignored.
diff --git a/pkg/sentry/socket/unix/transport/unix.go b/pkg/sentry/socket/unix/transport/unix.go
index cc9d650fb4..1200cf9bb8 100644
--- a/pkg/sentry/socket/unix/transport/unix.go
+++ b/pkg/sentry/socket/unix/transport/unix.go
@@ -942,14 +942,8 @@ func (e *baseEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
 
 // GetSockOpt implements tcpip.Endpoint.GetSockOpt.
 func (e *baseEndpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error {
-	switch opt.(type) {
-	case *tcpip.LingerOption:
-		return nil
-
-	default:
-		log.Warningf("Unsupported socket option: %T", opt)
-		return tcpip.ErrUnknownProtocolOption
-	}
+	log.Warningf("Unsupported socket option: %T", opt)
+	return tcpip.ErrUnknownProtocolOption
 }
 
 // LastError implements Endpoint.LastError.
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index 47a8d7c869..b113d86134 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -1074,19 +1074,6 @@ const (
 	TCPTimeWaitReuseLoopbackOnly
 )
 
-// LingerOption is used by SetSockOpt/GetSockOpt to set/get the
-// duration for which a socket lingers before returning from Close.
-//
-// +stateify savable
-type LingerOption struct {
-	Enabled bool
-	Timeout time.Duration
-}
-
-func (*LingerOption) isGettableSocketOption() {}
-
-func (*LingerOption) isSettableSocketOption() {}
-
 // IPPacketInfo is the message structure for IP_PKTINFO.
 //
 // +stateify savable
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index c5d9eba5dd..3f18efeefc 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -654,9 +654,6 @@ type endpoint struct {
 
 	// owner is used to get uid and gid of the packet.
 	owner tcpip.PacketOwner
-
-	// linger is used for SO_LINGER socket option.
-	linger tcpip.LingerOption
 }
 
 // UniqueID implements stack.TransportEndpoint.UniqueID.
@@ -1010,26 +1007,6 @@ func (e *endpoint) Close() {
 		return
 	}
 
-	if e.linger.Enabled && e.linger.Timeout == 0 {
-		s := e.EndpointState()
-		isResetState := s == StateEstablished || s == StateCloseWait || s == StateFinWait1 || s == StateFinWait2 || s == StateSynRecv
-		if isResetState {
-			// Close the endpoint without doing full shutdown and
-			// send a RST.
-			e.resetConnectionLocked(tcpip.ErrConnectionAborted)
-			e.closeNoShutdownLocked()
-
-			// Wake up worker to close the endpoint.
-			switch s {
-			case StateSynRecv:
-				e.notifyProtocolGoroutine(notifyClose)
-			default:
-				e.notifyProtocolGoroutine(notifyTickleWorker)
-			}
-			return
-		}
-	}
-
 	// Issue a shutdown so that the peer knows we won't send any more data
 	// if we're connected, or stop accepting if we're listening.
 	e.shutdownLocked(tcpip.ShutdownWrite | tcpip.ShutdownRead)
@@ -1830,11 +1807,6 @@ func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error {
 	case *tcpip.SocketDetachFilterOption:
 		return nil
 
-	case *tcpip.LingerOption:
-		e.LockUser()
-		e.linger = *v
-		e.UnlockUser()
-
 	default:
 		return nil
 	}
@@ -2057,11 +2029,6 @@ func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error {
 			Port: port,
 		}
 
-	case *tcpip.LingerOption:
-		e.LockUser()
-		*o = e.linger
-		e.UnlockUser()
-
 	default:
 		return tcpip.ErrUnknownProtocolOption
 	}
diff --git a/test/packetimpact/dut/posix_server.cc b/test/packetimpact/dut/posix_server.cc
index de5b4be93b..2476998f85 100644
--- a/test/packetimpact/dut/posix_server.cc
+++ b/test/packetimpact/dut/posix_server.cc
@@ -336,15 +336,6 @@ class PosixImpl final : public posix_server::Posix::Service {
     return ::grpc::Status::OK;
   }
 
-  ::grpc::Status Shutdown(grpc_impl::ServerContext *context,
-                          const ::posix_server::ShutdownRequest *request,
-                          ::posix_server::ShutdownResponse *response) override {
-    if (shutdown(request->fd(), request->how()) < 0) {
-      response->set_errno_(errno);
-    }
-    return ::grpc::Status::OK;
-  }
-
   ::grpc::Status Recv(::grpc::ServerContext *context,
                       const ::posix_server::RecvRequest *request,
                       ::posix_server::RecvResponse *response) override {
diff --git a/test/packetimpact/proto/posix_server.proto b/test/packetimpact/proto/posix_server.proto
index f32ed54eff..ccd20b10d4 100644
--- a/test/packetimpact/proto/posix_server.proto
+++ b/test/packetimpact/proto/posix_server.proto
@@ -188,15 +188,6 @@ message SocketResponse {
   int32 errno_ = 2;  // "errno" may fail to compile in c++.
 }
 
-message ShutdownRequest {
-  int32 fd = 1;
-  int32 how = 2;
-}
-
-message ShutdownResponse {
-  int32 errno_ = 1;  // "errno" may fail to compile in c++.
-}
-
 message RecvRequest {
   int32 sockfd = 1;
   int32 len = 2;
@@ -234,8 +225,6 @@ service Posix {
   rpc SetSockOpt(SetSockOptRequest) returns (SetSockOptResponse);
   // Call socket() on the DUT.
   rpc Socket(SocketRequest) returns (SocketResponse);
-  // Call shutdown() on the DUT.
-  rpc Shutdown(ShutdownRequest) returns (ShutdownResponse);
   // Call recv() on the DUT.
   rpc Recv(RecvRequest) returns (RecvResponse);
 }
diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go
index 6165ab2937..73c532e75e 100644
--- a/test/packetimpact/testbench/dut.go
+++ b/test/packetimpact/testbench/dut.go
@@ -16,13 +16,11 @@ package testbench
 
 import (
 	"context"
-	"encoding/binary"
 	"flag"
 	"net"
 	"strconv"
 	"syscall"
 	"testing"
-	"time"
 
 	pb "gvisor.dev/gvisor/test/packetimpact/proto/posix_server_go_proto"
 
@@ -702,43 +700,3 @@ func (dut *DUT) RecvWithErrno(ctx context.Context, t *testing.T, sockfd, len, fl
 	}
 	return resp.GetRet(), resp.GetBuf(), syscall.Errno(resp.GetErrno_())
 }
-
-// SetSockLingerOption sets SO_LINGER socket option on the DUT.
-func (dut *DUT) SetSockLingerOption(t *testing.T, sockfd int32, timeout time.Duration, enable bool) {
-	var linger unix.Linger
-	if enable {
-		linger.Onoff = 1
-	}
-	linger.Linger = int32(timeout / time.Second)
-
-	buf := make([]byte, 8)
-	binary.LittleEndian.PutUint32(buf, uint32(linger.Onoff))
-	binary.LittleEndian.PutUint32(buf[4:], uint32(linger.Linger))
-	dut.SetSockOpt(t, sockfd, unix.SOL_SOCKET, unix.SO_LINGER, buf)
-}
-
-// Shutdown calls shutdown on the DUT and causes a fatal test failure if it doesn't
-// succeed. If more control over the timeout or error handling is needed, use
-// ShutdownWithErrno.
-func (dut *DUT) Shutdown(t *testing.T, fd, how int32) error {
-	t.Helper()
-
-	ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout)
-	defer cancel()
-	return dut.ShutdownWithErrno(ctx, t, fd, how)
-}
-
-// ShutdownWithErrno calls shutdown on the DUT.
-func (dut *DUT) ShutdownWithErrno(ctx context.Context, t *testing.T, fd, how int32) error {
-	t.Helper()
-
-	req := pb.ShutdownRequest{
-		Fd:  fd,
-		How: how,
-	}
-	resp, err := dut.posixServer.Shutdown(ctx, &req)
-	if err != nil {
-		t.Fatalf("failed to call Shutdown: %s", err)
-	}
-	return syscall.Errno(resp.GetErrno_())
-}
diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD
index 7a7152fa5b..74658fea03 100644
--- a/test/packetimpact/tests/BUILD
+++ b/test/packetimpact/tests/BUILD
@@ -308,13 +308,3 @@ packetimpact_go_test(
         "@org_golang_x_sys//unix:go_default_library",
     ],
 )
-
-packetimpact_go_test(
-    name = "tcp_linger",
-    srcs = ["tcp_linger_test.go"],
-    deps = [
-        "//pkg/tcpip/header",
-        "//test/packetimpact/testbench",
-        "@org_golang_x_sys//unix:go_default_library",
-    ],
-)
diff --git a/test/packetimpact/tests/tcp_linger_test.go b/test/packetimpact/tests/tcp_linger_test.go
deleted file mode 100644
index 913e49e063..0000000000
--- a/test/packetimpact/tests/tcp_linger_test.go
+++ /dev/null
@@ -1,253 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package tcp_linger_test
-
-import (
-	"context"
-	"flag"
-	"syscall"
-	"testing"
-	"time"
-
-	"golang.org/x/sys/unix"
-	"gvisor.dev/gvisor/pkg/tcpip/header"
-	"gvisor.dev/gvisor/test/packetimpact/testbench"
-)
-
-func init() {
-	testbench.RegisterFlags(flag.CommandLine)
-}
-
-func createSocket(t *testing.T, dut testbench.DUT) (int32, int32, testbench.TCPIPv4) {
-	listenFD, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
-	conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
-	conn.Connect(t)
-	acceptFD, _ := dut.Accept(t, listenFD)
-	return acceptFD, listenFD, conn
-}
-
-func closeAll(t *testing.T, dut testbench.DUT, listenFD int32, conn testbench.TCPIPv4) {
-	conn.Close(t)
-	dut.Close(t, listenFD)
-	dut.TearDown()
-}
-
-// lingerDuration is the timeout value used with SO_LINGER socket option.
-const lingerDuration = 3 * time.Second
-
-// TestTCPLingerZeroTimeout tests when SO_LINGER is set with zero timeout. DUT
-// should send RST-ACK when socket is closed.
-func TestTCPLingerZeroTimeout(t *testing.T) {
-	// Create a socket, listen, TCP connect, and accept.
-	dut := testbench.NewDUT(t)
-	acceptFD, listenFD, conn := createSocket(t, dut)
-	defer closeAll(t, dut, listenFD, conn)
-
-	dut.SetSockLingerOption(t, acceptFD, 0, true)
-	dut.Close(t, acceptFD)
-
-	// If the linger timeout is set to zero, the DUT should send a RST.
-	if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst | header.TCPFlagAck)}, time.Second); err != nil {
-		t.Errorf("expected RST-ACK packet within a second but got none: %s", err)
-	}
-	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
-}
-
-// TestTCPLingerOff tests when SO_LINGER is not set. DUT should send FIN-ACK
-// when socket is closed.
-func TestTCPLingerOff(t *testing.T) {
-	// Create a socket, listen, TCP connect, and accept.
-	dut := testbench.NewDUT(t)
-	acceptFD, listenFD, conn := createSocket(t, dut)
-	defer closeAll(t, dut, listenFD, conn)
-
-	dut.Close(t, acceptFD)
-
-	// If SO_LINGER is not set, DUT should send a FIN-ACK.
-	if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil {
-		t.Errorf("expected FIN-ACK packet within a second but got none: %s", err)
-	}
-	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
-}
-
-// TestTCPLingerNonZeroTimeout tests when SO_LINGER is set with non-zero timeout.
-// DUT should close the socket after timeout.
-func TestTCPLingerNonZeroTimeout(t *testing.T) {
-	for _, tt := range []struct {
-		description string
-		lingerOn    bool
-	}{
-		{"WithNonZeroLinger", true},
-		{"WithoutLinger", false},
-	} {
-		t.Run(tt.description, func(t *testing.T) {
-			// Create a socket, listen, TCP connect, and accept.
-			dut := testbench.NewDUT(t)
-			acceptFD, listenFD, conn := createSocket(t, dut)
-			defer closeAll(t, dut, listenFD, conn)
-
-			dut.SetSockLingerOption(t, acceptFD, lingerDuration, tt.lingerOn)
-
-			// Increase timeout as Close will take longer time to
-			// return when SO_LINGER is set with non-zero timeout.
-			timeout := lingerDuration + 1*time.Second
-			ctx, cancel := context.WithTimeout(context.Background(), timeout)
-			defer cancel()
-			start := time.Now()
-			dut.CloseWithErrno(ctx, t, acceptFD)
-			end := time.Now()
-			diff := end.Sub(start)
-
-			if tt.lingerOn && diff < lingerDuration {
-				t.Errorf("expected close to return after %v seconds, but returned sooner", lingerDuration)
-			} else if !tt.lingerOn && diff > 1*time.Second {
-				t.Errorf("expected close to return within a second, but returned later")
-			}
-
-			if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil {
-				t.Errorf("expected FIN-ACK packet within a second but got none: %s", err)
-			}
-			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
-		})
-	}
-}
-
-// TestTCPLingerSendNonZeroTimeout tests when SO_LINGER is set with non-zero
-// timeout and send a packet. DUT should close the socket after timeout.
-func TestTCPLingerSendNonZeroTimeout(t *testing.T) {
-	for _, tt := range []struct {
-		description string
-		lingerOn    bool
-	}{
-		{"WithSendNonZeroLinger", true},
-		{"WithoutLinger", false},
-	} {
-		t.Run(tt.description, func(t *testing.T) {
-			// Create a socket, listen, TCP connect, and accept.
-			dut := testbench.NewDUT(t)
-			acceptFD, listenFD, conn := createSocket(t, dut)
-			defer closeAll(t, dut, listenFD, conn)
-
-			dut.SetSockLingerOption(t, acceptFD, lingerDuration, tt.lingerOn)
-
-			// Send data.
-			sampleData := []byte("Sample Data")
-			dut.Send(t, acceptFD, sampleData, 0)
-
-			// Increase timeout as Close will take longer time to
-			// return when SO_LINGER is set with non-zero timeout.
-			timeout := lingerDuration + 1*time.Second
-			ctx, cancel := context.WithTimeout(context.Background(), timeout)
-			defer cancel()
-			start := time.Now()
-			dut.CloseWithErrno(ctx, t, acceptFD)
-			end := time.Now()
-			diff := end.Sub(start)
-
-			if tt.lingerOn && diff < lingerDuration {
-				t.Errorf("expected close to return after %v seconds, but returned sooner", lingerDuration)
-			} else if !tt.lingerOn && diff > 1*time.Second {
-				t.Errorf("expected close to return within a second, but returned later")
-			}
-
-			samplePayload := &testbench.Payload{Bytes: sampleData}
-			if _, err := conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil {
-				t.Fatalf("expected a packet with payload %v: %s", samplePayload, err)
-			}
-
-			if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil {
-				t.Errorf("expected FIN-ACK packet within a second but got none: %s", err)
-			}
-			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
-		})
-	}
-}
-
-// TestTCPLingerShutdownZeroTimeout tests SO_LINGER with shutdown() and zero
-// timeout. DUT should send RST-ACK when socket is closed.
-func TestTCPLingerShutdownZeroTimeout(t *testing.T) {
-	// Create a socket, listen, TCP connect, and accept.
-	dut := testbench.NewDUT(t)
-	acceptFD, listenFD, conn := createSocket(t, dut)
-	defer closeAll(t, dut, listenFD, conn)
-
-	dut.SetSockLingerOption(t, acceptFD, 0, true)
-	dut.Shutdown(t, acceptFD, syscall.SHUT_RDWR)
-	dut.Close(t, acceptFD)
-
-	// Shutdown will send FIN-ACK with read/write option.
-	if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil {
-		t.Errorf("expected FIN-ACK packet within a second but got none: %s", err)
-	}
-
-	// If the linger timeout is set to zero, the DUT should send a RST.
-	if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst | header.TCPFlagAck)}, time.Second); err != nil {
-		t.Errorf("expected RST-ACK packet within a second but got none: %s", err)
-	}
-	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
-}
-
-// TestTCPLingerShutdownSendNonZeroTimeout tests SO_LINGER with shutdown() and
-// non-zero timeout. DUT should close the socket after timeout.
-func TestTCPLingerShutdownSendNonZeroTimeout(t *testing.T) {
-	for _, tt := range []struct {
-		description string
-		lingerOn    bool
-	}{
-		{"shutdownRDWR", true},
-		{"shutdownRDWR", false},
-	} {
-		t.Run(tt.description, func(t *testing.T) {
-			// Create a socket, listen, TCP connect, and accept.
-			dut := testbench.NewDUT(t)
-			acceptFD, listenFD, conn := createSocket(t, dut)
-			defer closeAll(t, dut, listenFD, conn)
-
-			dut.SetSockLingerOption(t, acceptFD, lingerDuration, tt.lingerOn)
-
-			// Send data.
-			sampleData := []byte("Sample Data")
-			dut.Send(t, acceptFD, sampleData, 0)
-
-			dut.Shutdown(t, acceptFD, syscall.SHUT_RDWR)
-
-			// Increase timeout as Close will take longer time to
-			// return when SO_LINGER is set with non-zero timeout.
-			timeout := lingerDuration + 1*time.Second
-			ctx, cancel := context.WithTimeout(context.Background(), timeout)
-			defer cancel()
-			start := time.Now()
-			dut.CloseWithErrno(ctx, t, acceptFD)
-			end := time.Now()
-			diff := end.Sub(start)
-
-			if tt.lingerOn && diff < lingerDuration {
-				t.Errorf("expected close to return after %v seconds, but returned sooner", lingerDuration)
-			} else if !tt.lingerOn && diff > 1*time.Second {
-				t.Errorf("expected close to return within a second, but returned later")
-			}
-
-			samplePayload := &testbench.Payload{Bytes: sampleData}
-			if _, err := conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil {
-				t.Fatalf("expected a packet with payload %v: %s", samplePayload, err)
-			}
-
-			if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil {
-				t.Errorf("expected FIN-ACK packet within a second but got none: %s", err)
-			}
-			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
-		})
-	}
-}
diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc
index 425084228f..ffcd904752 100644
--- a/test/syscalls/linux/socket_inet_loopback.cc
+++ b/test/syscalls/linux/socket_inet_loopback.cc
@@ -1116,6 +1116,9 @@ TEST_P(SocketInetLoopbackTest, TCPAcceptAfterReset) {
   TestAddress const& listener = param.listener;
   TestAddress const& connector = param.connector;
 
+  // TODO(gvisor.dev/issue/1400): Remove this after SO_LINGER is fixed.
+  SKIP_IF(IsRunningOnGvisor());
+
   // Create the listening socket.
   const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
       Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
diff --git a/test/syscalls/linux/socket_ip_tcp_generic.cc b/test/syscalls/linux/socket_ip_tcp_generic.cc
index f4b69c46c3..04356b780a 100644
--- a/test/syscalls/linux/socket_ip_tcp_generic.cc
+++ b/test/syscalls/linux/socket_ip_tcp_generic.cc
@@ -1080,124 +1080,5 @@ TEST_P(TCPSocketPairTest, TCPResetDuringClose_NoRandomSave) {
   }
 }
 
-// Test setsockopt and getsockopt for a socket with SO_LINGER option.
-TEST_P(TCPSocketPairTest, SetAndGetLingerOption) {
-  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
-
-  // Check getsockopt before SO_LINGER option is set.
-  struct linger got_linger = {-1, -1};
-  socklen_t got_len = sizeof(got_linger);
-
-  ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER,
-                         &got_linger, &got_len),
-              SyscallSucceeds());
-  ASSERT_THAT(got_len, sizeof(got_linger));
-  struct linger want_linger = {};
-  EXPECT_EQ(0, memcmp(&want_linger, &got_linger, got_len));
-
-  // Set and get SO_LINGER with negative values.
-  struct linger sl;
-  sl.l_onoff = 1;
-  sl.l_linger = -3;
-  ASSERT_THAT(
-      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)),
-      SyscallSucceeds());
-  ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER,
-                         &got_linger, &got_len),
-              SyscallSucceeds());
-  ASSERT_EQ(got_len, sizeof(got_linger));
-  EXPECT_EQ(sl.l_onoff, got_linger.l_onoff);
-  // Linux returns a different value as it uses HZ to convert the seconds to
-  // jiffies which overflows for negative values. We want to be compatible with
-  // linux for getsockopt return value.
-  if (IsRunningOnGvisor()) {
-    EXPECT_EQ(sl.l_linger, got_linger.l_linger);
-  }
-
-  // Set and get SO_LINGER option with positive values.
-  sl.l_onoff = 1;
-  sl.l_linger = 5;
-  ASSERT_THAT(
-      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)),
-      SyscallSucceeds());
-  ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER,
-                         &got_linger, &got_len),
-              SyscallSucceeds());
-  ASSERT_EQ(got_len, sizeof(got_linger));
-  EXPECT_EQ(0, memcmp(&sl, &got_linger, got_len));
-}
-
-// Test socket to disable SO_LINGER option.
-TEST_P(TCPSocketPairTest, SetOffLingerOption) {
-  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
-
-  // Set the SO_LINGER option.
-  struct linger sl;
-  sl.l_onoff = 1;
-  sl.l_linger = 5;
-  ASSERT_THAT(
-      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)),
-      SyscallSucceeds());
-
-  // Check getsockopt after SO_LINGER option is set.
-  struct linger got_linger = {-1, -1};
-  socklen_t got_len = sizeof(got_linger);
-  ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER,
-                         &got_linger, &got_len),
-              SyscallSucceeds());
-  ASSERT_EQ(got_len, sizeof(got_linger));
-  EXPECT_EQ(0, memcmp(&sl, &got_linger, got_len));
-
-  sl.l_onoff = 0;
-  sl.l_linger = 5;
-  ASSERT_THAT(
-      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)),
-      SyscallSucceeds());
-
-  // Check getsockopt after SO_LINGER option is set to zero.
-  ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER,
-                         &got_linger, &got_len),
-              SyscallSucceeds());
-  ASSERT_EQ(got_len, sizeof(got_linger));
-  EXPECT_EQ(0, memcmp(&sl, &got_linger, got_len));
-}
-
-// Test close on dup'd socket with SO_LINGER option set.
-TEST_P(TCPSocketPairTest, CloseWithLingerOption) {
-  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
-
-  // Set the SO_LINGER option.
-  struct linger sl;
-  sl.l_onoff = 1;
-  sl.l_linger = 5;
-  ASSERT_THAT(
-      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)),
-      SyscallSucceeds());
-
-  // Check getsockopt after SO_LINGER option is set.
-  struct linger got_linger = {-1, -1};
-  socklen_t got_len = sizeof(got_linger);
-  ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER,
-                         &got_linger, &got_len),
-              SyscallSucceeds());
-  ASSERT_EQ(got_len, sizeof(got_linger));
-  EXPECT_EQ(0, memcmp(&sl, &got_linger, got_len));
-
-  FileDescriptor dupFd = FileDescriptor(dup(sockets->first_fd()));
-  ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
-  char buf[10] = {};
-  // Write on dupFd should succeed as socket will not be closed until
-  // all references are removed.
-  ASSERT_THAT(RetryEINTR(write)(dupFd.get(), buf, sizeof(buf)),
-              SyscallSucceedsWithValue(sizeof(buf)));
-  ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)),
-              SyscallFailsWithErrno(EBADF));
-
-  // Close the socket.
-  dupFd.reset();
-  // Write on dupFd should fail as all references for socket are removed.
-  ASSERT_THAT(RetryEINTR(write)(dupFd.get(), buf, sizeof(buf)),
-              SyscallFailsWithErrno(EBADF));
-}
 }  // namespace testing
 }  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_udp_generic.cc b/test/syscalls/linux/socket_ip_udp_generic.cc
index 6e4ecd6806..bbe3561168 100644
--- a/test/syscalls/linux/socket_ip_udp_generic.cc
+++ b/test/syscalls/linux/socket_ip_udp_generic.cc
@@ -450,35 +450,5 @@ TEST_P(UDPSocketPairTest, TClassRecvMismatch) {
               SyscallFailsWithErrno(EOPNOTSUPP));
 }
 
-// Test the SO_LINGER option can be set/get on udp socket.
-TEST_P(UDPSocketPairTest, SoLingerFail) {
-  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
-  int level = SOL_SOCKET;
-  int type = SO_LINGER;
-
-  struct linger sl;
-  sl.l_onoff = 1;
-  sl.l_linger = 5;
-  ASSERT_THAT(setsockopt(sockets->first_fd(), level, type, &sl, sizeof(sl)),
-              SyscallSucceedsWithValue(0));
-
-  struct linger got_linger = {};
-  socklen_t length = sizeof(sl);
-  ASSERT_THAT(
-      getsockopt(sockets->first_fd(), level, type, &got_linger, &length),
-      SyscallSucceedsWithValue(0));
-
-  ASSERT_EQ(length, sizeof(got_linger));
-  // Linux returns the values which are set in the SetSockOpt for SO_LINGER.
-  // In gVisor, we do not store the linger values for UDP as SO_LINGER for UDP
-  // is a no-op.
-  if (IsRunningOnGvisor()) {
-    struct linger want_linger = {};
-    EXPECT_EQ(0, memcmp(&want_linger, &got_linger, length));
-  } else {
-    EXPECT_EQ(0, memcmp(&sl, &got_linger, length));
-  }
-}
-
 }  // namespace testing
 }  // namespace gvisor

From 76ec9623728fb0d62d071327798aa37145f0daab Mon Sep 17 00:00:00 2001
From: Fabricio Voznika <fvoznika@google.com>
Date: Tue, 1 Sep 2020 11:10:15 -0700
Subject: [PATCH 126/211] Let flags be overridden from OCI annotations

This allows runsc flags to be set per sandbox instance. For
example, K8s pod annotations can be used to enable
--debug for a single pod, making troubleshooting much easier.
Similarly, features like --vfs2 can be enabled for
experimentation without affecting other pods in the node.

Closes #3494

PiperOrigin-RevId: 329542815
---
 runsc/cmd/boot.go            |  2 +-
 runsc/cmd/checkpoint.go      |  2 +-
 runsc/cmd/create.go          |  2 +-
 runsc/cmd/gofer.go           |  6 +--
 runsc/cmd/restore.go         |  2 +-
 runsc/cmd/run.go             |  2 +-
 runsc/config/config.go       |  2 +
 runsc/config/config_test.go  | 87 ++++++++++++++++++++++++++++++++++++
 runsc/config/flags.go        | 36 +++++++++++++++
 runsc/specutils/BUILD        |  1 +
 runsc/specutils/specutils.go | 21 +++++++--
 11 files changed, 152 insertions(+), 11 deletions(-)

diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go
index 357f46517b..cd419e1aa5 100644
--- a/runsc/cmd/boot.go
+++ b/runsc/cmd/boot.go
@@ -168,7 +168,7 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 	// Get the spec from the specFD.
 	specFile := os.NewFile(uintptr(b.specFD), "spec file")
 	defer specFile.Close()
-	spec, err := specutils.ReadSpecFromFile(b.bundleDir, specFile)
+	spec, err := specutils.ReadSpecFromFile(b.bundleDir, specFile, conf)
 	if err != nil {
 		Fatalf("reading spec: %v", err)
 	}
diff --git a/runsc/cmd/checkpoint.go b/runsc/cmd/checkpoint.go
index db46d509f7..8fe0c427ab 100644
--- a/runsc/cmd/checkpoint.go
+++ b/runsc/cmd/checkpoint.go
@@ -118,7 +118,7 @@ func (c *Checkpoint) Execute(_ context.Context, f *flag.FlagSet, args ...interfa
 		Fatalf("setting bundleDir")
 	}
 
-	spec, err := specutils.ReadSpec(bundleDir)
+	spec, err := specutils.ReadSpec(bundleDir, conf)
 	if err != nil {
 		Fatalf("reading spec: %v", err)
 	}
diff --git a/runsc/cmd/create.go b/runsc/cmd/create.go
index 4d9085244b..e76f7ba1d5 100644
--- a/runsc/cmd/create.go
+++ b/runsc/cmd/create.go
@@ -91,7 +91,7 @@ func (c *Create) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}
 	if bundleDir == "" {
 		bundleDir = getwdOrDie()
 	}
-	spec, err := specutils.ReadSpec(bundleDir)
+	spec, err := specutils.ReadSpec(bundleDir, conf)
 	if err != nil {
 		return Errorf("reading spec: %v", err)
 	}
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index 7da02c3af5..bba00d5511 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -100,15 +100,15 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 		return subcommands.ExitUsageError
 	}
 
+	conf := args[0].(*config.Config)
+
 	specFile := os.NewFile(uintptr(g.specFD), "spec file")
 	defer specFile.Close()
-	spec, err := specutils.ReadSpecFromFile(g.bundleDir, specFile)
+	spec, err := specutils.ReadSpecFromFile(g.bundleDir, specFile, conf)
 	if err != nil {
 		Fatalf("reading spec: %v", err)
 	}
 
-	conf := args[0].(*config.Config)
-
 	if g.setUpRoot {
 		if err := setupRootFS(spec, conf); err != nil {
 			Fatalf("Error setting up root FS: %v", err)
diff --git a/runsc/cmd/restore.go b/runsc/cmd/restore.go
index b169758042..096ec814cc 100644
--- a/runsc/cmd/restore.go
+++ b/runsc/cmd/restore.go
@@ -88,7 +88,7 @@ func (r *Restore) Execute(_ context.Context, f *flag.FlagSet, args ...interface{
 	if bundleDir == "" {
 		bundleDir = getwdOrDie()
 	}
-	spec, err := specutils.ReadSpec(bundleDir)
+	spec, err := specutils.ReadSpec(bundleDir, conf)
 	if err != nil {
 		return Errorf("reading spec: %v", err)
 	}
diff --git a/runsc/cmd/run.go b/runsc/cmd/run.go
index 1161de67a1..c48cbe4cde 100644
--- a/runsc/cmd/run.go
+++ b/runsc/cmd/run.go
@@ -75,7 +75,7 @@ func (r *Run) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s
 	if bundleDir == "" {
 		bundleDir = getwdOrDie()
 	}
-	spec, err := specutils.ReadSpec(bundleDir)
+	spec, err := specutils.ReadSpec(bundleDir, conf)
 	if err != nil {
 		return Errorf("reading spec: %v", err)
 	}
diff --git a/runsc/config/config.go b/runsc/config/config.go
index bca27ebf1a..df134bb2f6 100644
--- a/runsc/config/config.go
+++ b/runsc/config/config.go
@@ -157,6 +157,8 @@ type Config struct {
 	// Enables FUSE usage.
 	FUSE bool `flag:"fuse"`
 
+	AllowFlagOverride bool `flag:"allow-flag-override"`
+
 	// TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in
 	// tests. It allows runsc to start the sandbox process as the current
 	// user, and without chrooting the sandbox process. This can be
diff --git a/runsc/config/config_test.go b/runsc/config/config_test.go
index af7867a2aa..fb162b7eb0 100644
--- a/runsc/config/config_test.go
+++ b/runsc/config/config_test.go
@@ -183,3 +183,90 @@ func TestValidationFail(t *testing.T) {
 		})
 	}
 }
+
+func TestOverride(t *testing.T) {
+	c, err := NewFromFlags()
+	if err != nil {
+		t.Fatal(err)
+	}
+	c.AllowFlagOverride = true
+
+	t.Run("string", func(t *testing.T) {
+		c.RootDir = "foobar"
+		if err := c.Override("root", "bar"); err != nil {
+			t.Fatalf("Override(root, bar) failed: %v", err)
+		}
+		defer setDefault("root")
+		if c.RootDir != "bar" {
+			t.Errorf("Override(root, bar) didn't work: %+v", c)
+		}
+	})
+
+	t.Run("bool", func(t *testing.T) {
+		c.Debug = true
+		if err := c.Override("debug", "false"); err != nil {
+			t.Fatalf("Override(debug, false) failed: %v", err)
+		}
+		defer setDefault("debug")
+		if c.Debug {
+			t.Errorf("Override(debug, false) didn't work: %+v", c)
+		}
+	})
+
+	t.Run("enum", func(t *testing.T) {
+		c.FileAccess = FileAccessShared
+		if err := c.Override("file-access", "exclusive"); err != nil {
+			t.Fatalf("Override(file-access, exclusive) failed: %v", err)
+		}
+		defer setDefault("file-access")
+		if c.FileAccess != FileAccessExclusive {
+			t.Errorf("Override(file-access, exclusive) didn't work: %+v", c)
+		}
+	})
+}
+
+func TestOverrideDisabled(t *testing.T) {
+	c, err := NewFromFlags()
+	if err != nil {
+		t.Fatal(err)
+	}
+	const errMsg = "flag override disabled"
+	if err := c.Override("root", "path"); err == nil || !strings.Contains(err.Error(), errMsg) {
+		t.Errorf("Override() wrong error: %v", err)
+	}
+}
+
+func TestOverrideError(t *testing.T) {
+	c, err := NewFromFlags()
+	if err != nil {
+		t.Fatal(err)
+	}
+	c.AllowFlagOverride = true
+	for _, tc := range []struct {
+		name  string
+		value string
+		error string
+	}{
+		{
+			name:  "invalid",
+			value: "valid",
+			error: `flag "invalid" not found`,
+		},
+		{
+			name:  "debug",
+			value: "invalid",
+			error: "error setting flag debug",
+		},
+		{
+			name:  "file-access",
+			value: "invalid",
+			error: "invalid file access type",
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			if err := c.Override(tc.name, tc.value); err == nil || !strings.Contains(err.Error(), tc.error) {
+				t.Errorf("Override(%q, %q) wrong error: %v", tc.name, tc.value, err)
+			}
+		})
+	}
+}
diff --git a/runsc/config/flags.go b/runsc/config/flags.go
index 488a4b9fb5..eff46e9382 100644
--- a/runsc/config/flags.go
+++ b/runsc/config/flags.go
@@ -48,6 +48,7 @@ func RegisterFlags() {
 		flag.Bool("log-packets", false, "enable network packet logging.")
 		flag.String("debug-log-format", "text", "log format: text (default), json, or json-k8s.")
 		flag.Bool("alsologtostderr", false, "send log messages to stderr.")
+		flag.Bool("allow-flag-override", false, "allow OCI annotations (dev.gvisor.flag.<name>) to override flags for debugging.")
 
 		// Debugging flags: strace related
 		flag.Bool("strace", false, "enable strace.")
@@ -149,6 +150,41 @@ func (c *Config) ToFlags() []string {
 	return rv
 }
 
+// Override writes a new value to a flag.
+func (c *Config) Override(name string, value string) error {
+	if !c.AllowFlagOverride {
+		return fmt.Errorf("flag override disabled, use --allow-flag-override to enable it")
+	}
+
+	obj := reflect.ValueOf(c).Elem()
+	st := obj.Type()
+	for i := 0; i < st.NumField(); i++ {
+		f := st.Field(i)
+		fieldName, ok := f.Tag.Lookup("flag")
+		if !ok || fieldName != name {
+			// Not a flag field, or flag name doesn't match.
+			continue
+		}
+		fl := flag.CommandLine.Lookup(name)
+		if fl == nil {
+			// Flag must exist if there is a field match above.
+			panic(fmt.Sprintf("Flag %q not found", name))
+		}
+
+		// Use flag to convert the string value to the underlying flag type, using
+		// the same rules as the command-line for consistency.
+		if err := fl.Value.Set(value); err != nil {
+			return fmt.Errorf("error setting flag %s=%q: %w", name, value, err)
+		}
+		x := reflect.ValueOf(flag.Get(fl.Value))
+		obj.Field(i).Set(x)
+
+		// Validates the config again to ensure it's left in a consistent state.
+		return c.validate()
+	}
+	return fmt.Errorf("flag %q not found. Cannot set it to %q", name, value)
+}
+
 func getVal(field reflect.Value) string {
 	if str, ok := field.Addr().Interface().(fmt.Stringer); ok {
 		return str.String()
diff --git a/runsc/specutils/BUILD b/runsc/specutils/BUILD
index 43851a22f1..679d8bc8e5 100644
--- a/runsc/specutils/BUILD
+++ b/runsc/specutils/BUILD
@@ -16,6 +16,7 @@ go_library(
         "//pkg/bits",
         "//pkg/log",
         "//pkg/sentry/kernel/auth",
+        "//runsc/config",
         "@com_github_cenkalti_backoff//:go_default_library",
         "@com_github_mohae_deepcopy//:go_default_library",
         "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index 5015c3a842..a2275398aa 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -35,6 +35,7 @@ import (
 	"gvisor.dev/gvisor/pkg/bits"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/runsc/config"
 )
 
 // ExePath must point to runsc binary, which is normally the same binary. It's
@@ -161,18 +162,18 @@ func OpenSpec(bundleDir string) (*os.File, error) {
 // ReadSpec reads an OCI runtime spec from the given bundle directory.
 // ReadSpec also normalizes all potential relative paths into absolute
 // path, e.g. spec.Root.Path, mount.Source.
-func ReadSpec(bundleDir string) (*specs.Spec, error) {
+func ReadSpec(bundleDir string, conf *config.Config) (*specs.Spec, error) {
 	specFile, err := OpenSpec(bundleDir)
 	if err != nil {
 		return nil, fmt.Errorf("error opening spec file %q: %v", filepath.Join(bundleDir, "config.json"), err)
 	}
 	defer specFile.Close()
-	return ReadSpecFromFile(bundleDir, specFile)
+	return ReadSpecFromFile(bundleDir, specFile, conf)
 }
 
 // ReadSpecFromFile reads an OCI runtime spec from the given File, and
 // normalizes all relative paths into absolute by prepending the bundle dir.
-func ReadSpecFromFile(bundleDir string, specFile *os.File) (*specs.Spec, error) {
+func ReadSpecFromFile(bundleDir string, specFile *os.File, conf *config.Config) (*specs.Spec, error) {
 	if _, err := specFile.Seek(0, os.SEEK_SET); err != nil {
 		return nil, fmt.Errorf("error seeking to beginning of file %q: %v", specFile.Name(), err)
 	}
@@ -195,6 +196,20 @@ func ReadSpecFromFile(bundleDir string, specFile *os.File) (*specs.Spec, error)
 			m.Source = absPath(bundleDir, m.Source)
 		}
 	}
+
+	// Override flags using annotations to allow customization per sandbox
+	// instance.
+	for annotation, val := range spec.Annotations {
+		const flagPrefix = "dev.gvisor.flag."
+		if strings.HasPrefix(annotation, flagPrefix) {
+			name := annotation[len(flagPrefix):]
+			log.Infof("Overriding flag: %s=%q", name, val)
+			if err := conf.Override(name, val); err != nil {
+				return nil, err
+			}
+		}
+	}
+
 	return &spec, nil
 }
 

From 114d1268b26e71c99fabb9d5b13d7c95c7c5c34b Mon Sep 17 00:00:00 2001
From: Ayush Ranjan <ayushranjan@google.com>
Date: Tue, 1 Sep 2020 12:59:49 -0700
Subject: [PATCH 127/211] [go-marshal] Enable auto-marshalling for fs/tty.

PiperOrigin-RevId: 329564614
---
 pkg/abi/linux/tty.go                        |  1 +
 pkg/sentry/fs/tty/BUILD                     |  1 +
 pkg/sentry/fs/tty/line_discipline.go        | 33 ++++++++-------------
 pkg/sentry/fs/tty/master.go                 | 33 +++++++++++++--------
 pkg/sentry/fs/tty/queue.go                  | 12 ++++----
 pkg/sentry/fs/tty/slave.go                  | 33 +++++++++++++--------
 pkg/sentry/fs/tty/terminal.go               | 21 ++++++-------
 pkg/sentry/fsimpl/devpts/BUILD              |  2 ++
 pkg/sentry/fsimpl/devpts/line_discipline.go | 33 ++++++++-------------
 pkg/sentry/fsimpl/devpts/master.go          | 33 +++++++++++++--------
 pkg/sentry/fsimpl/devpts/queue.go           | 12 ++++----
 pkg/sentry/fsimpl/devpts/slave.go           | 33 +++++++++++++--------
 pkg/sentry/fsimpl/devpts/terminal.go        | 21 ++++++-------
 13 files changed, 140 insertions(+), 128 deletions(-)

diff --git a/pkg/abi/linux/tty.go b/pkg/abi/linux/tty.go
index e640969a65..5a5ff0aa22 100644
--- a/pkg/abi/linux/tty.go
+++ b/pkg/abi/linux/tty.go
@@ -341,6 +341,7 @@ var DefaultSlaveTermios = KernelTermios{
 // include/uapi/asm-generic/termios.h.
 //
 // +stateify savable
+// +marshal
 type WindowSize struct {
 	Rows uint16
 	Cols uint16
diff --git a/pkg/sentry/fs/tty/BUILD b/pkg/sentry/fs/tty/BUILD
index 5cb0e0417b..b3f5a82449 100644
--- a/pkg/sentry/fs/tty/BUILD
+++ b/pkg/sentry/fs/tty/BUILD
@@ -31,6 +31,7 @@ go_library(
         "//pkg/syserror",
         "//pkg/usermem",
         "//pkg/waiter",
+        "//tools/go_marshal/primitive",
     ],
 )
 
diff --git a/pkg/sentry/fs/tty/line_discipline.go b/pkg/sentry/fs/tty/line_discipline.go
index 2e9dd2d55e..b6bc011a95 100644
--- a/pkg/sentry/fs/tty/line_discipline.go
+++ b/pkg/sentry/fs/tty/line_discipline.go
@@ -21,6 +21,7 @@ import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/sentry/arch"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
@@ -115,27 +116,23 @@ func newLineDiscipline(termios linux.KernelTermios) *lineDiscipline {
 }
 
 // getTermios gets the linux.Termios for the tty.
-func (l *lineDiscipline) getTermios(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+func (l *lineDiscipline) getTermios(task *kernel.Task, args arch.SyscallArguments) (uintptr, error) {
 	l.termiosMu.RLock()
 	defer l.termiosMu.RUnlock()
 	// We must copy a Termios struct, not KernelTermios.
 	t := l.termios.ToTermios()
-	_, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), t, usermem.IOOpts{
-		AddressSpaceActive: true,
-	})
+	_, err := t.CopyOut(task, args[2].Pointer())
 	return 0, err
 }
 
 // setTermios sets a linux.Termios for the tty.
-func (l *lineDiscipline) setTermios(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+func (l *lineDiscipline) setTermios(task *kernel.Task, args arch.SyscallArguments) (uintptr, error) {
 	l.termiosMu.Lock()
 	defer l.termiosMu.Unlock()
 	oldCanonEnabled := l.termios.LEnabled(linux.ICANON)
 	// We must copy a Termios struct, not KernelTermios.
 	var t linux.Termios
-	_, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &t, usermem.IOOpts{
-		AddressSpaceActive: true,
-	})
+	_, err := t.CopyIn(task, args[2].Pointer())
 	l.termios.FromTermios(t)
 
 	// If canonical mode is turned off, move bytes from inQueue's wait
@@ -152,21 +149,17 @@ func (l *lineDiscipline) setTermios(ctx context.Context, io usermem.IO, args arc
 	return 0, err
 }
 
-func (l *lineDiscipline) windowSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error {
+func (l *lineDiscipline) windowSize(t *kernel.Task, args arch.SyscallArguments) error {
 	l.sizeMu.Lock()
 	defer l.sizeMu.Unlock()
-	_, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), l.size, usermem.IOOpts{
-		AddressSpaceActive: true,
-	})
+	_, err := l.size.CopyOut(t, args[2].Pointer())
 	return err
 }
 
-func (l *lineDiscipline) setWindowSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error {
+func (l *lineDiscipline) setWindowSize(t *kernel.Task, args arch.SyscallArguments) error {
 	l.sizeMu.Lock()
 	defer l.sizeMu.Unlock()
-	_, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &l.size, usermem.IOOpts{
-		AddressSpaceActive: true,
-	})
+	_, err := l.size.CopyIn(t, args[2].Pointer())
 	return err
 }
 
@@ -182,8 +175,8 @@ func (l *lineDiscipline) slaveReadiness() waiter.EventMask {
 	return l.outQueue.writeReadiness(&l.termios) | l.inQueue.readReadiness(&l.termios)
 }
 
-func (l *lineDiscipline) inputQueueReadSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error {
-	return l.inQueue.readableSize(ctx, io, args)
+func (l *lineDiscipline) inputQueueReadSize(t *kernel.Task, args arch.SyscallArguments) error {
+	return l.inQueue.readableSize(t, args)
 }
 
 func (l *lineDiscipline) inputQueueRead(ctx context.Context, dst usermem.IOSequence) (int64, error) {
@@ -217,8 +210,8 @@ func (l *lineDiscipline) inputQueueWrite(ctx context.Context, src usermem.IOSequ
 	return 0, syserror.ErrWouldBlock
 }
 
-func (l *lineDiscipline) outputQueueReadSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error {
-	return l.outQueue.readableSize(ctx, io, args)
+func (l *lineDiscipline) outputQueueReadSize(t *kernel.Task, args arch.SyscallArguments) error {
+	return l.outQueue.readableSize(t, args)
 }
 
 func (l *lineDiscipline) outputQueueRead(ctx context.Context, dst usermem.IOSequence) (int64, error) {
diff --git a/pkg/sentry/fs/tty/master.go b/pkg/sentry/fs/tty/master.go
index e007460173..1754572976 100644
--- a/pkg/sentry/fs/tty/master.go
+++ b/pkg/sentry/fs/tty/master.go
@@ -20,10 +20,12 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/arch"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/unimpl"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
 	"gvisor.dev/gvisor/pkg/waiter"
+	"gvisor.dev/gvisor/tools/go_marshal/primitive"
 )
 
 // LINT.IfChange
@@ -152,46 +154,51 @@ func (mf *masterFileOperations) Write(ctx context.Context, _ *fs.File, src userm
 
 // Ioctl implements fs.FileOperations.Ioctl.
 func (mf *masterFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+	t := kernel.TaskFromContext(ctx)
+	if t == nil {
+		// ioctl(2) may only be called from a task goroutine.
+		return 0, syserror.ENOTTY
+	}
+
 	switch cmd := args[1].Uint(); cmd {
 	case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ
 		// Get the number of bytes in the output queue read buffer.
-		return 0, mf.t.ld.outputQueueReadSize(ctx, io, args)
+		return 0, mf.t.ld.outputQueueReadSize(t, args)
 	case linux.TCGETS:
 		// N.B. TCGETS on the master actually returns the configuration
 		// of the slave end.
-		return mf.t.ld.getTermios(ctx, io, args)
+		return mf.t.ld.getTermios(t, args)
 	case linux.TCSETS:
 		// N.B. TCSETS on the master actually affects the configuration
 		// of the slave end.
-		return mf.t.ld.setTermios(ctx, io, args)
+		return mf.t.ld.setTermios(t, args)
 	case linux.TCSETSW:
 		// TODO(b/29356795): This should drain the output queue first.
-		return mf.t.ld.setTermios(ctx, io, args)
+		return mf.t.ld.setTermios(t, args)
 	case linux.TIOCGPTN:
-		_, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), uint32(mf.t.n), usermem.IOOpts{
-			AddressSpaceActive: true,
-		})
+		nP := primitive.Uint32(mf.t.n)
+		_, err := nP.CopyOut(t, args[2].Pointer())
 		return 0, err
 	case linux.TIOCSPTLCK:
 		// TODO(b/29356795): Implement pty locking. For now just pretend we do.
 		return 0, nil
 	case linux.TIOCGWINSZ:
-		return 0, mf.t.ld.windowSize(ctx, io, args)
+		return 0, mf.t.ld.windowSize(t, args)
 	case linux.TIOCSWINSZ:
-		return 0, mf.t.ld.setWindowSize(ctx, io, args)
+		return 0, mf.t.ld.setWindowSize(t, args)
 	case linux.TIOCSCTTY:
 		// Make the given terminal the controlling terminal of the
 		// calling process.
-		return 0, mf.t.setControllingTTY(ctx, io, args, true /* isMaster */)
+		return 0, mf.t.setControllingTTY(ctx, args, true /* isMaster */)
 	case linux.TIOCNOTTY:
 		// Release this process's controlling terminal.
-		return 0, mf.t.releaseControllingTTY(ctx, io, args, true /* isMaster */)
+		return 0, mf.t.releaseControllingTTY(ctx, args, true /* isMaster */)
 	case linux.TIOCGPGRP:
 		// Get the foreground process group.
-		return mf.t.foregroundProcessGroup(ctx, io, args, true /* isMaster */)
+		return mf.t.foregroundProcessGroup(ctx, args, true /* isMaster */)
 	case linux.TIOCSPGRP:
 		// Set the foreground process group.
-		return mf.t.setForegroundProcessGroup(ctx, io, args, true /* isMaster */)
+		return mf.t.setForegroundProcessGroup(ctx, args, true /* isMaster */)
 	default:
 		maybeEmitUnimplementedEvent(ctx, cmd)
 		return 0, syserror.ENOTTY
diff --git a/pkg/sentry/fs/tty/queue.go b/pkg/sentry/fs/tty/queue.go
index c5d7ec717c..f0a18c75a6 100644
--- a/pkg/sentry/fs/tty/queue.go
+++ b/pkg/sentry/fs/tty/queue.go
@@ -19,10 +19,12 @@ import (
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/safemem"
 	"gvisor.dev/gvisor/pkg/sentry/arch"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
 	"gvisor.dev/gvisor/pkg/waiter"
+	"gvisor.dev/gvisor/tools/go_marshal/primitive"
 )
 
 // LINT.IfChange
@@ -85,17 +87,15 @@ func (q *queue) writeReadiness(t *linux.KernelTermios) waiter.EventMask {
 }
 
 // readableSize writes the number of readable bytes to userspace.
-func (q *queue) readableSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error {
+func (q *queue) readableSize(t *kernel.Task, args arch.SyscallArguments) error {
 	q.mu.Lock()
 	defer q.mu.Unlock()
-	var size int32
+	size := primitive.Int32(0)
 	if q.readable {
-		size = int32(len(q.readBuf))
+		size = primitive.Int32(len(q.readBuf))
 	}
 
-	_, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), size, usermem.IOOpts{
-		AddressSpaceActive: true,
-	})
+	_, err := size.CopyOut(t, args[2].Pointer())
 	return err
 
 }
diff --git a/pkg/sentry/fs/tty/slave.go b/pkg/sentry/fs/tty/slave.go
index 7c72926873..933d2c3ff8 100644
--- a/pkg/sentry/fs/tty/slave.go
+++ b/pkg/sentry/fs/tty/slave.go
@@ -20,9 +20,11 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/arch"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
 	"gvisor.dev/gvisor/pkg/waiter"
+	"gvisor.dev/gvisor/tools/go_marshal/primitive"
 )
 
 // LINT.IfChange
@@ -136,39 +138,44 @@ func (sf *slaveFileOperations) Write(ctx context.Context, _ *fs.File, src userme
 
 // Ioctl implements fs.FileOperations.Ioctl.
 func (sf *slaveFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+	t := kernel.TaskFromContext(ctx)
+	if t == nil {
+		// ioctl(2) may only be called from a task goroutine.
+		return 0, syserror.ENOTTY
+	}
+
 	switch cmd := args[1].Uint(); cmd {
 	case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ
 		// Get the number of bytes in the input queue read buffer.
-		return 0, sf.si.t.ld.inputQueueReadSize(ctx, io, args)
+		return 0, sf.si.t.ld.inputQueueReadSize(t, args)
 	case linux.TCGETS:
-		return sf.si.t.ld.getTermios(ctx, io, args)
+		return sf.si.t.ld.getTermios(t, args)
 	case linux.TCSETS:
-		return sf.si.t.ld.setTermios(ctx, io, args)
+		return sf.si.t.ld.setTermios(t, args)
 	case linux.TCSETSW:
 		// TODO(b/29356795): This should drain the output queue first.
-		return sf.si.t.ld.setTermios(ctx, io, args)
+		return sf.si.t.ld.setTermios(t, args)
 	case linux.TIOCGPTN:
-		_, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), uint32(sf.si.t.n), usermem.IOOpts{
-			AddressSpaceActive: true,
-		})
+		nP := primitive.Uint32(sf.si.t.n)
+		_, err := nP.CopyOut(t, args[2].Pointer())
 		return 0, err
 	case linux.TIOCGWINSZ:
-		return 0, sf.si.t.ld.windowSize(ctx, io, args)
+		return 0, sf.si.t.ld.windowSize(t, args)
 	case linux.TIOCSWINSZ:
-		return 0, sf.si.t.ld.setWindowSize(ctx, io, args)
+		return 0, sf.si.t.ld.setWindowSize(t, args)
 	case linux.TIOCSCTTY:
 		// Make the given terminal the controlling terminal of the
 		// calling process.
-		return 0, sf.si.t.setControllingTTY(ctx, io, args, false /* isMaster */)
+		return 0, sf.si.t.setControllingTTY(ctx, args, false /* isMaster */)
 	case linux.TIOCNOTTY:
 		// Release this process's controlling terminal.
-		return 0, sf.si.t.releaseControllingTTY(ctx, io, args, false /* isMaster */)
+		return 0, sf.si.t.releaseControllingTTY(ctx, args, false /* isMaster */)
 	case linux.TIOCGPGRP:
 		// Get the foreground process group.
-		return sf.si.t.foregroundProcessGroup(ctx, io, args, false /* isMaster */)
+		return sf.si.t.foregroundProcessGroup(ctx, args, false /* isMaster */)
 	case linux.TIOCSPGRP:
 		// Set the foreground process group.
-		return sf.si.t.setForegroundProcessGroup(ctx, io, args, false /* isMaster */)
+		return sf.si.t.setForegroundProcessGroup(ctx, args, false /* isMaster */)
 	default:
 		maybeEmitUnimplementedEvent(ctx, cmd)
 		return 0, syserror.ENOTTY
diff --git a/pkg/sentry/fs/tty/terminal.go b/pkg/sentry/fs/tty/terminal.go
index ddcccf4daa..56b59632d3 100644
--- a/pkg/sentry/fs/tty/terminal.go
+++ b/pkg/sentry/fs/tty/terminal.go
@@ -20,7 +20,7 @@ import (
 	"gvisor.dev/gvisor/pkg/refs"
 	"gvisor.dev/gvisor/pkg/sentry/arch"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
-	"gvisor.dev/gvisor/pkg/usermem"
+	"gvisor.dev/gvisor/tools/go_marshal/primitive"
 )
 
 // LINT.IfChange
@@ -64,7 +64,7 @@ func newTerminal(ctx context.Context, d *dirInodeOperations, n uint32) *Terminal
 
 // setControllingTTY makes tm the controlling terminal of the calling thread
 // group.
-func (tm *Terminal) setControllingTTY(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) error {
+func (tm *Terminal) setControllingTTY(ctx context.Context, args arch.SyscallArguments, isMaster bool) error {
 	task := kernel.TaskFromContext(ctx)
 	if task == nil {
 		panic("setControllingTTY must be called from a task context")
@@ -75,7 +75,7 @@ func (tm *Terminal) setControllingTTY(ctx context.Context, io usermem.IO, args a
 
 // releaseControllingTTY removes tm as the controlling terminal of the calling
 // thread group.
-func (tm *Terminal) releaseControllingTTY(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) error {
+func (tm *Terminal) releaseControllingTTY(ctx context.Context, args arch.SyscallArguments, isMaster bool) error {
 	task := kernel.TaskFromContext(ctx)
 	if task == nil {
 		panic("releaseControllingTTY must be called from a task context")
@@ -85,7 +85,7 @@ func (tm *Terminal) releaseControllingTTY(ctx context.Context, io usermem.IO, ar
 }
 
 // foregroundProcessGroup gets the process group ID of tm's foreground process.
-func (tm *Terminal) foregroundProcessGroup(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) (uintptr, error) {
+func (tm *Terminal) foregroundProcessGroup(ctx context.Context, args arch.SyscallArguments, isMaster bool) (uintptr, error) {
 	task := kernel.TaskFromContext(ctx)
 	if task == nil {
 		panic("foregroundProcessGroup must be called from a task context")
@@ -97,24 +97,21 @@ func (tm *Terminal) foregroundProcessGroup(ctx context.Context, io usermem.IO, a
 	}
 
 	// Write it out to *arg.
-	_, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), int32(ret), usermem.IOOpts{
-		AddressSpaceActive: true,
-	})
+	retP := primitive.Int32(ret)
+	_, err = retP.CopyOut(task, args[2].Pointer())
 	return 0, err
 }
 
 // foregroundProcessGroup sets tm's foreground process.
-func (tm *Terminal) setForegroundProcessGroup(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) (uintptr, error) {
+func (tm *Terminal) setForegroundProcessGroup(ctx context.Context, args arch.SyscallArguments, isMaster bool) (uintptr, error) {
 	task := kernel.TaskFromContext(ctx)
 	if task == nil {
 		panic("setForegroundProcessGroup must be called from a task context")
 	}
 
 	// Read in the process group ID.
-	var pgid int32
-	if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &pgid, usermem.IOOpts{
-		AddressSpaceActive: true,
-	}); err != nil {
+	var pgid primitive.Int32
+	if _, err := pgid.CopyIn(task, args[2].Pointer()); err != nil {
 		return 0, err
 	}
 
diff --git a/pkg/sentry/fsimpl/devpts/BUILD b/pkg/sentry/fsimpl/devpts/BUILD
index 3f64fab3a7..3e8c5e3fdb 100644
--- a/pkg/sentry/fsimpl/devpts/BUILD
+++ b/pkg/sentry/fsimpl/devpts/BUILD
@@ -43,6 +43,8 @@ go_library(
         "//pkg/syserror",
         "//pkg/usermem",
         "//pkg/waiter",
+        "//tools/go_marshal/marshal",
+        "//tools/go_marshal/primitive",
     ],
 )
 
diff --git a/pkg/sentry/fsimpl/devpts/line_discipline.go b/pkg/sentry/fsimpl/devpts/line_discipline.go
index f7bc325d15..b954c1ba1c 100644
--- a/pkg/sentry/fsimpl/devpts/line_discipline.go
+++ b/pkg/sentry/fsimpl/devpts/line_discipline.go
@@ -21,6 +21,7 @@ import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/sentry/arch"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
@@ -113,27 +114,23 @@ func newLineDiscipline(termios linux.KernelTermios) *lineDiscipline {
 }
 
 // getTermios gets the linux.Termios for the tty.
-func (l *lineDiscipline) getTermios(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+func (l *lineDiscipline) getTermios(task *kernel.Task, args arch.SyscallArguments) (uintptr, error) {
 	l.termiosMu.RLock()
 	defer l.termiosMu.RUnlock()
 	// We must copy a Termios struct, not KernelTermios.
 	t := l.termios.ToTermios()
-	_, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), t, usermem.IOOpts{
-		AddressSpaceActive: true,
-	})
+	_, err := t.CopyOut(task, args[2].Pointer())
 	return 0, err
 }
 
 // setTermios sets a linux.Termios for the tty.
-func (l *lineDiscipline) setTermios(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+func (l *lineDiscipline) setTermios(task *kernel.Task, args arch.SyscallArguments) (uintptr, error) {
 	l.termiosMu.Lock()
 	defer l.termiosMu.Unlock()
 	oldCanonEnabled := l.termios.LEnabled(linux.ICANON)
 	// We must copy a Termios struct, not KernelTermios.
 	var t linux.Termios
-	_, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &t, usermem.IOOpts{
-		AddressSpaceActive: true,
-	})
+	_, err := t.CopyIn(task, args[2].Pointer())
 	l.termios.FromTermios(t)
 
 	// If canonical mode is turned off, move bytes from inQueue's wait
@@ -150,21 +147,17 @@ func (l *lineDiscipline) setTermios(ctx context.Context, io usermem.IO, args arc
 	return 0, err
 }
 
-func (l *lineDiscipline) windowSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error {
+func (l *lineDiscipline) windowSize(t *kernel.Task, args arch.SyscallArguments) error {
 	l.sizeMu.Lock()
 	defer l.sizeMu.Unlock()
-	_, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), l.size, usermem.IOOpts{
-		AddressSpaceActive: true,
-	})
+	_, err := l.size.CopyOut(t, args[2].Pointer())
 	return err
 }
 
-func (l *lineDiscipline) setWindowSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error {
+func (l *lineDiscipline) setWindowSize(t *kernel.Task, args arch.SyscallArguments) error {
 	l.sizeMu.Lock()
 	defer l.sizeMu.Unlock()
-	_, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &l.size, usermem.IOOpts{
-		AddressSpaceActive: true,
-	})
+	_, err := l.size.CopyIn(t, args[2].Pointer())
 	return err
 }
 
@@ -180,8 +173,8 @@ func (l *lineDiscipline) slaveReadiness() waiter.EventMask {
 	return l.outQueue.writeReadiness(&l.termios) | l.inQueue.readReadiness(&l.termios)
 }
 
-func (l *lineDiscipline) inputQueueReadSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error {
-	return l.inQueue.readableSize(ctx, io, args)
+func (l *lineDiscipline) inputQueueReadSize(t *kernel.Task, io usermem.IO, args arch.SyscallArguments) error {
+	return l.inQueue.readableSize(t, io, args)
 }
 
 func (l *lineDiscipline) inputQueueRead(ctx context.Context, dst usermem.IOSequence) (int64, error) {
@@ -215,8 +208,8 @@ func (l *lineDiscipline) inputQueueWrite(ctx context.Context, src usermem.IOSequ
 	return 0, syserror.ErrWouldBlock
 }
 
-func (l *lineDiscipline) outputQueueReadSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error {
-	return l.outQueue.readableSize(ctx, io, args)
+func (l *lineDiscipline) outputQueueReadSize(t *kernel.Task, io usermem.IO, args arch.SyscallArguments) error {
+	return l.outQueue.readableSize(t, io, args)
 }
 
 func (l *lineDiscipline) outputQueueRead(ctx context.Context, dst usermem.IOSequence) (int64, error) {
diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go
index 60feb1993f..3422db6a4d 100644
--- a/pkg/sentry/fsimpl/devpts/master.go
+++ b/pkg/sentry/fsimpl/devpts/master.go
@@ -20,12 +20,14 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/arch"
 	fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
 	"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/unimpl"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
 	"gvisor.dev/gvisor/pkg/waiter"
+	"gvisor.dev/gvisor/tools/go_marshal/primitive"
 )
 
 // masterInode is the inode for the master end of the Terminal.
@@ -131,46 +133,51 @@ func (mfd *masterFileDescription) Write(ctx context.Context, src usermem.IOSeque
 
 // Ioctl implements vfs.FileDescriptionImpl.Ioctl.
 func (mfd *masterFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+	t := kernel.TaskFromContext(ctx)
+	if t == nil {
+		// ioctl(2) may only be called from a task goroutine.
+		return 0, syserror.ENOTTY
+	}
+
 	switch cmd := args[1].Uint(); cmd {
 	case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ
 		// Get the number of bytes in the output queue read buffer.
-		return 0, mfd.t.ld.outputQueueReadSize(ctx, io, args)
+		return 0, mfd.t.ld.outputQueueReadSize(t, io, args)
 	case linux.TCGETS:
 		// N.B. TCGETS on the master actually returns the configuration
 		// of the slave end.
-		return mfd.t.ld.getTermios(ctx, io, args)
+		return mfd.t.ld.getTermios(t, args)
 	case linux.TCSETS:
 		// N.B. TCSETS on the master actually affects the configuration
 		// of the slave end.
-		return mfd.t.ld.setTermios(ctx, io, args)
+		return mfd.t.ld.setTermios(t, args)
 	case linux.TCSETSW:
 		// TODO(b/29356795): This should drain the output queue first.
-		return mfd.t.ld.setTermios(ctx, io, args)
+		return mfd.t.ld.setTermios(t, args)
 	case linux.TIOCGPTN:
-		_, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), uint32(mfd.t.n), usermem.IOOpts{
-			AddressSpaceActive: true,
-		})
+		nP := primitive.Uint32(mfd.t.n)
+		_, err := nP.CopyOut(t, args[2].Pointer())
 		return 0, err
 	case linux.TIOCSPTLCK:
 		// TODO(b/29356795): Implement pty locking. For now just pretend we do.
 		return 0, nil
 	case linux.TIOCGWINSZ:
-		return 0, mfd.t.ld.windowSize(ctx, io, args)
+		return 0, mfd.t.ld.windowSize(t, args)
 	case linux.TIOCSWINSZ:
-		return 0, mfd.t.ld.setWindowSize(ctx, io, args)
+		return 0, mfd.t.ld.setWindowSize(t, args)
 	case linux.TIOCSCTTY:
 		// Make the given terminal the controlling terminal of the
 		// calling process.
-		return 0, mfd.t.setControllingTTY(ctx, io, args, true /* isMaster */)
+		return 0, mfd.t.setControllingTTY(ctx, args, true /* isMaster */)
 	case linux.TIOCNOTTY:
 		// Release this process's controlling terminal.
-		return 0, mfd.t.releaseControllingTTY(ctx, io, args, true /* isMaster */)
+		return 0, mfd.t.releaseControllingTTY(ctx, args, true /* isMaster */)
 	case linux.TIOCGPGRP:
 		// Get the foreground process group.
-		return mfd.t.foregroundProcessGroup(ctx, io, args, true /* isMaster */)
+		return mfd.t.foregroundProcessGroup(ctx, args, true /* isMaster */)
 	case linux.TIOCSPGRP:
 		// Set the foreground process group.
-		return mfd.t.setForegroundProcessGroup(ctx, io, args, true /* isMaster */)
+		return mfd.t.setForegroundProcessGroup(ctx, args, true /* isMaster */)
 	default:
 		maybeEmitUnimplementedEvent(ctx, cmd)
 		return 0, syserror.ENOTTY
diff --git a/pkg/sentry/fsimpl/devpts/queue.go b/pkg/sentry/fsimpl/devpts/queue.go
index 331c139977..08eca2589b 100644
--- a/pkg/sentry/fsimpl/devpts/queue.go
+++ b/pkg/sentry/fsimpl/devpts/queue.go
@@ -19,10 +19,12 @@ import (
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/safemem"
 	"gvisor.dev/gvisor/pkg/sentry/arch"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
 	"gvisor.dev/gvisor/pkg/waiter"
+	"gvisor.dev/gvisor/tools/go_marshal/primitive"
 )
 
 // waitBufMaxBytes is the maximum size of a wait buffer. It is based on
@@ -83,17 +85,15 @@ func (q *queue) writeReadiness(t *linux.KernelTermios) waiter.EventMask {
 }
 
 // readableSize writes the number of readable bytes to userspace.
-func (q *queue) readableSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error {
+func (q *queue) readableSize(t *kernel.Task, io usermem.IO, args arch.SyscallArguments) error {
 	q.mu.Lock()
 	defer q.mu.Unlock()
-	var size int32
+	size := primitive.Int32(0)
 	if q.readable {
-		size = int32(len(q.readBuf))
+		size = primitive.Int32(len(q.readBuf))
 	}
 
-	_, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), size, usermem.IOOpts{
-		AddressSpaceActive: true,
-	})
+	_, err := size.CopyOut(t, args[2].Pointer())
 	return err
 
 }
diff --git a/pkg/sentry/fsimpl/devpts/slave.go b/pkg/sentry/fsimpl/devpts/slave.go
index a9da7af648..5f4b474b32 100644
--- a/pkg/sentry/fsimpl/devpts/slave.go
+++ b/pkg/sentry/fsimpl/devpts/slave.go
@@ -20,11 +20,13 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/arch"
 	fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
 	"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
 	"gvisor.dev/gvisor/pkg/waiter"
+	"gvisor.dev/gvisor/tools/go_marshal/primitive"
 )
 
 // slaveInode is the inode for the slave end of the Terminal.
@@ -135,39 +137,44 @@ func (sfd *slaveFileDescription) Write(ctx context.Context, src usermem.IOSequen
 
 // Ioctl implements vfs.FileDescriptionImpl.Ioctl.
 func (sfd *slaveFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+	t := kernel.TaskFromContext(ctx)
+	if t == nil {
+		// ioctl(2) may only be called from a task goroutine.
+		return 0, syserror.ENOTTY
+	}
+
 	switch cmd := args[1].Uint(); cmd {
 	case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ
 		// Get the number of bytes in the input queue read buffer.
-		return 0, sfd.inode.t.ld.inputQueueReadSize(ctx, io, args)
+		return 0, sfd.inode.t.ld.inputQueueReadSize(t, io, args)
 	case linux.TCGETS:
-		return sfd.inode.t.ld.getTermios(ctx, io, args)
+		return sfd.inode.t.ld.getTermios(t, args)
 	case linux.TCSETS:
-		return sfd.inode.t.ld.setTermios(ctx, io, args)
+		return sfd.inode.t.ld.setTermios(t, args)
 	case linux.TCSETSW:
 		// TODO(b/29356795): This should drain the output queue first.
-		return sfd.inode.t.ld.setTermios(ctx, io, args)
+		return sfd.inode.t.ld.setTermios(t, args)
 	case linux.TIOCGPTN:
-		_, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), uint32(sfd.inode.t.n), usermem.IOOpts{
-			AddressSpaceActive: true,
-		})
+		nP := primitive.Uint32(sfd.inode.t.n)
+		_, err := nP.CopyOut(t, args[2].Pointer())
 		return 0, err
 	case linux.TIOCGWINSZ:
-		return 0, sfd.inode.t.ld.windowSize(ctx, io, args)
+		return 0, sfd.inode.t.ld.windowSize(t, args)
 	case linux.TIOCSWINSZ:
-		return 0, sfd.inode.t.ld.setWindowSize(ctx, io, args)
+		return 0, sfd.inode.t.ld.setWindowSize(t, args)
 	case linux.TIOCSCTTY:
 		// Make the given terminal the controlling terminal of the
 		// calling process.
-		return 0, sfd.inode.t.setControllingTTY(ctx, io, args, false /* isMaster */)
+		return 0, sfd.inode.t.setControllingTTY(ctx, args, false /* isMaster */)
 	case linux.TIOCNOTTY:
 		// Release this process's controlling terminal.
-		return 0, sfd.inode.t.releaseControllingTTY(ctx, io, args, false /* isMaster */)
+		return 0, sfd.inode.t.releaseControllingTTY(ctx, args, false /* isMaster */)
 	case linux.TIOCGPGRP:
 		// Get the foreground process group.
-		return sfd.inode.t.foregroundProcessGroup(ctx, io, args, false /* isMaster */)
+		return sfd.inode.t.foregroundProcessGroup(ctx, args, false /* isMaster */)
 	case linux.TIOCSPGRP:
 		// Set the foreground process group.
-		return sfd.inode.t.setForegroundProcessGroup(ctx, io, args, false /* isMaster */)
+		return sfd.inode.t.setForegroundProcessGroup(ctx, args, false /* isMaster */)
 	default:
 		maybeEmitUnimplementedEvent(ctx, cmd)
 		return 0, syserror.ENOTTY
diff --git a/pkg/sentry/fsimpl/devpts/terminal.go b/pkg/sentry/fsimpl/devpts/terminal.go
index 7d2781c54e..e88eb63607 100644
--- a/pkg/sentry/fsimpl/devpts/terminal.go
+++ b/pkg/sentry/fsimpl/devpts/terminal.go
@@ -19,7 +19,7 @@ import (
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/sentry/arch"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
-	"gvisor.dev/gvisor/pkg/usermem"
+	"gvisor.dev/gvisor/tools/go_marshal/primitive"
 )
 
 // Terminal is a pseudoterminal.
@@ -54,7 +54,7 @@ func newTerminal(n uint32) *Terminal {
 
 // setControllingTTY makes tm the controlling terminal of the calling thread
 // group.
-func (tm *Terminal) setControllingTTY(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) error {
+func (tm *Terminal) setControllingTTY(ctx context.Context, args arch.SyscallArguments, isMaster bool) error {
 	task := kernel.TaskFromContext(ctx)
 	if task == nil {
 		panic("setControllingTTY must be called from a task context")
@@ -65,7 +65,7 @@ func (tm *Terminal) setControllingTTY(ctx context.Context, io usermem.IO, args a
 
 // releaseControllingTTY removes tm as the controlling terminal of the calling
 // thread group.
-func (tm *Terminal) releaseControllingTTY(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) error {
+func (tm *Terminal) releaseControllingTTY(ctx context.Context, args arch.SyscallArguments, isMaster bool) error {
 	task := kernel.TaskFromContext(ctx)
 	if task == nil {
 		panic("releaseControllingTTY must be called from a task context")
@@ -75,7 +75,7 @@ func (tm *Terminal) releaseControllingTTY(ctx context.Context, io usermem.IO, ar
 }
 
 // foregroundProcessGroup gets the process group ID of tm's foreground process.
-func (tm *Terminal) foregroundProcessGroup(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) (uintptr, error) {
+func (tm *Terminal) foregroundProcessGroup(ctx context.Context, args arch.SyscallArguments, isMaster bool) (uintptr, error) {
 	task := kernel.TaskFromContext(ctx)
 	if task == nil {
 		panic("foregroundProcessGroup must be called from a task context")
@@ -87,24 +87,21 @@ func (tm *Terminal) foregroundProcessGroup(ctx context.Context, io usermem.IO, a
 	}
 
 	// Write it out to *arg.
-	_, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), int32(ret), usermem.IOOpts{
-		AddressSpaceActive: true,
-	})
+	retP := primitive.Int32(ret)
+	_, err = retP.CopyOut(task, args[2].Pointer())
 	return 0, err
 }
 
 // foregroundProcessGroup sets tm's foreground process.
-func (tm *Terminal) setForegroundProcessGroup(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) (uintptr, error) {
+func (tm *Terminal) setForegroundProcessGroup(ctx context.Context, args arch.SyscallArguments, isMaster bool) (uintptr, error) {
 	task := kernel.TaskFromContext(ctx)
 	if task == nil {
 		panic("setForegroundProcessGroup must be called from a task context")
 	}
 
 	// Read in the process group ID.
-	var pgid int32
-	if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &pgid, usermem.IOOpts{
-		AddressSpaceActive: true,
-	}); err != nil {
+	var pgid primitive.Int32
+	if _, err := pgid.CopyIn(task, args[2].Pointer()); err != nil {
 		return 0, err
 	}
 

From 6b992edc8ab85070f50a6860b936b6cef82d48f8 Mon Sep 17 00:00:00 2001
From: Nayana Bidari <nybidari@google.com>
Date: Tue, 1 Sep 2020 13:38:44 -0700
Subject: [PATCH 128/211] Fix panic when calling dup2().

PiperOrigin-RevId: 329572337
---
 pkg/sentry/kernel/fd_table.go            | 43 ++++++++++++------------
 pkg/sentry/kernel/fd_table_test.go       |  8 ++---
 pkg/sentry/kernel/fd_table_unsafe.go     | 15 +++++----
 pkg/sentry/syscalls/linux/sys_file.go    | 12 ++++---
 pkg/sentry/syscalls/linux/sys_pipe.go    |  2 +-
 pkg/sentry/syscalls/linux/sys_socket.go  |  2 +-
 pkg/sentry/syscalls/linux/vfs2/fd.go     |  4 +--
 pkg/sentry/syscalls/linux/vfs2/ioctl.go  |  4 +--
 pkg/sentry/syscalls/linux/vfs2/pipe.go   |  2 +-
 pkg/sentry/syscalls/linux/vfs2/socket.go |  2 +-
 10 files changed, 49 insertions(+), 45 deletions(-)

diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go
index 5773244ac7..89223fa365 100644
--- a/pkg/sentry/kernel/fd_table.go
+++ b/pkg/sentry/kernel/fd_table.go
@@ -112,7 +112,7 @@ func (f *FDTable) loadDescriptorTable(m map[int32]descriptor) {
 	ctx := context.Background()
 	f.init() // Initialize table.
 	for fd, d := range m {
-		f.setAll(fd, d.file, d.fileVFS2, d.flags)
+		f.setAll(ctx, fd, d.file, d.fileVFS2, d.flags)
 
 		// Note that we do _not_ need to acquire a extra table reference here. The
 		// table reference will already be accounted for in the file, so we drop the
@@ -127,7 +127,7 @@ func (f *FDTable) loadDescriptorTable(m map[int32]descriptor) {
 }
 
 // drop drops the table reference.
-func (f *FDTable) drop(file *fs.File) {
+func (f *FDTable) drop(ctx context.Context, file *fs.File) {
 	// Release locks.
 	file.Dirent.Inode.LockCtx.Posix.UnlockRegion(f, lock.LockRange{0, lock.LockEOF})
 
@@ -145,14 +145,13 @@ func (f *FDTable) drop(file *fs.File) {
 	d.InotifyEvent(ev, 0)
 
 	// Drop the table reference.
-	file.DecRef(context.Background())
+	file.DecRef(ctx)
 }
 
 // dropVFS2 drops the table reference.
-func (f *FDTable) dropVFS2(file *vfs.FileDescription) {
+func (f *FDTable) dropVFS2(ctx context.Context, file *vfs.FileDescription) {
 	// Release any POSIX lock possibly held by the FDTable. Range {0, 0} means the
 	// entire file.
-	ctx := context.Background()
 	err := file.UnlockPOSIX(ctx, f, 0, 0, linux.SEEK_SET)
 	if err != nil && err != syserror.ENOLCK {
 		panic(fmt.Sprintf("UnlockPOSIX failed: %v", err))
@@ -289,15 +288,15 @@ func (f *FDTable) NewFDs(ctx context.Context, fd int32, files []*fs.File, flags
 	// Install all entries.
 	for i := fd; i < end && len(fds) < len(files); i++ {
 		if d, _, _ := f.get(i); d == nil {
-			f.set(i, files[len(fds)], flags) // Set the descriptor.
-			fds = append(fds, i)             // Record the file descriptor.
+			f.set(ctx, i, files[len(fds)], flags) // Set the descriptor.
+			fds = append(fds, i)                  // Record the file descriptor.
 		}
 	}
 
 	// Failure? Unwind existing FDs.
 	if len(fds) < len(files) {
 		for _, i := range fds {
-			f.set(i, nil, FDFlags{}) // Zap entry.
+			f.set(ctx, i, nil, FDFlags{}) // Zap entry.
 		}
 		return nil, syscall.EMFILE
 	}
@@ -344,15 +343,15 @@ func (f *FDTable) NewFDsVFS2(ctx context.Context, fd int32, files []*vfs.FileDes
 	// Install all entries.
 	for i := fd; i < end && len(fds) < len(files); i++ {
 		if d, _, _ := f.getVFS2(i); d == nil {
-			f.setVFS2(i, files[len(fds)], flags) // Set the descriptor.
-			fds = append(fds, i)                 // Record the file descriptor.
+			f.setVFS2(ctx, i, files[len(fds)], flags) // Set the descriptor.
+			fds = append(fds, i)                      // Record the file descriptor.
 		}
 	}
 
 	// Failure? Unwind existing FDs.
 	if len(fds) < len(files) {
 		for _, i := range fds {
-			f.setVFS2(i, nil, FDFlags{}) // Zap entry.
+			f.setVFS2(ctx, i, nil, FDFlags{}) // Zap entry.
 		}
 		return nil, syscall.EMFILE
 	}
@@ -397,7 +396,7 @@ func (f *FDTable) NewFDVFS2(ctx context.Context, minfd int32, file *vfs.FileDesc
 	}
 	for fd < end {
 		if d, _, _ := f.getVFS2(fd); d == nil {
-			f.setVFS2(fd, file, flags)
+			f.setVFS2(ctx, fd, file, flags)
 			if fd == f.next {
 				// Update next search start position.
 				f.next = fd + 1
@@ -439,14 +438,14 @@ func (f *FDTable) newFDAt(ctx context.Context, fd int32, file *fs.File, fileVFS2
 	// Install the entry.
 	f.mu.Lock()
 	defer f.mu.Unlock()
-	f.setAll(fd, file, fileVFS2, flags)
+	f.setAll(ctx, fd, file, fileVFS2, flags)
 	return nil
 }
 
 // SetFlags sets the flags for the given file descriptor.
 //
 // True is returned iff flags were changed.
-func (f *FDTable) SetFlags(fd int32, flags FDFlags) error {
+func (f *FDTable) SetFlags(ctx context.Context, fd int32, flags FDFlags) error {
 	if fd < 0 {
 		// Don't accept negative FDs.
 		return syscall.EBADF
@@ -462,14 +461,14 @@ func (f *FDTable) SetFlags(fd int32, flags FDFlags) error {
 	}
 
 	// Update the flags.
-	f.set(fd, file, flags)
+	f.set(ctx, fd, file, flags)
 	return nil
 }
 
 // SetFlagsVFS2 sets the flags for the given file descriptor.
 //
 // True is returned iff flags were changed.
-func (f *FDTable) SetFlagsVFS2(fd int32, flags FDFlags) error {
+func (f *FDTable) SetFlagsVFS2(ctx context.Context, fd int32, flags FDFlags) error {
 	if fd < 0 {
 		// Don't accept negative FDs.
 		return syscall.EBADF
@@ -485,7 +484,7 @@ func (f *FDTable) SetFlagsVFS2(fd int32, flags FDFlags) error {
 	}
 
 	// Update the flags.
-	f.setVFS2(fd, file, flags)
+	f.setVFS2(ctx, fd, file, flags)
 	return nil
 }
 
@@ -584,9 +583,9 @@ func (f *FDTable) Fork(ctx context.Context) *FDTable {
 		// reference for the clone. We don't need anything else.
 		switch {
 		case file != nil:
-			clone.set(fd, file, flags)
+			clone.set(ctx, fd, file, flags)
 		case fileVFS2 != nil:
-			clone.setVFS2(fd, fileVFS2, flags)
+			clone.setVFS2(ctx, fd, fileVFS2, flags)
 		}
 	})
 	return clone
@@ -595,7 +594,7 @@ func (f *FDTable) Fork(ctx context.Context) *FDTable {
 // Remove removes an FD from and returns a non-file iff successful.
 //
 // N.B. Callers are required to use DecRef when they are done.
-func (f *FDTable) Remove(fd int32) (*fs.File, *vfs.FileDescription) {
+func (f *FDTable) Remove(ctx context.Context, fd int32) (*fs.File, *vfs.FileDescription) {
 	if fd < 0 {
 		return nil, nil
 	}
@@ -618,7 +617,7 @@ func (f *FDTable) Remove(fd int32) (*fs.File, *vfs.FileDescription) {
 		orig2.IncRef()
 	}
 	if orig != nil || orig2 != nil {
-		f.setAll(fd, nil, nil, FDFlags{}) // Zap entry.
+		f.setAll(ctx, fd, nil, nil, FDFlags{}) // Zap entry.
 	}
 	return orig, orig2
 }
@@ -630,7 +629,7 @@ func (f *FDTable) RemoveIf(ctx context.Context, cond func(*fs.File, *vfs.FileDes
 
 	f.forEach(ctx, func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) {
 		if cond(file, fileVFS2, flags) {
-			f.set(fd, nil, FDFlags{}) // Clear from table.
+			f.set(ctx, fd, nil, FDFlags{}) // Clear from table.
 			// Update current available position.
 			if fd < f.next {
 				f.next = fd
diff --git a/pkg/sentry/kernel/fd_table_test.go b/pkg/sentry/kernel/fd_table_test.go
index e3f30ba2ac..bf54600837 100644
--- a/pkg/sentry/kernel/fd_table_test.go
+++ b/pkg/sentry/kernel/fd_table_test.go
@@ -72,7 +72,7 @@ func TestFDTableMany(t *testing.T) {
 		}
 
 		i := int32(2)
-		fdTable.Remove(i)
+		fdTable.Remove(ctx, i)
 		if fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil || fds[0] != i {
 			t.Fatalf("Allocated %v FDs but wanted to allocate %v: %v", i, maxFD, err)
 		}
@@ -93,7 +93,7 @@ func TestFDTableOverLimit(t *testing.T) {
 			t.Fatalf("fdTable.NewFDs(maxFD-3, {f,f,f}): got %v, wanted nil", err)
 		} else {
 			for _, fd := range fds {
-				fdTable.Remove(fd)
+				fdTable.Remove(ctx, fd)
 			}
 		}
 
@@ -150,13 +150,13 @@ func TestFDTable(t *testing.T) {
 			t.Fatalf("fdTable.Get(2): got a %v, wanted nil", ref)
 		}
 
-		ref, _ := fdTable.Remove(1)
+		ref, _ := fdTable.Remove(ctx, 1)
 		if ref == nil {
 			t.Fatalf("fdTable.Remove(1) for an existing FD: failed, want success")
 		}
 		ref.DecRef(ctx)
 
-		if ref, _ := fdTable.Remove(1); ref != nil {
+		if ref, _ := fdTable.Remove(ctx, 1); ref != nil {
 			t.Fatalf("r.Remove(1) for a removed FD: got success, want failure")
 		}
 	})
diff --git a/pkg/sentry/kernel/fd_table_unsafe.go b/pkg/sentry/kernel/fd_table_unsafe.go
index 6b8feb1074..555b14f8e0 100644
--- a/pkg/sentry/kernel/fd_table_unsafe.go
+++ b/pkg/sentry/kernel/fd_table_unsafe.go
@@ -18,6 +18,7 @@ import (
 	"sync/atomic"
 	"unsafe"
 
+	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 )
@@ -84,8 +85,8 @@ func (f *FDTable) getAll(fd int32) (*fs.File, *vfs.FileDescription, FDFlags, boo
 // reference needed by the table iff the file is different.
 //
 // Precondition: mu must be held.
-func (f *FDTable) set(fd int32, file *fs.File, flags FDFlags) {
-	f.setAll(fd, file, nil, flags)
+func (f *FDTable) set(ctx context.Context, fd int32, file *fs.File, flags FDFlags) {
+	f.setAll(ctx, fd, file, nil, flags)
 }
 
 // setVFS2 sets an entry.
@@ -94,8 +95,8 @@ func (f *FDTable) set(fd int32, file *fs.File, flags FDFlags) {
 // reference needed by the table iff the file is different.
 //
 // Precondition: mu must be held.
-func (f *FDTable) setVFS2(fd int32, file *vfs.FileDescription, flags FDFlags) {
-	f.setAll(fd, nil, file, flags)
+func (f *FDTable) setVFS2(ctx context.Context, fd int32, file *vfs.FileDescription, flags FDFlags) {
+	f.setAll(ctx, fd, nil, file, flags)
 }
 
 // setAll sets an entry.
@@ -104,7 +105,7 @@ func (f *FDTable) setVFS2(fd int32, file *vfs.FileDescription, flags FDFlags) {
 // reference needed by the table iff the file is different.
 //
 // Precondition: mu must be held.
-func (f *FDTable) setAll(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) {
+func (f *FDTable) setAll(ctx context.Context, fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) {
 	if file != nil && fileVFS2 != nil {
 		panic("VFS1 and VFS2 files set")
 	}
@@ -152,11 +153,11 @@ func (f *FDTable) setAll(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription,
 		switch {
 		case orig.file != nil:
 			if desc == nil || desc.file != orig.file {
-				f.drop(orig.file)
+				f.drop(ctx, orig.file)
 			}
 		case orig.fileVFS2 != nil:
 			if desc == nil || desc.fileVFS2 != orig.fileVFS2 {
-				f.dropVFS2(orig.fileVFS2)
+				f.dropVFS2(ctx, orig.fileVFS2)
 			}
 		}
 	}
diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go
index 2564226892..07c77e4428 100644
--- a/pkg/sentry/syscalls/linux/sys_file.go
+++ b/pkg/sentry/syscalls/linux/sys_file.go
@@ -601,12 +601,12 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 	// Shared flags between file and socket.
 	switch request {
 	case linux.FIONCLEX:
-		t.FDTable().SetFlags(fd, kernel.FDFlags{
+		t.FDTable().SetFlags(t, fd, kernel.FDFlags{
 			CloseOnExec: false,
 		})
 		return 0, nil, nil
 	case linux.FIOCLEX:
-		t.FDTable().SetFlags(fd, kernel.FDFlags{
+		t.FDTable().SetFlags(t, fd, kernel.FDFlags{
 			CloseOnExec: true,
 		})
 		return 0, nil, nil
@@ -787,7 +787,7 @@ func Close(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 	// Note that Remove provides a reference on the file that we may use to
 	// flush. It is still active until we drop the final reference below
 	// (and other reference-holding operations complete).
-	file, _ := t.FDTable().Remove(fd)
+	file, _ := t.FDTable().Remove(t, fd)
 	if file == nil {
 		return 0, nil, syserror.EBADF
 	}
@@ -941,7 +941,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 		return uintptr(flags.ToLinuxFDFlags()), nil, nil
 	case linux.F_SETFD:
 		flags := args[2].Uint()
-		err := t.FDTable().SetFlags(fd, kernel.FDFlags{
+		err := t.FDTable().SetFlags(t, fd, kernel.FDFlags{
 			CloseOnExec: flags&linux.FD_CLOEXEC != 0,
 		})
 		return 0, nil, err
@@ -1154,6 +1154,10 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
 	return 0, nil, nil
 }
 
+// LINT.ThenChange(vfs2/fd.go)
+
+// LINT.IfChange
+
 func mkdirAt(t *kernel.Task, dirFD int32, addr usermem.Addr, mode linux.FileMode) error {
 	path, _, err := copyInPath(t, addr, false /* allowEmpty */)
 	if err != nil {
diff --git a/pkg/sentry/syscalls/linux/sys_pipe.go b/pkg/sentry/syscalls/linux/sys_pipe.go
index 3149e4aadc..c55beb39b3 100644
--- a/pkg/sentry/syscalls/linux/sys_pipe.go
+++ b/pkg/sentry/syscalls/linux/sys_pipe.go
@@ -48,7 +48,7 @@ func pipe2(t *kernel.Task, addr usermem.Addr, flags uint) (uintptr, error) {
 
 	if _, err := t.CopyOut(addr, fds); err != nil {
 		for _, fd := range fds {
-			if file, _ := t.FDTable().Remove(fd); file != nil {
+			if file, _ := t.FDTable().Remove(t, fd); file != nil {
 				file.DecRef(t)
 			}
 		}
diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go
index 38f573c14b..e4528d0954 100644
--- a/pkg/sentry/syscalls/linux/sys_socket.go
+++ b/pkg/sentry/syscalls/linux/sys_socket.go
@@ -249,7 +249,7 @@ func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
 	// Copy the file descriptors out.
 	if _, err := t.CopyOut(socks, fds); err != nil {
 		for _, fd := range fds {
-			if file, _ := t.FDTable().Remove(fd); file != nil {
+			if file, _ := t.FDTable().Remove(t, fd); file != nil {
 				file.DecRef(t)
 			}
 		}
diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go
index 4856554fe7..fdd8f88c51 100644
--- a/pkg/sentry/syscalls/linux/vfs2/fd.go
+++ b/pkg/sentry/syscalls/linux/vfs2/fd.go
@@ -34,7 +34,7 @@ func Close(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 	// Note that Remove provides a reference on the file that we may use to
 	// flush. It is still active until we drop the final reference below
 	// (and other reference-holding operations complete).
-	_, file := t.FDTable().Remove(fd)
+	_, file := t.FDTable().Remove(t, fd)
 	if file == nil {
 		return 0, nil, syserror.EBADF
 	}
@@ -137,7 +137,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 		return uintptr(flags.ToLinuxFDFlags()), nil, nil
 	case linux.F_SETFD:
 		flags := args[2].Uint()
-		err := t.FDTable().SetFlagsVFS2(fd, kernel.FDFlags{
+		err := t.FDTable().SetFlagsVFS2(t, fd, kernel.FDFlags{
 			CloseOnExec: flags&linux.FD_CLOEXEC != 0,
 		})
 		return 0, nil, err
diff --git a/pkg/sentry/syscalls/linux/vfs2/ioctl.go b/pkg/sentry/syscalls/linux/vfs2/ioctl.go
index 38778a3886..baa8a49afc 100644
--- a/pkg/sentry/syscalls/linux/vfs2/ioctl.go
+++ b/pkg/sentry/syscalls/linux/vfs2/ioctl.go
@@ -34,13 +34,13 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 	// Handle ioctls that apply to all FDs.
 	switch args[1].Int() {
 	case linux.FIONCLEX:
-		t.FDTable().SetFlagsVFS2(fd, kernel.FDFlags{
+		t.FDTable().SetFlagsVFS2(t, fd, kernel.FDFlags{
 			CloseOnExec: false,
 		})
 		return 0, nil, nil
 
 	case linux.FIOCLEX:
-		t.FDTable().SetFlagsVFS2(fd, kernel.FDFlags{
+		t.FDTable().SetFlagsVFS2(t, fd, kernel.FDFlags{
 			CloseOnExec: true,
 		})
 		return 0, nil, nil
diff --git a/pkg/sentry/syscalls/linux/vfs2/pipe.go b/pkg/sentry/syscalls/linux/vfs2/pipe.go
index 9b4848d9e4..3aa6d939d2 100644
--- a/pkg/sentry/syscalls/linux/vfs2/pipe.go
+++ b/pkg/sentry/syscalls/linux/vfs2/pipe.go
@@ -53,7 +53,7 @@ func pipe2(t *kernel.Task, addr usermem.Addr, flags int32) error {
 	}
 	if _, err := t.CopyOut(addr, fds); err != nil {
 		for _, fd := range fds {
-			if _, file := t.FDTable().Remove(fd); file != nil {
+			if _, file := t.FDTable().Remove(t, fd); file != nil {
 				file.DecRef(t)
 			}
 		}
diff --git a/pkg/sentry/syscalls/linux/vfs2/socket.go b/pkg/sentry/syscalls/linux/vfs2/socket.go
index a5032657a0..a15dad29fc 100644
--- a/pkg/sentry/syscalls/linux/vfs2/socket.go
+++ b/pkg/sentry/syscalls/linux/vfs2/socket.go
@@ -252,7 +252,7 @@ func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
 
 	if _, err := t.CopyOut(addr, fds); err != nil {
 		for _, fd := range fds {
-			if _, file := t.FDTable().Remove(fd); file != nil {
+			if _, file := t.FDTable().Remove(t, fd); file != nil {
 				file.DecRef(t)
 			}
 		}

From 4332affa833c6a11326aa5db366419ba7445cdaf Mon Sep 17 00:00:00 2001
From: Ayush Ranjan <ayushranjan@google.com>
Date: Tue, 1 Sep 2020 14:41:54 -0700
Subject: [PATCH 129/211] Refactor tty codebase to use master-replica
 terminology.

Updates #2972

PiperOrigin-RevId: 329584905
---
 pkg/abi/linux/dev.go                          |   6 +-
 pkg/abi/linux/tty.go                          |   6 +-
 pkg/sentry/fs/host/tty.go                     |   2 +-
 pkg/sentry/fs/tty/BUILD                       |   2 +-
 pkg/sentry/fs/tty/dir.go                      |  46 ++--
 pkg/sentry/fs/tty/fs.go                       |   4 +-
 pkg/sentry/fs/tty/line_discipline.go          |  22 +-
 pkg/sentry/fs/tty/master.go                   |   4 +-
 pkg/sentry/fs/tty/queue.go                    |   2 +-
 pkg/sentry/fs/tty/{slave.go => replica.go}    |  55 ++--
 pkg/sentry/fs/tty/terminal.go                 |  18 +-
 pkg/sentry/fs/tty/tty_test.go                 |   4 +-
 pkg/sentry/fsimpl/devpts/BUILD                |   2 +-
 pkg/sentry/fsimpl/devpts/devpts.go            |  34 +--
 pkg/sentry/fsimpl/devpts/devpts_test.go       |   4 +-
 pkg/sentry/fsimpl/devpts/line_discipline.go   |  22 +-
 pkg/sentry/fsimpl/devpts/master.go            |   4 +-
 pkg/sentry/fsimpl/devpts/queue.go             |   2 +-
 .../fsimpl/devpts/{slave.go => replica.go}    |  56 ++--
 pkg/sentry/fsimpl/devpts/terminal.go          |  16 +-
 pkg/sentry/fsimpl/host/host.go                |   2 +-
 runsc/boot/fs.go                              |  12 +-
 runsc/cmd/exec.go                             |   4 +-
 runsc/console/console.go                      |  18 +-
 runsc/container/console_test.go               |  10 +-
 runsc/container/multi_container_test.go       |   2 +-
 runsc/sandbox/sandbox.go                      |   4 +-
 test/syscalls/linux/pty.cc                    | 243 +++++++++---------
 test/syscalls/linux/pty_root.cc               |   4 +-
 test/util/pty_util.cc                         |  10 +-
 test/util/pty_util.h                          |   8 +-
 31 files changed, 316 insertions(+), 312 deletions(-)
 rename pkg/sentry/fs/tty/{slave.go => replica.go} (70%)
 rename pkg/sentry/fsimpl/devpts/{slave.go => replica.go} (69%)

diff --git a/pkg/abi/linux/dev.go b/pkg/abi/linux/dev.go
index 192e2093b3..7771650b35 100644
--- a/pkg/abi/linux/dev.go
+++ b/pkg/abi/linux/dev.go
@@ -54,9 +54,9 @@ const (
 	// Unix98 PTY masters.
 	UNIX98_PTY_MASTER_MAJOR = 128
 
-	// UNIX98_PTY_SLAVE_MAJOR is the initial major device number for
-	// Unix98 PTY slaves.
-	UNIX98_PTY_SLAVE_MAJOR = 136
+	// UNIX98_PTY_REPLICA_MAJOR is the initial major device number for
+	// Unix98 PTY replicas.
+	UNIX98_PTY_REPLICA_MAJOR = 136
 )
 
 // Minor device numbers for TTYAUX_MAJOR.
diff --git a/pkg/abi/linux/tty.go b/pkg/abi/linux/tty.go
index 5a5ff0aa22..47e65d9fbb 100644
--- a/pkg/abi/linux/tty.go
+++ b/pkg/abi/linux/tty.go
@@ -325,9 +325,9 @@ var MasterTermios = KernelTermios{
 	OutputSpeed:       38400,
 }
 
-// DefaultSlaveTermios is the default terminal configuration of the slave end
-// of a Unix98 pseudoterminal.
-var DefaultSlaveTermios = KernelTermios{
+// DefaultReplicaTermios is the default terminal configuration of the replica
+// end of a Unix98 pseudoterminal.
+var DefaultReplicaTermios = KernelTermios{
 	InputFlags:        ICRNL | IXON,
 	OutputFlags:       OPOST | ONLCR,
 	ControlFlags:      B38400 | CS8 | CREAD,
diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go
index 67a807f9d4..87d56a51d5 100644
--- a/pkg/sentry/fs/host/tty.go
+++ b/pkg/sentry/fs/host/tty.go
@@ -54,7 +54,7 @@ type TTYFileOperations struct {
 func newTTYFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags, iops *inodeOperations) *fs.File {
 	return fs.NewFile(ctx, dirent, flags, &TTYFileOperations{
 		fileOperations: fileOperations{iops: iops},
-		termios:        linux.DefaultSlaveTermios,
+		termios:        linux.DefaultReplicaTermios,
 	})
 }
 
diff --git a/pkg/sentry/fs/tty/BUILD b/pkg/sentry/fs/tty/BUILD
index b3f5a82449..fdd5a40d50 100644
--- a/pkg/sentry/fs/tty/BUILD
+++ b/pkg/sentry/fs/tty/BUILD
@@ -10,7 +10,7 @@ go_library(
         "line_discipline.go",
         "master.go",
         "queue.go",
-        "slave.go",
+        "replica.go",
         "terminal.go",
     ],
     visibility = ["//pkg/sentry:internal"],
diff --git a/pkg/sentry/fs/tty/dir.go b/pkg/sentry/fs/tty/dir.go
index 463f6189e1..c2da80bc2a 100644
--- a/pkg/sentry/fs/tty/dir.go
+++ b/pkg/sentry/fs/tty/dir.go
@@ -37,14 +37,14 @@ import (
 // This indirectly manages all terminals within the mount.
 //
 // New Terminals are created by masterInodeOperations.GetFile, which registers
-// the slave Inode in the this directory for discovery via Lookup/Readdir. The
-// slave inode is unregistered when the master file is Released, as the slave
+// the replica Inode in this directory for discovery via Lookup/Readdir. The
+// replica inode is unregistered when the master file is Released, as the replica
 // is no longer discoverable at that point.
 //
 // References on the underlying Terminal are held by masterFileOperations and
-// slaveInodeOperations.
+// replicaInodeOperations.
 //
-// masterInodeOperations and slaveInodeOperations hold a pointer to
+// masterInodeOperations and replicaInodeOperations hold a pointer to
 // dirInodeOperations, which is reference counted by the refcount their
 // corresponding Dirents hold on their parent (this directory).
 //
@@ -76,16 +76,16 @@ type dirInodeOperations struct {
 	// master is the master PTY inode.
 	master *fs.Inode
 
-	// slaves contains the slave inodes reachable from the directory.
+	// replicas contains the replica inodes reachable from the directory.
 	//
-	// A new slave is added by allocateTerminal and is removed by
+	// A new replica is added by allocateTerminal and is removed by
 	// masterFileOperations.Release.
 	//
-	// A reference is held on every slave in the map.
-	slaves map[uint32]*fs.Inode
+	// A reference is held on every replica in the map.
+	replicas map[uint32]*fs.Inode
 
 	// dentryMap is a SortedDentryMap used to implement Readdir containing
-	// the master and all entries in slaves.
+	// the master and all entries in replicas.
 	dentryMap *fs.SortedDentryMap
 
 	// next is the next pty index to use.
@@ -101,7 +101,7 @@ func newDir(ctx context.Context, m *fs.MountSource) *fs.Inode {
 	d := &dirInodeOperations{
 		InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(ctx, fs.RootOwner, fs.FilePermsFromMode(0555), linux.DEVPTS_SUPER_MAGIC),
 		msrc:                  m,
-		slaves:                make(map[uint32]*fs.Inode),
+		replicas:              make(map[uint32]*fs.Inode),
 		dentryMap:             fs.NewSortedDentryMap(nil),
 	}
 	// Linux devpts uses a default mode of 0000 for ptmx which can be
@@ -133,7 +133,7 @@ func (d *dirInodeOperations) Release(ctx context.Context) {
 	defer d.mu.Unlock()
 
 	d.master.DecRef(ctx)
-	if len(d.slaves) != 0 {
+	if len(d.replicas) != 0 {
 		panic(fmt.Sprintf("devpts directory still contains active terminals: %+v", d))
 	}
 }
@@ -149,14 +149,14 @@ func (d *dirInodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name str
 		return fs.NewDirent(ctx, d.master, name), nil
 	}
 
-	// Slave number?
+	// Replica number?
 	n, err := strconv.ParseUint(name, 10, 32)
 	if err != nil {
 		// Not found.
 		return nil, syserror.ENOENT
 	}
 
-	s, ok := d.slaves[uint32(n)]
+	s, ok := d.replicas[uint32(n)]
 	if !ok {
 		return nil, syserror.ENOENT
 	}
@@ -236,7 +236,7 @@ func (d *dirInodeOperations) allocateTerminal(ctx context.Context) (*Terminal, e
 		return nil, syserror.ENOMEM
 	}
 
-	if _, ok := d.slaves[n]; ok {
+	if _, ok := d.replicas[n]; ok {
 		panic(fmt.Sprintf("pty index collision; index %d already exists", n))
 	}
 
@@ -244,19 +244,19 @@ func (d *dirInodeOperations) allocateTerminal(ctx context.Context) (*Terminal, e
 	d.next++
 
 	// The reference returned by newTerminal is returned to the caller.
-	// Take another for the slave inode.
+	// Take another for the replica inode.
 	t.IncRef()
 
 	// Create a pts node. The owner is based on the context that opens
 	// ptmx.
 	creds := auth.CredentialsFromContext(ctx)
 	uid, gid := creds.EffectiveKUID, creds.EffectiveKGID
-	slave := newSlaveInode(ctx, d, t, fs.FileOwner{uid, gid}, fs.FilePermsFromMode(0666))
+	replica := newReplicaInode(ctx, d, t, fs.FileOwner{uid, gid}, fs.FilePermsFromMode(0666))
 
-	d.slaves[n] = slave
+	d.replicas[n] = replica
 	d.dentryMap.Add(strconv.FormatUint(uint64(n), 10), fs.DentAttr{
-		Type:    slave.StableAttr.Type,
-		InodeID: slave.StableAttr.InodeID,
+		Type:    replica.StableAttr.Type,
+		InodeID: replica.StableAttr.InodeID,
 	})
 
 	return t, nil
@@ -267,18 +267,18 @@ func (d *dirInodeOperations) masterClose(ctx context.Context, t *Terminal) {
 	d.mu.Lock()
 	defer d.mu.Unlock()
 
-	// The slave end disappears from the directory when the master end is
-	// closed, even if the slave end is open elsewhere.
+	// The replica end disappears from the directory when the master end is
+	// closed, even if the replica end is open elsewhere.
 	//
 	// N.B. since we're using a backdoor method to remove a directory entry
 	// we won't properly fire inotify events like Linux would.
-	s, ok := d.slaves[t.n]
+	s, ok := d.replicas[t.n]
 	if !ok {
 		panic(fmt.Sprintf("Terminal %+v doesn't exist in %+v?", t, d))
 	}
 
 	s.DecRef(ctx)
-	delete(d.slaves, t.n)
+	delete(d.replicas, t.n)
 	d.dentryMap.Remove(strconv.FormatUint(uint64(t.n), 10))
 }
 
diff --git a/pkg/sentry/fs/tty/fs.go b/pkg/sentry/fs/tty/fs.go
index 2d4d44bf3c..13f4901db1 100644
--- a/pkg/sentry/fs/tty/fs.go
+++ b/pkg/sentry/fs/tty/fs.go
@@ -79,8 +79,8 @@ type superOperations struct{}
 //
 // It always returns true, forcing a Lookup for all entries.
 //
-// Slave entries are dropped from dir when their master is closed, so an
-// existing slave Dirent in the tree is not sufficient to guarantee that it
+// Replica entries are dropped from dir when their master is closed, so an
+// existing replica Dirent in the tree is not sufficient to guarantee that it
 // still exists on the filesystem.
 func (superOperations) Revalidate(context.Context, string, *fs.Inode, *fs.Inode) bool {
 	return true
diff --git a/pkg/sentry/fs/tty/line_discipline.go b/pkg/sentry/fs/tty/line_discipline.go
index b6bc011a95..b34f4a0eb0 100644
--- a/pkg/sentry/fs/tty/line_discipline.go
+++ b/pkg/sentry/fs/tty/line_discipline.go
@@ -44,7 +44,7 @@ const (
 )
 
 // lineDiscipline dictates how input and output are handled between the
-// pseudoterminal (pty) master and slave. It can be configured to alter I/O,
+// pseudoterminal (pty) master and replica. It can be configured to alter I/O,
 // modify control characters (e.g. Ctrl-C for SIGINT), etc. The following man
 // pages are good resources for how to affect the line discipline:
 //
@@ -55,8 +55,8 @@ const (
 //
 // lineDiscipline has a simple structure but supports a multitude of options
 // (see the above man pages). It consists of two queues of bytes: one from the
-// terminal master to slave (the input queue) and one from slave to master (the
-// output queue). When bytes are written to one end of the pty, the line
+// terminal master to replica (the input queue) and one from replica to master
+// (the output queue). When bytes are written to one end of the pty, the line
 // discipline reads the bytes, modifies them or takes special action if
 // required, and enqueues them to be read by the other end of the pty:
 //
@@ -65,7 +65,7 @@ const (
 //    |   (inputQueueWrite)     +-------------+     (inputQueueRead)      |
 //    |                                                                   |
 //    |                                                                   v
-// masterFD                                                            slaveFD
+// masterFD                                                           replicaFD
 //    ^                                                                   |
 //    |                                                                   |
 //    |   output to terminal   +--------------+    output from process    |
@@ -104,8 +104,8 @@ type lineDiscipline struct {
 	// masterWaiter is used to wait on the master end of the TTY.
 	masterWaiter waiter.Queue `state:"zerovalue"`
 
-	// slaveWaiter is used to wait on the slave end of the TTY.
-	slaveWaiter waiter.Queue `state:"zerovalue"`
+	// replicaWaiter is used to wait on the replica end of the TTY.
+	replicaWaiter waiter.Queue `state:"zerovalue"`
 }
 
 func newLineDiscipline(termios linux.KernelTermios) *lineDiscipline {
@@ -143,7 +143,7 @@ func (l *lineDiscipline) setTermios(task *kernel.Task, args arch.SyscallArgument
 		l.inQueue.pushWaitBufLocked(l)
 		l.inQueue.readable = true
 		l.inQueue.mu.Unlock()
-		l.slaveWaiter.Notify(waiter.EventIn)
+		l.replicaWaiter.Notify(waiter.EventIn)
 	}
 
 	return 0, err
@@ -169,7 +169,7 @@ func (l *lineDiscipline) masterReadiness() waiter.EventMask {
 	return l.inQueue.writeReadiness(&linux.MasterTermios) | l.outQueue.readReadiness(&linux.MasterTermios)
 }
 
-func (l *lineDiscipline) slaveReadiness() waiter.EventMask {
+func (l *lineDiscipline) replicaReadiness() waiter.EventMask {
 	l.termiosMu.RLock()
 	defer l.termiosMu.RUnlock()
 	return l.outQueue.writeReadiness(&l.termios) | l.inQueue.readReadiness(&l.termios)
@@ -189,7 +189,7 @@ func (l *lineDiscipline) inputQueueRead(ctx context.Context, dst usermem.IOSeque
 	if n > 0 {
 		l.masterWaiter.Notify(waiter.EventOut)
 		if pushed {
-			l.slaveWaiter.Notify(waiter.EventIn)
+			l.replicaWaiter.Notify(waiter.EventIn)
 		}
 		return n, nil
 	}
@@ -204,7 +204,7 @@ func (l *lineDiscipline) inputQueueWrite(ctx context.Context, src usermem.IOSequ
 		return 0, err
 	}
 	if n > 0 {
-		l.slaveWaiter.Notify(waiter.EventIn)
+		l.replicaWaiter.Notify(waiter.EventIn)
 		return n, nil
 	}
 	return 0, syserror.ErrWouldBlock
@@ -222,7 +222,7 @@ func (l *lineDiscipline) outputQueueRead(ctx context.Context, dst usermem.IOSequ
 		return 0, err
 	}
 	if n > 0 {
-		l.slaveWaiter.Notify(waiter.EventOut)
+		l.replicaWaiter.Notify(waiter.EventOut)
 		if pushed {
 			l.masterWaiter.Notify(waiter.EventIn)
 		}
diff --git a/pkg/sentry/fs/tty/master.go b/pkg/sentry/fs/tty/master.go
index 1754572976..bebf90ffa8 100644
--- a/pkg/sentry/fs/tty/master.go
+++ b/pkg/sentry/fs/tty/master.go
@@ -166,11 +166,11 @@ func (mf *masterFileOperations) Ioctl(ctx context.Context, _ *fs.File, io userme
 		return 0, mf.t.ld.outputQueueReadSize(t, args)
 	case linux.TCGETS:
 		// N.B. TCGETS on the master actually returns the configuration
-		// of the slave end.
+		// of the replica end.
 		return mf.t.ld.getTermios(t, args)
 	case linux.TCSETS:
 		// N.B. TCSETS on the master actually affects the configuration
-		// of the slave end.
+		// of the replica end.
 		return mf.t.ld.setTermios(t, args)
 	case linux.TCSETSW:
 		// TODO(b/29356795): This should drain the output queue first.
diff --git a/pkg/sentry/fs/tty/queue.go b/pkg/sentry/fs/tty/queue.go
index f0a18c75a6..e070a1b718 100644
--- a/pkg/sentry/fs/tty/queue.go
+++ b/pkg/sentry/fs/tty/queue.go
@@ -34,7 +34,7 @@ import (
 const waitBufMaxBytes = 131072
 
 // queue represents one of the input or output queues between a pty master and
-// slave. Bytes written to a queue are added to the read buffer until it is
+// replica. Bytes written to a queue are added to the read buffer until it is
 // full, at which point they are written to the wait buffer. Bytes are
 // processed (i.e. undergo termios transformations) as they are added to the
 // read buffer. The read buffer is readable when its length is nonzero and
diff --git a/pkg/sentry/fs/tty/slave.go b/pkg/sentry/fs/tty/replica.go
similarity index 70%
rename from pkg/sentry/fs/tty/slave.go
rename to pkg/sentry/fs/tty/replica.go
index 933d2c3ff8..cb6cd68642 100644
--- a/pkg/sentry/fs/tty/slave.go
+++ b/pkg/sentry/fs/tty/replica.go
@@ -29,11 +29,11 @@ import (
 
 // LINT.IfChange
 
-// slaveInodeOperations are the fs.InodeOperations for the slave end of the
+// replicaInodeOperations are the fs.InodeOperations for the replica end of the
 // Terminal (pts file).
 //
 // +stateify savable
-type slaveInodeOperations struct {
+type replicaInodeOperations struct {
 	fsutil.SimpleFileInode
 
 	// d is the containing dir.
@@ -43,13 +43,13 @@ type slaveInodeOperations struct {
 	t *Terminal
 }
 
-var _ fs.InodeOperations = (*slaveInodeOperations)(nil)
+var _ fs.InodeOperations = (*replicaInodeOperations)(nil)
 
-// newSlaveInode creates an fs.Inode for the slave end of a terminal.
+// newReplicaInode creates an fs.Inode for the replica end of a terminal.
 //
-// newSlaveInode takes ownership of t.
-func newSlaveInode(ctx context.Context, d *dirInodeOperations, t *Terminal, owner fs.FileOwner, p fs.FilePermissions) *fs.Inode {
-	iops := &slaveInodeOperations{
+// newReplicaInode takes ownership of t.
+func newReplicaInode(ctx context.Context, d *dirInodeOperations, t *Terminal, owner fs.FileOwner, p fs.FilePermissions) *fs.Inode {
+	iops := &replicaInodeOperations{
 		SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, owner, p, linux.DEVPTS_SUPER_MAGIC),
 		d:               d,
 		t:               t,
@@ -66,18 +66,18 @@ func newSlaveInode(ctx context.Context, d *dirInodeOperations, t *Terminal, owne
 		Type:    fs.CharacterDevice,
 		// See fs/devpts/inode.c:devpts_fill_super.
 		BlockSize:       1024,
-		DeviceFileMajor: linux.UNIX98_PTY_SLAVE_MAJOR,
+		DeviceFileMajor: linux.UNIX98_PTY_REPLICA_MAJOR,
 		DeviceFileMinor: t.n,
 	})
 }
 
 // Release implements fs.InodeOperations.Release.
-func (si *slaveInodeOperations) Release(ctx context.Context) {
+func (si *replicaInodeOperations) Release(ctx context.Context) {
 	si.t.DecRef(ctx)
 }
 
 // Truncate implements fs.InodeOperations.Truncate.
-func (*slaveInodeOperations) Truncate(context.Context, *fs.Inode, int64) error {
+func (*replicaInodeOperations) Truncate(context.Context, *fs.Inode, int64) error {
 	return nil
 }
 
@@ -85,14 +85,15 @@ func (*slaveInodeOperations) Truncate(context.Context, *fs.Inode, int64) error {
 //
 // This may race with destruction of the terminal. If the terminal is gone, it
 // returns ENOENT.
-func (si *slaveInodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
-	return fs.NewFile(ctx, d, flags, &slaveFileOperations{si: si}), nil
+func (si *replicaInodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
+	return fs.NewFile(ctx, d, flags, &replicaFileOperations{si: si}), nil
 }
 
-// slaveFileOperations are the fs.FileOperations for the slave end of a terminal.
+// replicaFileOperations are the fs.FileOperations for the replica end of a
+// terminal.
 //
 // +stateify savable
-type slaveFileOperations struct {
+type replicaFileOperations struct {
 	fsutil.FilePipeSeek             `state:"nosave"`
 	fsutil.FileNotDirReaddir        `state:"nosave"`
 	fsutil.FileNoFsync              `state:"nosave"`
@@ -102,42 +103,42 @@ type slaveFileOperations struct {
 	fsutil.FileUseInodeUnstableAttr `state:"nosave"`
 
 	// si is the inode operations.
-	si *slaveInodeOperations
+	si *replicaInodeOperations
 }
 
-var _ fs.FileOperations = (*slaveFileOperations)(nil)
+var _ fs.FileOperations = (*replicaFileOperations)(nil)
 
 // Release implements fs.FileOperations.Release.
-func (sf *slaveFileOperations) Release(context.Context) {
+func (sf *replicaFileOperations) Release(context.Context) {
 }
 
 // EventRegister implements waiter.Waitable.EventRegister.
-func (sf *slaveFileOperations) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
-	sf.si.t.ld.slaveWaiter.EventRegister(e, mask)
+func (sf *replicaFileOperations) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
+	sf.si.t.ld.replicaWaiter.EventRegister(e, mask)
 }
 
 // EventUnregister implements waiter.Waitable.EventUnregister.
-func (sf *slaveFileOperations) EventUnregister(e *waiter.Entry) {
-	sf.si.t.ld.slaveWaiter.EventUnregister(e)
+func (sf *replicaFileOperations) EventUnregister(e *waiter.Entry) {
+	sf.si.t.ld.replicaWaiter.EventUnregister(e)
 }
 
 // Readiness implements waiter.Waitable.Readiness.
-func (sf *slaveFileOperations) Readiness(mask waiter.EventMask) waiter.EventMask {
-	return sf.si.t.ld.slaveReadiness()
+func (sf *replicaFileOperations) Readiness(mask waiter.EventMask) waiter.EventMask {
+	return sf.si.t.ld.replicaReadiness()
 }
 
 // Read implements fs.FileOperations.Read.
-func (sf *slaveFileOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, _ int64) (int64, error) {
+func (sf *replicaFileOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, _ int64) (int64, error) {
 	return sf.si.t.ld.inputQueueRead(ctx, dst)
 }
 
 // Write implements fs.FileOperations.Write.
-func (sf *slaveFileOperations) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) {
+func (sf *replicaFileOperations) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) {
 	return sf.si.t.ld.outputQueueWrite(ctx, src)
 }
 
 // Ioctl implements fs.FileOperations.Ioctl.
-func (sf *slaveFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+func (sf *replicaFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
 	t := kernel.TaskFromContext(ctx)
 	if t == nil {
 		// ioctl(2) may only be called from a task goroutine.
@@ -182,4 +183,4 @@ func (sf *slaveFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem
 	}
 }
 
-// LINT.ThenChange(../../fsimpl/devpts/slave.go)
+// LINT.ThenChange(../../fsimpl/devpts/replica.go)
diff --git a/pkg/sentry/fs/tty/terminal.go b/pkg/sentry/fs/tty/terminal.go
index 56b59632d3..c9dbf1f3b3 100644
--- a/pkg/sentry/fs/tty/terminal.go
+++ b/pkg/sentry/fs/tty/terminal.go
@@ -44,19 +44,19 @@ type Terminal struct {
 	// this terminal. This field is immutable.
 	masterKTTY *kernel.TTY
 
-	// slaveKTTY contains the controlling process of the slave end of this
+	// replicaKTTY contains the controlling process of the replica end of this
 	// terminal. This field is immutable.
-	slaveKTTY *kernel.TTY
+	replicaKTTY *kernel.TTY
 }
 
 func newTerminal(ctx context.Context, d *dirInodeOperations, n uint32) *Terminal {
-	termios := linux.DefaultSlaveTermios
+	termios := linux.DefaultReplicaTermios
 	t := Terminal{
-		d:          d,
-		n:          n,
-		ld:         newLineDiscipline(termios),
-		masterKTTY: &kernel.TTY{Index: n},
-		slaveKTTY:  &kernel.TTY{Index: n},
+		d:           d,
+		n:           n,
+		ld:          newLineDiscipline(termios),
+		masterKTTY:  &kernel.TTY{Index: n},
+		replicaKTTY: &kernel.TTY{Index: n},
 	}
 	t.EnableLeakCheck("tty.Terminal")
 	return &t
@@ -123,7 +123,7 @@ func (tm *Terminal) tty(isMaster bool) *kernel.TTY {
 	if isMaster {
 		return tm.masterKTTY
 	}
-	return tm.slaveKTTY
+	return tm.replicaKTTY
 }
 
 // LINT.ThenChange(../../fsimpl/devpts/terminal.go)
diff --git a/pkg/sentry/fs/tty/tty_test.go b/pkg/sentry/fs/tty/tty_test.go
index 2cbc056783..49edee83d1 100644
--- a/pkg/sentry/fs/tty/tty_test.go
+++ b/pkg/sentry/fs/tty/tty_test.go
@@ -22,8 +22,8 @@ import (
 	"gvisor.dev/gvisor/pkg/usermem"
 )
 
-func TestSimpleMasterToSlave(t *testing.T) {
-	ld := newLineDiscipline(linux.DefaultSlaveTermios)
+func TestSimpleMasterToReplica(t *testing.T) {
+	ld := newLineDiscipline(linux.DefaultReplicaTermios)
 	ctx := contexttest.Context(t)
 	inBytes := []byte("hello, tty\n")
 	src := usermem.BytesIOSequence(inBytes)
diff --git a/pkg/sentry/fsimpl/devpts/BUILD b/pkg/sentry/fsimpl/devpts/BUILD
index 3e8c5e3fdb..ac48ab34b6 100644
--- a/pkg/sentry/fsimpl/devpts/BUILD
+++ b/pkg/sentry/fsimpl/devpts/BUILD
@@ -21,8 +21,8 @@ go_library(
         "line_discipline.go",
         "master.go",
         "queue.go",
+        "replica.go",
         "root_inode_refs.go",
-        "slave.go",
         "terminal.go",
     ],
     visibility = ["//pkg/sentry:internal"],
diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go
index 57580f4d47..dcf1ee25bc 100644
--- a/pkg/sentry/fsimpl/devpts/devpts.go
+++ b/pkg/sentry/fsimpl/devpts/devpts.go
@@ -79,7 +79,7 @@ func (fstype FilesystemType) newFilesystem(vfsObj *vfs.VirtualFilesystem, creds
 
 	// Construct the root directory. This is always inode id 1.
 	root := &rootInode{
-		slaves: make(map[uint32]*slaveInode),
+		replicas: make(map[uint32]*replicaInode),
 	}
 	root.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, devMinor, 1, linux.ModeDirectory|0555)
 	root.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
@@ -133,8 +133,8 @@ type rootInode struct {
 	// mu protects the fields below.
 	mu sync.Mutex
 
-	// slaves maps pty ids to slave inodes.
-	slaves map[uint32]*slaveInode
+	// replicas maps pty ids to replica inodes.
+	replicas map[uint32]*replicaInode
 
 	// nextIdx is the next pty index to use. Must be accessed atomically.
 	//
@@ -154,22 +154,22 @@ func (i *rootInode) allocateTerminal(creds *auth.Credentials) (*Terminal, error)
 	idx := i.nextIdx
 	i.nextIdx++
 
-	// Sanity check that slave with idx does not exist.
-	if _, ok := i.slaves[idx]; ok {
+	// Sanity check that replica with idx does not exist.
+	if _, ok := i.replicas[idx]; ok {
 		panic(fmt.Sprintf("pty index collision; index %d already exists", idx))
 	}
 
-	// Create the new terminal and slave.
+	// Create the new terminal and replica.
 	t := newTerminal(idx)
-	slave := &slaveInode{
+	replica := &replicaInode{
 		root: i,
 		t:    t,
 	}
 	// Linux always uses pty index + 3 as the inode id. See
 	// fs/devpts/inode.c:devpts_pty_new().
-	slave.InodeAttrs.Init(creds, i.InodeAttrs.DevMajor(), i.InodeAttrs.DevMinor(), uint64(idx+3), linux.ModeCharacterDevice|0600)
-	slave.dentry.Init(slave)
-	i.slaves[idx] = slave
+	replica.InodeAttrs.Init(creds, i.InodeAttrs.DevMajor(), i.InodeAttrs.DevMinor(), uint64(idx+3), linux.ModeCharacterDevice|0600)
+	replica.dentry.Init(replica)
+	i.replicas[idx] = replica
 
 	return t, nil
 }
@@ -179,11 +179,11 @@ func (i *rootInode) masterClose(t *Terminal) {
 	i.mu.Lock()
 	defer i.mu.Unlock()
 
-	// Sanity check that slave with idx exists.
-	if _, ok := i.slaves[t.n]; !ok {
+	// Sanity check that replica with idx exists.
+	if _, ok := i.replicas[t.n]; !ok {
 		panic(fmt.Sprintf("pty with index %d does not exist", t.n))
 	}
-	delete(i.slaves, t.n)
+	delete(i.replicas, t.n)
 }
 
 // Open implements kernfs.Inode.Open.
@@ -205,7 +205,7 @@ func (i *rootInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, error
 	}
 	i.mu.Lock()
 	defer i.mu.Unlock()
-	if si, ok := i.slaves[uint32(idx)]; ok {
+	if si, ok := i.replicas[uint32(idx)]; ok {
 		si.dentry.IncRef()
 		return si.dentry.VFSDentry(), nil
 
@@ -217,8 +217,8 @@ func (i *rootInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, error
 func (i *rootInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
 	i.mu.Lock()
 	defer i.mu.Unlock()
-	ids := make([]int, 0, len(i.slaves))
-	for id := range i.slaves {
+	ids := make([]int, 0, len(i.replicas))
+	for id := range i.replicas {
 		ids = append(ids, int(id))
 	}
 	sort.Ints(ids)
@@ -226,7 +226,7 @@ func (i *rootInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback,
 		dirent := vfs.Dirent{
 			Name:    strconv.FormatUint(uint64(id), 10),
 			Type:    linux.DT_CHR,
-			Ino:     i.slaves[uint32(id)].InodeAttrs.Ino(),
+			Ino:     i.replicas[uint32(id)].InodeAttrs.Ino(),
 			NextOff: offset + 1,
 		}
 		if err := cb.Handle(dirent); err != nil {
diff --git a/pkg/sentry/fsimpl/devpts/devpts_test.go b/pkg/sentry/fsimpl/devpts/devpts_test.go
index b7c1490470..448390cfe4 100644
--- a/pkg/sentry/fsimpl/devpts/devpts_test.go
+++ b/pkg/sentry/fsimpl/devpts/devpts_test.go
@@ -22,8 +22,8 @@ import (
 	"gvisor.dev/gvisor/pkg/usermem"
 )
 
-func TestSimpleMasterToSlave(t *testing.T) {
-	ld := newLineDiscipline(linux.DefaultSlaveTermios)
+func TestSimpleMasterToReplica(t *testing.T) {
+	ld := newLineDiscipline(linux.DefaultReplicaTermios)
 	ctx := contexttest.Context(t)
 	inBytes := []byte("hello, tty\n")
 	src := usermem.BytesIOSequence(inBytes)
diff --git a/pkg/sentry/fsimpl/devpts/line_discipline.go b/pkg/sentry/fsimpl/devpts/line_discipline.go
index b954c1ba1c..e6b0e81cf7 100644
--- a/pkg/sentry/fsimpl/devpts/line_discipline.go
+++ b/pkg/sentry/fsimpl/devpts/line_discipline.go
@@ -42,7 +42,7 @@ const (
 )
 
 // lineDiscipline dictates how input and output are handled between the
-// pseudoterminal (pty) master and slave. It can be configured to alter I/O,
+// pseudoterminal (pty) master and replica. It can be configured to alter I/O,
 // modify control characters (e.g. Ctrl-C for SIGINT), etc. The following man
 // pages are good resources for how to affect the line discipline:
 //
@@ -53,8 +53,8 @@ const (
 //
 // lineDiscipline has a simple structure but supports a multitude of options
 // (see the above man pages). It consists of two queues of bytes: one from the
-// terminal master to slave (the input queue) and one from slave to master (the
-// output queue). When bytes are written to one end of the pty, the line
+// terminal master to replica (the input queue) and one from replica to master
+// (the output queue). When bytes are written to one end of the pty, the line
 // discipline reads the bytes, modifies them or takes special action if
 // required, and enqueues them to be read by the other end of the pty:
 //
@@ -63,7 +63,7 @@ const (
 //    |   (inputQueueWrite)     +-------------+     (inputQueueRead)      |
 //    |                                                                   |
 //    |                                                                   v
-// masterFD                                                            slaveFD
+// masterFD                                                           replicaFD
 //    ^                                                                   |
 //    |                                                                   |
 //    |   output to terminal   +--------------+    output from process    |
@@ -102,8 +102,8 @@ type lineDiscipline struct {
 	// masterWaiter is used to wait on the master end of the TTY.
 	masterWaiter waiter.Queue `state:"zerovalue"`
 
-	// slaveWaiter is used to wait on the slave end of the TTY.
-	slaveWaiter waiter.Queue `state:"zerovalue"`
+	// replicaWaiter is used to wait on the replica end of the TTY.
+	replicaWaiter waiter.Queue `state:"zerovalue"`
 }
 
 func newLineDiscipline(termios linux.KernelTermios) *lineDiscipline {
@@ -141,7 +141,7 @@ func (l *lineDiscipline) setTermios(task *kernel.Task, args arch.SyscallArgument
 		l.inQueue.pushWaitBufLocked(l)
 		l.inQueue.readable = true
 		l.inQueue.mu.Unlock()
-		l.slaveWaiter.Notify(waiter.EventIn)
+		l.replicaWaiter.Notify(waiter.EventIn)
 	}
 
 	return 0, err
@@ -167,7 +167,7 @@ func (l *lineDiscipline) masterReadiness() waiter.EventMask {
 	return l.inQueue.writeReadiness(&linux.MasterTermios) | l.outQueue.readReadiness(&linux.MasterTermios)
 }
 
-func (l *lineDiscipline) slaveReadiness() waiter.EventMask {
+func (l *lineDiscipline) replicaReadiness() waiter.EventMask {
 	l.termiosMu.RLock()
 	defer l.termiosMu.RUnlock()
 	return l.outQueue.writeReadiness(&l.termios) | l.inQueue.readReadiness(&l.termios)
@@ -187,7 +187,7 @@ func (l *lineDiscipline) inputQueueRead(ctx context.Context, dst usermem.IOSeque
 	if n > 0 {
 		l.masterWaiter.Notify(waiter.EventOut)
 		if pushed {
-			l.slaveWaiter.Notify(waiter.EventIn)
+			l.replicaWaiter.Notify(waiter.EventIn)
 		}
 		return n, nil
 	}
@@ -202,7 +202,7 @@ func (l *lineDiscipline) inputQueueWrite(ctx context.Context, src usermem.IOSequ
 		return 0, err
 	}
 	if n > 0 {
-		l.slaveWaiter.Notify(waiter.EventIn)
+		l.replicaWaiter.Notify(waiter.EventIn)
 		return n, nil
 	}
 	return 0, syserror.ErrWouldBlock
@@ -220,7 +220,7 @@ func (l *lineDiscipline) outputQueueRead(ctx context.Context, dst usermem.IOSequ
 		return 0, err
 	}
 	if n > 0 {
-		l.slaveWaiter.Notify(waiter.EventOut)
+		l.replicaWaiter.Notify(waiter.EventOut)
 		if pushed {
 			l.masterWaiter.Notify(waiter.EventIn)
 		}
diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go
index 3422db6a4d..d07e1ded8d 100644
--- a/pkg/sentry/fsimpl/devpts/master.go
+++ b/pkg/sentry/fsimpl/devpts/master.go
@@ -145,11 +145,11 @@ func (mfd *masterFileDescription) Ioctl(ctx context.Context, io usermem.IO, args
 		return 0, mfd.t.ld.outputQueueReadSize(t, io, args)
 	case linux.TCGETS:
 		// N.B. TCGETS on the master actually returns the configuration
-		// of the slave end.
+		// of the replica end.
 		return mfd.t.ld.getTermios(t, args)
 	case linux.TCSETS:
 		// N.B. TCSETS on the master actually affects the configuration
-		// of the slave end.
+		// of the replica end.
 		return mfd.t.ld.setTermios(t, args)
 	case linux.TCSETSW:
 		// TODO(b/29356795): This should drain the output queue first.
diff --git a/pkg/sentry/fsimpl/devpts/queue.go b/pkg/sentry/fsimpl/devpts/queue.go
index 08eca2589b..ca36b66e9e 100644
--- a/pkg/sentry/fsimpl/devpts/queue.go
+++ b/pkg/sentry/fsimpl/devpts/queue.go
@@ -32,7 +32,7 @@ import (
 const waitBufMaxBytes = 131072
 
 // queue represents one of the input or output queues between a pty master and
-// slave. Bytes written to a queue are added to the read buffer until it is
+// replica. Bytes written to a queue are added to the read buffer until it is
 // full, at which point they are written to the wait buffer. Bytes are
 // processed (i.e. undergo termios transformations) as they are added to the
 // read buffer. The read buffer is readable when its length is nonzero and
diff --git a/pkg/sentry/fsimpl/devpts/slave.go b/pkg/sentry/fsimpl/devpts/replica.go
similarity index 69%
rename from pkg/sentry/fsimpl/devpts/slave.go
rename to pkg/sentry/fsimpl/devpts/replica.go
index 5f4b474b32..1f99f4b4dd 100644
--- a/pkg/sentry/fsimpl/devpts/slave.go
+++ b/pkg/sentry/fsimpl/devpts/replica.go
@@ -29,8 +29,8 @@ import (
 	"gvisor.dev/gvisor/tools/go_marshal/primitive"
 )
 
-// slaveInode is the inode for the slave end of the Terminal.
-type slaveInode struct {
+// replicaInode is the inode for the replica end of the Terminal.
+type replicaInode struct {
 	implStatFS
 	kernfs.InodeAttrs
 	kernfs.InodeNoopRefCount
@@ -49,12 +49,12 @@ type slaveInode struct {
 	t *Terminal
 }
 
-var _ kernfs.Inode = (*slaveInode)(nil)
+var _ kernfs.Inode = (*replicaInode)(nil)
 
 // Open implements kernfs.Inode.Open.
-func (si *slaveInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+func (si *replicaInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
 	si.IncRef()
-	fd := &slaveFileDescription{
+	fd := &replicaFileDescription{
 		inode: si,
 	}
 	fd.LockFD.Init(&si.locks)
@@ -67,76 +67,76 @@ func (si *slaveInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs
 }
 
 // Valid implements kernfs.Inode.Valid.
-func (si *slaveInode) Valid(context.Context) bool {
-	// Return valid if the slave still exists.
+func (si *replicaInode) Valid(context.Context) bool {
+	// Return valid if the replica still exists.
 	si.root.mu.Lock()
 	defer si.root.mu.Unlock()
-	_, ok := si.root.slaves[si.t.n]
+	_, ok := si.root.replicas[si.t.n]
 	return ok
 }
 
 // Stat implements kernfs.Inode.Stat.
-func (si *slaveInode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
+func (si *replicaInode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
 	statx, err := si.InodeAttrs.Stat(ctx, vfsfs, opts)
 	if err != nil {
 		return linux.Statx{}, err
 	}
 	statx.Blksize = 1024
-	statx.RdevMajor = linux.UNIX98_PTY_SLAVE_MAJOR
+	statx.RdevMajor = linux.UNIX98_PTY_REPLICA_MAJOR
 	statx.RdevMinor = si.t.n
 	return statx, nil
 }
 
 // SetStat implements kernfs.Inode.SetStat
-func (si *slaveInode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
+func (si *replicaInode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
 	if opts.Stat.Mask&linux.STATX_SIZE != 0 {
 		return syserror.EINVAL
 	}
 	return si.InodeAttrs.SetStat(ctx, vfsfs, creds, opts)
 }
 
-type slaveFileDescription struct {
+type replicaFileDescription struct {
 	vfsfd vfs.FileDescription
 	vfs.FileDescriptionDefaultImpl
 	vfs.LockFD
 
-	inode *slaveInode
+	inode *replicaInode
 }
 
-var _ vfs.FileDescriptionImpl = (*slaveFileDescription)(nil)
+var _ vfs.FileDescriptionImpl = (*replicaFileDescription)(nil)
 
 // Release implements fs.FileOperations.Release.
-func (sfd *slaveFileDescription) Release(ctx context.Context) {
+func (sfd *replicaFileDescription) Release(ctx context.Context) {
 	sfd.inode.DecRef(ctx)
 }
 
 // EventRegister implements waiter.Waitable.EventRegister.
-func (sfd *slaveFileDescription) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
-	sfd.inode.t.ld.slaveWaiter.EventRegister(e, mask)
+func (sfd *replicaFileDescription) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
+	sfd.inode.t.ld.replicaWaiter.EventRegister(e, mask)
 }
 
 // EventUnregister implements waiter.Waitable.EventUnregister.
-func (sfd *slaveFileDescription) EventUnregister(e *waiter.Entry) {
-	sfd.inode.t.ld.slaveWaiter.EventUnregister(e)
+func (sfd *replicaFileDescription) EventUnregister(e *waiter.Entry) {
+	sfd.inode.t.ld.replicaWaiter.EventUnregister(e)
 }
 
 // Readiness implements waiter.Waitable.Readiness.
-func (sfd *slaveFileDescription) Readiness(mask waiter.EventMask) waiter.EventMask {
-	return sfd.inode.t.ld.slaveReadiness()
+func (sfd *replicaFileDescription) Readiness(mask waiter.EventMask) waiter.EventMask {
+	return sfd.inode.t.ld.replicaReadiness()
 }
 
 // Read implements vfs.FileDescriptionImpl.Read.
-func (sfd *slaveFileDescription) Read(ctx context.Context, dst usermem.IOSequence, _ vfs.ReadOptions) (int64, error) {
+func (sfd *replicaFileDescription) Read(ctx context.Context, dst usermem.IOSequence, _ vfs.ReadOptions) (int64, error) {
 	return sfd.inode.t.ld.inputQueueRead(ctx, dst)
 }
 
 // Write implements vfs.FileDescriptionImpl.Write.
-func (sfd *slaveFileDescription) Write(ctx context.Context, src usermem.IOSequence, _ vfs.WriteOptions) (int64, error) {
+func (sfd *replicaFileDescription) Write(ctx context.Context, src usermem.IOSequence, _ vfs.WriteOptions) (int64, error) {
 	return sfd.inode.t.ld.outputQueueWrite(ctx, src)
 }
 
 // Ioctl implements vfs.FileDescriptionImpl.Ioctl.
-func (sfd *slaveFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+func (sfd *replicaFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
 	t := kernel.TaskFromContext(ctx)
 	if t == nil {
 		// ioctl(2) may only be called from a task goroutine.
@@ -182,24 +182,24 @@ func (sfd *slaveFileDescription) Ioctl(ctx context.Context, io usermem.IO, args
 }
 
 // SetStat implements vfs.FileDescriptionImpl.SetStat.
-func (sfd *slaveFileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
+func (sfd *replicaFileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
 	creds := auth.CredentialsFromContext(ctx)
 	fs := sfd.vfsfd.VirtualDentry().Mount().Filesystem()
 	return sfd.inode.SetStat(ctx, fs, creds, opts)
 }
 
 // Stat implements vfs.FileDescriptionImpl.Stat.
-func (sfd *slaveFileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
+func (sfd *replicaFileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
 	fs := sfd.vfsfd.VirtualDentry().Mount().Filesystem()
 	return sfd.inode.Stat(ctx, fs, opts)
 }
 
 // LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (sfd *slaveFileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+func (sfd *replicaFileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
 	return sfd.Locks().LockPOSIX(ctx, &sfd.vfsfd, uid, t, start, length, whence, block)
 }
 
 // UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (sfd *slaveFileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+func (sfd *replicaFileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
 	return sfd.Locks().UnlockPOSIX(ctx, &sfd.vfsfd, uid, start, length, whence)
 }
diff --git a/pkg/sentry/fsimpl/devpts/terminal.go b/pkg/sentry/fsimpl/devpts/terminal.go
index e88eb63607..731955d627 100644
--- a/pkg/sentry/fsimpl/devpts/terminal.go
+++ b/pkg/sentry/fsimpl/devpts/terminal.go
@@ -36,18 +36,18 @@ type Terminal struct {
 	// this terminal. This field is immutable.
 	masterKTTY *kernel.TTY
 
-	// slaveKTTY contains the controlling process of the slave end of this
+	// replicaKTTY contains the controlling process of the replica end of this
 	// terminal. This field is immutable.
-	slaveKTTY *kernel.TTY
+	replicaKTTY *kernel.TTY
 }
 
 func newTerminal(n uint32) *Terminal {
-	termios := linux.DefaultSlaveTermios
+	termios := linux.DefaultReplicaTermios
 	t := Terminal{
-		n:          n,
-		ld:         newLineDiscipline(termios),
-		masterKTTY: &kernel.TTY{Index: n},
-		slaveKTTY:  &kernel.TTY{Index: n},
+		n:           n,
+		ld:          newLineDiscipline(termios),
+		masterKTTY:  &kernel.TTY{Index: n},
+		replicaKTTY: &kernel.TTY{Index: n},
 	}
 	return &t
 }
@@ -113,5 +113,5 @@ func (tm *Terminal) tty(isMaster bool) *kernel.TTY {
 	if isMaster {
 		return tm.masterKTTY
 	}
-	return tm.slaveKTTY
+	return tm.replicaKTTY
 }
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index 7561f821cb..1bd0e4ee85 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -496,7 +496,7 @@ func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount, flags u
 		if i.isTTY {
 			fd := &TTYFileDescription{
 				fileDescription: fileDescription{inode: i},
-				termios:         linux.DefaultSlaveTermios,
+				termios:         linux.DefaultReplicaTermios,
 			}
 			fd.LockFD.Init(&i.locks)
 			vfsfd := &fd.vfsfd
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 163265afe1..ea0461a3dd 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -453,17 +453,17 @@ func (m *mountHint) isSupported() bool {
 func (m *mountHint) checkCompatible(mount specs.Mount) error {
 	// Remove options that don't affect to mount's behavior.
 	masterOpts := filterUnsupportedOptions(m.mount)
-	slaveOpts := filterUnsupportedOptions(mount)
+	replicaOpts := filterUnsupportedOptions(mount)
 
-	if len(masterOpts) != len(slaveOpts) {
-		return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", masterOpts, slaveOpts)
+	if len(masterOpts) != len(replicaOpts) {
+		return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", masterOpts, replicaOpts)
 	}
 
 	sort.Strings(masterOpts)
-	sort.Strings(slaveOpts)
+	sort.Strings(replicaOpts)
 	for i, opt := range masterOpts {
-		if opt != slaveOpts[i] {
-			return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", masterOpts, slaveOpts)
+		if opt != replicaOpts[i] {
+			return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", masterOpts, replicaOpts)
 		}
 	}
 	return nil
diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go
index 600876a274..775ed4b432 100644
--- a/runsc/cmd/exec.go
+++ b/runsc/cmd/exec.go
@@ -220,7 +220,7 @@ func (ex *Exec) execChildAndWait(waitStatus *syscall.WaitStatus) subcommands.Exi
 	cmd.Stderr = os.Stderr
 
 	// If the console control socket file is provided, then create a new
-	// pty master/slave pair and set the TTY on the sandbox process.
+	// pty master/replica pair and set the TTY on the sandbox process.
 	if ex.consoleSocket != "" {
 		// Create a new TTY pair and send the master on the provided socket.
 		tty, err := console.NewWithSocket(ex.consoleSocket)
@@ -229,7 +229,7 @@ func (ex *Exec) execChildAndWait(waitStatus *syscall.WaitStatus) subcommands.Exi
 		}
 		defer tty.Close()
 
-		// Set stdio to the new TTY slave.
+		// Set stdio to the new TTY replica.
 		cmd.Stdin = tty
 		cmd.Stdout = tty
 		cmd.Stderr = tty
diff --git a/runsc/console/console.go b/runsc/console/console.go
index 64b23639a5..dbb88e117b 100644
--- a/runsc/console/console.go
+++ b/runsc/console/console.go
@@ -24,11 +24,11 @@ import (
 	"golang.org/x/sys/unix"
 )
 
-// NewWithSocket creates pty master/slave pair, sends the master FD over the given
-// socket, and returns the slave.
+// NewWithSocket creates pty master/replica pair, sends the master FD over the given
+// socket, and returns the replica.
 func NewWithSocket(socketPath string) (*os.File, error) {
-	// Create a new pty master and slave.
-	ptyMaster, ptySlave, err := pty.Open()
+	// Create a new pty master and replica.
+	ptyMaster, ptyReplica, err := pty.Open()
 	if err != nil {
 		return nil, fmt.Errorf("opening pty: %v", err)
 	}
@@ -37,18 +37,18 @@ func NewWithSocket(socketPath string) (*os.File, error) {
 	// Get a connection to the socket path.
 	conn, err := net.Dial("unix", socketPath)
 	if err != nil {
-		ptySlave.Close()
+		ptyReplica.Close()
 		return nil, fmt.Errorf("dialing socket %q: %v", socketPath, err)
 	}
 	defer conn.Close()
 	uc, ok := conn.(*net.UnixConn)
 	if !ok {
-		ptySlave.Close()
+		ptyReplica.Close()
 		return nil, fmt.Errorf("connection is not a UnixConn: %T", conn)
 	}
 	socket, err := uc.File()
 	if err != nil {
-		ptySlave.Close()
+		ptyReplica.Close()
 		return nil, fmt.Errorf("getting file for unix socket %v: %v", uc, err)
 	}
 	defer socket.Close()
@@ -56,8 +56,8 @@ func NewWithSocket(socketPath string) (*os.File, error) {
 	// Send the master FD over the connection.
 	msg := unix.UnixRights(int(ptyMaster.Fd()))
 	if err := unix.Sendmsg(int(socket.Fd()), []byte("pty-master"), msg, nil, 0); err != nil {
-		ptySlave.Close()
+		ptyReplica.Close()
 		return nil, fmt.Errorf("sending console over unix socket %q: %v", socketPath, err)
 	}
-	return ptySlave, nil
+	return ptyReplica, nil
 }
diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go
index 995d4e267a..4228399b87 100644
--- a/runsc/container/console_test.go
+++ b/runsc/container/console_test.go
@@ -185,14 +185,14 @@ func TestJobControlSignalExec(t *testing.T) {
 		t.Fatalf("error starting container: %v", err)
 	}
 
-	// Create a pty master/slave. The slave will be passed to the exec
+	// Create a pty master/replica. The replica will be passed to the exec
 	// process.
-	ptyMaster, ptySlave, err := pty.Open()
+	ptyMaster, ptyReplica, err := pty.Open()
 	if err != nil {
 		t.Fatalf("error opening pty: %v", err)
 	}
 	defer ptyMaster.Close()
-	defer ptySlave.Close()
+	defer ptyReplica.Close()
 
 	// Exec bash and attach a terminal. Note that occasionally /bin/sh
 	// may be a different shell or have a different configuration (such
@@ -203,9 +203,9 @@ func TestJobControlSignalExec(t *testing.T) {
 		// Don't let bash execute from profile or rc files, otherwise
 		// our PID counts get messed up.
 		Argv: []string{"/bin/bash", "--noprofile", "--norc"},
-		// Pass the pty slave as FD 0, 1, and 2.
+		// Pass the pty replica as FD 0, 1, and 2.
 		FilePayload: urpc.FilePayload{
-			Files: []*os.File{ptySlave, ptySlave, ptySlave},
+			Files: []*os.File{ptyReplica, ptyReplica, ptyReplica},
 		},
 		StdioIsPty: true,
 	}
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index 1beea123fc..da1694280e 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -1360,7 +1360,7 @@ func TestMultiContainerSharedMountRestart(t *testing.T) {
 }
 
 // Test that unsupported pod mounts options are ignored when matching master and
-// slave mounts.
+// replica mounts.
 func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) {
 	for name, conf := range configsWithVFS2(t, all...) {
 		t.Run(name, func(t *testing.T) {
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index a339937fb6..a8f4f64a53 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -478,10 +478,10 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
 	cmd.Stderr = nil
 
 	// If the console control socket file is provided, then create a new
-	// pty master/slave pair and set the TTY on the sandbox process.
+	// pty master/replica pair and set the TTY on the sandbox process.
 	if args.Spec.Process.Terminal && args.ConsoleSocket != "" {
 		// console.NewWithSocket will send the master on the given
-		// socket, and return the slave.
+		// socket, and return the replica.
 		tty, err := console.NewWithSocket(args.ConsoleSocket)
 		if err != nil {
 			return fmt.Errorf("setting up console with socket %q: %v", args.ConsoleSocket, err)
diff --git a/test/syscalls/linux/pty.cc b/test/syscalls/linux/pty.cc
index 2e4ab6ca86..0b174e2be2 100644
--- a/test/syscalls/linux/pty.cc
+++ b/test/syscalls/linux/pty.cc
@@ -71,7 +71,7 @@ constexpr absl::Duration kTimeout = absl::Seconds(20);
 // The maximum line size in bytes returned per read from a pty file.
 constexpr int kMaxLineSize = 4096;
 
-constexpr char kMainPath[] = "/dev/ptmx";
+constexpr char kMasterPath[] = "/dev/ptmx";
 
 // glibc defines its own, different, version of struct termios. We care about
 // what the kernel does, not glibc.
@@ -388,22 +388,22 @@ PosixErrorOr<size_t> PollAndReadFd(int fd, void* buf, size_t count,
 TEST(PtyTrunc, Truncate) {
   // Opening PTYs with O_TRUNC shouldn't cause an error, but calls to
   // (f)truncate should.
-  FileDescriptor main =
-      ASSERT_NO_ERRNO_AND_VALUE(Open(kMainPath, O_RDWR | O_TRUNC));
-  int n = ASSERT_NO_ERRNO_AND_VALUE(ReplicaID(main));
+  FileDescriptor master =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(kMasterPath, O_RDWR | O_TRUNC));
+  int n = ASSERT_NO_ERRNO_AND_VALUE(ReplicaID(master));
   std::string spath = absl::StrCat("/dev/pts/", n);
   FileDescriptor replica =
       ASSERT_NO_ERRNO_AND_VALUE(Open(spath, O_RDWR | O_NONBLOCK | O_TRUNC));
 
-  EXPECT_THAT(truncate(kMainPath, 0), SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(truncate(kMasterPath, 0), SyscallFailsWithErrno(EINVAL));
   EXPECT_THAT(truncate(spath.c_str(), 0), SyscallFailsWithErrno(EINVAL));
-  EXPECT_THAT(ftruncate(main.get(), 0), SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(ftruncate(master.get(), 0), SyscallFailsWithErrno(EINVAL));
   EXPECT_THAT(ftruncate(replica.get(), 0), SyscallFailsWithErrno(EINVAL));
 }
 
-TEST(BasicPtyTest, StatUnopenedMain) {
+TEST(BasicPtyTest, StatUnopenedMaster) {
   struct stat s;
-  ASSERT_THAT(stat(kMainPath, &s), SyscallSucceeds());
+  ASSERT_THAT(stat(kMasterPath, &s), SyscallSucceeds());
 
   EXPECT_EQ(s.st_rdev, makedev(TTYAUX_MAJOR, kPtmxMinor));
   EXPECT_EQ(s.st_size, 0);
@@ -454,41 +454,41 @@ void ExpectReadable(const FileDescriptor& fd, int expected, char* buf) {
   EXPECT_EQ(expected, n);
 }
 
-TEST(BasicPtyTest, OpenMainReplica) {
-  FileDescriptor main = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
-  FileDescriptor replica = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(main));
+TEST(BasicPtyTest, OpenMasterReplica) {
+  FileDescriptor master = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
+  FileDescriptor replica = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(master));
 }
 
-// The replica entry in /dev/pts/ disappears when the main is closed, even if
+// The replica entry in /dev/pts/ disappears when the master is closed, even if
 // the replica is still open.
-TEST(BasicPtyTest, ReplicaEntryGoneAfterMainClose) {
-  FileDescriptor main = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
-  FileDescriptor replica = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(main));
+TEST(BasicPtyTest, ReplicaEntryGoneAfterMasterClose) {
+  FileDescriptor master = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
+  FileDescriptor replica = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(master));
 
   // Get pty index.
   int index = -1;
-  ASSERT_THAT(ioctl(main.get(), TIOCGPTN, &index), SyscallSucceeds());
+  ASSERT_THAT(ioctl(master.get(), TIOCGPTN, &index), SyscallSucceeds());
 
   std::string path = absl::StrCat("/dev/pts/", index);
 
   struct stat st;
   EXPECT_THAT(stat(path.c_str(), &st), SyscallSucceeds());
 
-  main.reset();
+  master.reset();
 
   EXPECT_THAT(stat(path.c_str(), &st), SyscallFailsWithErrno(ENOENT));
 }
 
 TEST(BasicPtyTest, Getdents) {
-  FileDescriptor main1 = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
+  FileDescriptor master1 = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
   int index1 = -1;
-  ASSERT_THAT(ioctl(main1.get(), TIOCGPTN, &index1), SyscallSucceeds());
-  FileDescriptor replica1 = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(main1));
+  ASSERT_THAT(ioctl(master1.get(), TIOCGPTN, &index1), SyscallSucceeds());
+  FileDescriptor replica1 = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(master1));
 
-  FileDescriptor main2 = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
+  FileDescriptor master2 = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
   int index2 = -1;
-  ASSERT_THAT(ioctl(main2.get(), TIOCGPTN, &index2), SyscallSucceeds());
-  FileDescriptor replica2 = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(main2));
+  ASSERT_THAT(ioctl(master2.get(), TIOCGPTN, &index2), SyscallSucceeds());
+  FileDescriptor replica2 = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(master2));
 
   // The directory contains ptmx, index1, and index2. (Plus any additional PTYs
   // unrelated to this test.)
@@ -498,9 +498,9 @@ TEST(BasicPtyTest, Getdents) {
   EXPECT_THAT(contents, Contains(absl::StrCat(index1)));
   EXPECT_THAT(contents, Contains(absl::StrCat(index2)));
 
-  main2.reset();
+  master2.reset();
 
-  // The directory contains ptmx and index1, but not index2 since the main is
+  // The directory contains ptmx and index1, but not index2 since the master is
   // closed. (Plus any additional PTYs unrelated to this test.)
 
   contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/dev/pts/", true));
@@ -519,8 +519,8 @@ TEST(BasicPtyTest, Getdents) {
 class PtyTest : public ::testing::Test {
  protected:
   void SetUp() override {
-    main_ = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK));
-    replica_ = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(main_));
+    master_ = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK));
+    replica_ = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(master_));
   }
 
   void DisableCanonical() {
@@ -537,21 +537,22 @@ class PtyTest : public ::testing::Test {
     EXPECT_THAT(ioctl(replica_.get(), TCSETS, &t), SyscallSucceeds());
   }
 
-  // Main and replica ends of the PTY. Non-blocking.
-  FileDescriptor main_;
+  // Master and replica ends of the PTY. Non-blocking.
+  FileDescriptor master_;
   FileDescriptor replica_;
 };
 
-// Main to replica sanity test.
-TEST_F(PtyTest, WriteMainToReplica) {
-  // N.B. by default, the replica reads nothing until the main writes a newline.
+// Master to replica sanity test.
+TEST_F(PtyTest, WriteMasterToReplica) {
+  // N.B. by default, the replica reads nothing until the master writes a
+  // newline.
   constexpr char kBuf[] = "hello\n";
 
-  EXPECT_THAT(WriteFd(main_.get(), kBuf, sizeof(kBuf) - 1),
+  EXPECT_THAT(WriteFd(master_.get(), kBuf, sizeof(kBuf) - 1),
               SyscallSucceedsWithValue(sizeof(kBuf) - 1));
 
-  // Linux moves data from the main to the replica via async work scheduled via
-  // tty_flip_buffer_push. Since it is asynchronous, the data may not be
+  // Linux moves data from the master to the replica via async work scheduled
+  // via tty_flip_buffer_push. Since it is asynchronous, the data may not be
   // available for reading immediately. Instead we must poll and assert that it
   // becomes available "soon".
 
@@ -561,63 +562,63 @@ TEST_F(PtyTest, WriteMainToReplica) {
   EXPECT_EQ(memcmp(buf, kBuf, sizeof(kBuf)), 0);
 }
 
-// Replica to main sanity test.
-TEST_F(PtyTest, WriteReplicaToMain) {
-  // N.B. by default, the main reads nothing until the replica writes a newline,
-  // and the main gets a carriage return.
+// Replica to master sanity test.
+TEST_F(PtyTest, WriteReplicaToMaster) {
+  // N.B. by default, the master reads nothing until the replica writes a
+  // newline, and the master gets a carriage return.
   constexpr char kInput[] = "hello\n";
   constexpr char kExpected[] = "hello\r\n";
 
   EXPECT_THAT(WriteFd(replica_.get(), kInput, sizeof(kInput) - 1),
               SyscallSucceedsWithValue(sizeof(kInput) - 1));
 
-  // Linux moves data from the main to the replica via async work scheduled via
-  // tty_flip_buffer_push. Since it is asynchronous, the data may not be
+  // Linux moves data from the replica to the master via async work scheduled
+  // via tty_flip_buffer_push. Since it is asynchronous, the data may not be
   // available for reading immediately. Instead we must poll and assert that it
   // becomes available "soon".
 
   char buf[sizeof(kExpected)] = {};
-  ExpectReadable(main_, sizeof(buf) - 1, buf);
+  ExpectReadable(master_, sizeof(buf) - 1, buf);
 
   EXPECT_EQ(memcmp(buf, kExpected, sizeof(kExpected)), 0);
 }
 
 TEST_F(PtyTest, WriteInvalidUTF8) {
   char c = 0xff;
-  ASSERT_THAT(syscall(__NR_write, main_.get(), &c, sizeof(c)),
+  ASSERT_THAT(syscall(__NR_write, master_.get(), &c, sizeof(c)),
               SyscallSucceedsWithValue(sizeof(c)));
 }
 
-// Both the main and replica report the standard default termios settings.
+// Both the master and replica report the standard default termios settings.
 //
-// Note that TCGETS on the main actually redirects to the replica (see comment
-// on MainTermiosUnchangable).
+// Note that TCGETS on the master actually redirects to the replica (see comment
+// on MasterTermiosUnchangable).
 TEST_F(PtyTest, DefaultTermios) {
   struct kernel_termios t = {};
   EXPECT_THAT(ioctl(replica_.get(), TCGETS, &t), SyscallSucceeds());
   EXPECT_EQ(t, DefaultTermios());
 
-  EXPECT_THAT(ioctl(main_.get(), TCGETS, &t), SyscallSucceeds());
+  EXPECT_THAT(ioctl(master_.get(), TCGETS, &t), SyscallSucceeds());
   EXPECT_EQ(t, DefaultTermios());
 }
 
-// Changing termios from the main actually affects the replica.
+// Changing termios from the master actually affects the replica.
 //
-// TCSETS on the main actually redirects to the replica (see comment on
-// MainTermiosUnchangable).
+// TCSETS on the master actually redirects to the replica (see comment on
+// MasterTermiosUnchangable).
 TEST_F(PtyTest, TermiosAffectsReplica) {
-  struct kernel_termios main_termios = {};
-  EXPECT_THAT(ioctl(main_.get(), TCGETS, &main_termios), SyscallSucceeds());
-  main_termios.c_lflag ^= ICANON;
-  EXPECT_THAT(ioctl(main_.get(), TCSETS, &main_termios), SyscallSucceeds());
+  struct kernel_termios master_termios = {};
+  EXPECT_THAT(ioctl(master_.get(), TCGETS, &master_termios), SyscallSucceeds());
+  master_termios.c_lflag ^= ICANON;
+  EXPECT_THAT(ioctl(master_.get(), TCSETS, &master_termios), SyscallSucceeds());
 
   struct kernel_termios replica_termios = {};
   EXPECT_THAT(ioctl(replica_.get(), TCGETS, &replica_termios),
               SyscallSucceeds());
-  EXPECT_EQ(main_termios, replica_termios);
+  EXPECT_EQ(master_termios, replica_termios);
 }
 
-// The main end of the pty has termios:
+// The master end of the pty has termios:
 //
 // struct kernel_termios t = {
 //   .c_iflag = 0;
@@ -629,25 +630,25 @@ TEST_F(PtyTest, TermiosAffectsReplica) {
 //
 // (From drivers/tty/pty.c:unix98_pty_init)
 //
-// All termios control ioctls on the main actually redirect to the replica
+// All termios control ioctls on the master actually redirect to the replica
 // (drivers/tty/tty_ioctl.c:tty_mode_ioctl), making it impossible to change the
-// main termios.
+// master termios.
 //
 // Verify this by setting ICRNL (which rewrites input \r to \n) and verify that
-// it has no effect on the main.
-TEST_F(PtyTest, MainTermiosUnchangable) {
-  struct kernel_termios main_termios = {};
-  EXPECT_THAT(ioctl(main_.get(), TCGETS, &main_termios), SyscallSucceeds());
-  main_termios.c_lflag |= ICRNL;
-  EXPECT_THAT(ioctl(main_.get(), TCSETS, &main_termios), SyscallSucceeds());
+// it has no effect on the master.
+TEST_F(PtyTest, MasterTermiosUnchangable) {
+  struct kernel_termios master_termios = {};
+  EXPECT_THAT(ioctl(master_.get(), TCGETS, &master_termios), SyscallSucceeds());
+  master_termios.c_lflag |= ICRNL;
+  EXPECT_THAT(ioctl(master_.get(), TCSETS, &master_termios), SyscallSucceeds());
 
   char c = '\r';
   ASSERT_THAT(WriteFd(replica_.get(), &c, 1), SyscallSucceedsWithValue(1));
 
-  ExpectReadable(main_, 1, &c);
+  ExpectReadable(master_, 1, &c);
   EXPECT_EQ(c, '\r');  // ICRNL had no effect!
 
-  ExpectFinished(main_);
+  ExpectFinished(master_);
 }
 
 // ICRNL rewrites input \r to \n.
@@ -658,7 +659,7 @@ TEST_F(PtyTest, TermiosICRNL) {
   ASSERT_THAT(ioctl(replica_.get(), TCSETS, &t), SyscallSucceeds());
 
   char c = '\r';
-  ASSERT_THAT(WriteFd(main_.get(), &c, 1), SyscallSucceedsWithValue(1));
+  ASSERT_THAT(WriteFd(master_.get(), &c, 1), SyscallSucceedsWithValue(1));
 
   ExpectReadable(replica_, 1, &c);
   EXPECT_EQ(c, '\n');
@@ -678,7 +679,7 @@ TEST_F(PtyTest, TermiosONLCR) {
 
   // Extra byte for NUL for EXPECT_STREQ.
   char buf[3] = {};
-  ExpectReadable(main_, 2, buf);
+  ExpectReadable(master_, 2, buf);
   EXPECT_STREQ(buf, "\r\n");
 
   ExpectFinished(replica_);
@@ -691,7 +692,7 @@ TEST_F(PtyTest, TermiosIGNCR) {
   ASSERT_THAT(ioctl(replica_.get(), TCSETS, &t), SyscallSucceeds());
 
   char c = '\r';
-  ASSERT_THAT(WriteFd(main_.get(), &c, 1), SyscallSucceedsWithValue(1));
+  ASSERT_THAT(WriteFd(master_.get(), &c, 1), SyscallSucceedsWithValue(1));
 
   // Nothing to read.
   ASSERT_THAT(PollAndReadFd(replica_.get(), &c, 1, kTimeout),
@@ -725,18 +726,18 @@ TEST_F(PtyTest, TermiosPollReplica) {
   absl::SleepFor(absl::Seconds(1));
 
   char s[] = "foo\n";
-  ASSERT_THAT(WriteFd(main_.get(), s, strlen(s) + 1), SyscallSucceeds());
+  ASSERT_THAT(WriteFd(master_.get(), s, strlen(s) + 1), SyscallSucceeds());
 }
 
-// Test that we can successfully poll for readable data from the main.
-TEST_F(PtyTest, TermiosPollMain) {
+// Test that we can successfully poll for readable data from the master.
+TEST_F(PtyTest, TermiosPollMaster) {
   struct kernel_termios t = DefaultTermios();
   t.c_iflag |= IGNCR;
   t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
-  ASSERT_THAT(ioctl(main_.get(), TCSETS, &t), SyscallSucceeds());
+  ASSERT_THAT(ioctl(master_.get(), TCSETS, &t), SyscallSucceeds());
 
   absl::Notification notify;
-  int mfd = main_.get();
+  int mfd = master_.get();
   ScopedThread th([mfd, &notify]() {
     notify.Notify();
 
@@ -765,7 +766,7 @@ TEST_F(PtyTest, TermiosINLCR) {
   ASSERT_THAT(ioctl(replica_.get(), TCSETS, &t), SyscallSucceeds());
 
   char c = '\n';
-  ASSERT_THAT(WriteFd(main_.get(), &c, 1), SyscallSucceedsWithValue(1));
+  ASSERT_THAT(WriteFd(master_.get(), &c, 1), SyscallSucceedsWithValue(1));
 
   ExpectReadable(replica_, 1, &c);
   EXPECT_EQ(c, '\r');
@@ -784,7 +785,7 @@ TEST_F(PtyTest, TermiosONOCR) {
   ASSERT_THAT(WriteFd(replica_.get(), &c, 1), SyscallSucceedsWithValue(1));
 
   // Nothing to read.
-  ASSERT_THAT(PollAndReadFd(main_.get(), &c, 1, kTimeout),
+  ASSERT_THAT(PollAndReadFd(master_.get(), &c, 1, kTimeout),
               PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
 
   // This time the column is greater than 0, so we should be able to read the CR
@@ -795,17 +796,17 @@ TEST_F(PtyTest, TermiosONOCR) {
               SyscallSucceedsWithValue(kInputSize));
 
   char buf[kInputSize] = {};
-  ExpectReadable(main_, kInputSize, buf);
+  ExpectReadable(master_, kInputSize, buf);
 
   EXPECT_EQ(memcmp(buf, kInput, kInputSize), 0);
 
-  ExpectFinished(main_);
+  ExpectFinished(master_);
 
   // Terminal should be at column 0 again, so no CR can be read.
   ASSERT_THAT(WriteFd(replica_.get(), &c, 1), SyscallSucceedsWithValue(1));
 
   // Nothing to read.
-  ASSERT_THAT(PollAndReadFd(main_.get(), &c, 1, kTimeout),
+  ASSERT_THAT(PollAndReadFd(master_.get(), &c, 1, kTimeout),
               PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
 }
 
@@ -819,10 +820,10 @@ TEST_F(PtyTest, TermiosOCRNL) {
   char c = '\r';
   ASSERT_THAT(WriteFd(replica_.get(), &c, 1), SyscallSucceedsWithValue(1));
 
-  ExpectReadable(main_, 1, &c);
+  ExpectReadable(master_, 1, &c);
   EXPECT_EQ(c, '\n');
 
-  ExpectFinished(main_);
+  ExpectFinished(master_);
 }
 
 // Tests that VEOL is disabled when we start, and that we can set it to enable
@@ -830,7 +831,7 @@ TEST_F(PtyTest, TermiosOCRNL) {
 TEST_F(PtyTest, VEOLTermination) {
   // Write a few bytes ending with '\0', and confirm that we can't read.
   constexpr char kInput[] = "hello";
-  ASSERT_THAT(WriteFd(main_.get(), kInput, sizeof(kInput)),
+  ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput)),
               SyscallSucceedsWithValue(sizeof(kInput)));
   char buf[sizeof(kInput)] = {};
   ASSERT_THAT(PollAndReadFd(replica_.get(), buf, sizeof(kInput), kTimeout),
@@ -841,7 +842,7 @@ TEST_F(PtyTest, VEOLTermination) {
   struct kernel_termios t = DefaultTermios();
   t.c_cc[VEOL] = delim;
   ASSERT_THAT(ioctl(replica_.get(), TCSETS, &t), SyscallSucceeds());
-  ASSERT_THAT(WriteFd(main_.get(), &delim, 1), SyscallSucceedsWithValue(1));
+  ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1));
 
   // Now we can read, as sending EOL caused the line to become available.
   ExpectReadable(replica_, sizeof(kInput), buf);
@@ -861,7 +862,7 @@ TEST_F(PtyTest, CanonBigWrite) {
   char input[kWriteLen];
   memset(input, 'M', kWriteLen - 1);
   input[kWriteLen - 1] = '\n';
-  ASSERT_THAT(WriteFd(main_.get(), input, kWriteLen),
+  ASSERT_THAT(WriteFd(master_.get(), input, kWriteLen),
               SyscallSucceedsWithValue(kWriteLen));
 
   // We can read the line.
@@ -877,7 +878,7 @@ TEST_F(PtyTest, SwitchCanonToNoncanon) {
   // Write a few bytes without a terminating character, switch to noncanonical
   // mode, and read them.
   constexpr char kInput[] = "hello";
-  ASSERT_THAT(WriteFd(main_.get(), kInput, sizeof(kInput)),
+  ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput)),
               SyscallSucceedsWithValue(sizeof(kInput)));
 
   // Nothing available yet.
@@ -896,7 +897,7 @@ TEST_F(PtyTest, SwitchCanonToNoncanon) {
 TEST_F(PtyTest, SwitchCanonToNonCanonNewline) {
   // Write a few bytes with a terminating character.
   constexpr char kInput[] = "hello\n";
-  ASSERT_THAT(WriteFd(main_.get(), kInput, sizeof(kInput)),
+  ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput)),
               SyscallSucceedsWithValue(sizeof(kInput)));
 
   DisableCanonical();
@@ -916,12 +917,12 @@ TEST_F(PtyTest, SwitchNoncanonToCanonNewlineBig) {
   constexpr int kWriteLen = 4100;
   char input[kWriteLen];
   memset(input, 'M', kWriteLen);
-  ASSERT_THAT(WriteFd(main_.get(), input, kWriteLen),
+  ASSERT_THAT(WriteFd(master_.get(), input, kWriteLen),
               SyscallSucceedsWithValue(kWriteLen));
   // Wait for the input queue to fill.
   ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), kMaxLineSize - 1));
   constexpr char delim = '\n';
-  ASSERT_THAT(WriteFd(main_.get(), &delim, 1), SyscallSucceedsWithValue(1));
+  ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1));
 
   EnableCanonical();
 
@@ -941,7 +942,7 @@ TEST_F(PtyTest, SwitchNoncanonToCanonNoNewline) {
   // Write a few bytes without a terminating character.
   // mode, and read them.
   constexpr char kInput[] = "hello";
-  ASSERT_THAT(WriteFd(main_.get(), kInput, sizeof(kInput) - 1),
+  ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput) - 1),
               SyscallSucceedsWithValue(sizeof(kInput) - 1));
 
   ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), sizeof(kInput) - 1));
@@ -963,7 +964,7 @@ TEST_F(PtyTest, SwitchNoncanonToCanonNoNewlineBig) {
   constexpr int kWriteLen = 4100;
   char input[kWriteLen];
   memset(input, 'M', kWriteLen);
-  ASSERT_THAT(WriteFd(main_.get(), input, kWriteLen),
+  ASSERT_THAT(WriteFd(master_.get(), input, kWriteLen),
               SyscallSucceedsWithValue(kWriteLen));
 
   ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), kMaxLineSize - 1));
@@ -987,12 +988,12 @@ TEST_F(PtyTest, NoncanonBigWrite) {
   for (int i = 0; i < kInputSize; i++) {
     // This makes too many syscalls for save/restore.
     const DisableSave ds;
-    ASSERT_THAT(WriteFd(main_.get(), &kInput, sizeof(kInput)),
+    ASSERT_THAT(WriteFd(master_.get(), &kInput, sizeof(kInput)),
                 SyscallSucceedsWithValue(sizeof(kInput)));
   }
 
   // We should be able to read out everything. Sleep a bit so that Linux has a
-  // chance to move data from the main to the replica.
+  // chance to move data from the master to the replica.
   ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), kMaxLineSize - 1));
   for (int i = 0; i < kInputSize; i++) {
     // This makes too many syscalls for save/restore.
@@ -1010,7 +1011,7 @@ TEST_F(PtyTest, NoncanonBigWrite) {
 // Test newline.
 TEST_F(PtyTest, TermiosICANONNewline) {
   char input[3] = {'a', 'b', 'c'};
-  ASSERT_THAT(WriteFd(main_.get(), input, sizeof(input)),
+  ASSERT_THAT(WriteFd(master_.get(), input, sizeof(input)),
               SyscallSucceedsWithValue(sizeof(input)));
 
   // Extra bytes for newline (written later) and NUL for EXPECT_STREQ.
@@ -1021,7 +1022,7 @@ TEST_F(PtyTest, TermiosICANONNewline) {
               PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
 
   char delim = '\n';
-  ASSERT_THAT(WriteFd(main_.get(), &delim, 1), SyscallSucceedsWithValue(1));
+  ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1));
 
   // Now it is available.
   ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), sizeof(input) + 1));
@@ -1036,7 +1037,7 @@ TEST_F(PtyTest, TermiosICANONNewline) {
 // Test EOF (^D).
 TEST_F(PtyTest, TermiosICANONEOF) {
   char input[3] = {'a', 'b', 'c'};
-  ASSERT_THAT(WriteFd(main_.get(), input, sizeof(input)),
+  ASSERT_THAT(WriteFd(master_.get(), input, sizeof(input)),
               SyscallSucceedsWithValue(sizeof(input)));
 
   // Extra byte for NUL for EXPECT_STREQ.
@@ -1046,7 +1047,7 @@ TEST_F(PtyTest, TermiosICANONEOF) {
   ASSERT_THAT(PollAndReadFd(replica_.get(), buf, sizeof(input), kTimeout),
               PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
   char delim = ControlCharacter('D');
-  ASSERT_THAT(WriteFd(main_.get(), &delim, 1), SyscallSucceedsWithValue(1));
+  ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1));
 
   // Now it is available. Note that ^D is not included.
   ExpectReadable(replica_, sizeof(input), buf);
@@ -1069,10 +1070,10 @@ TEST_F(PtyTest, CanonDiscard) {
     // This makes too many syscalls for save/restore.
     const DisableSave ds;
     for (int i = 0; i < kInputSize; i++) {
-      ASSERT_THAT(WriteFd(main_.get(), &kInput, sizeof(kInput)),
+      ASSERT_THAT(WriteFd(master_.get(), &kInput, sizeof(kInput)),
                   SyscallSucceedsWithValue(sizeof(kInput)));
     }
-    ASSERT_THAT(WriteFd(main_.get(), &delim, 1), SyscallSucceedsWithValue(1));
+    ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1));
   }
 
   // There should be multiple truncated lines available to read.
@@ -1091,9 +1092,9 @@ TEST_F(PtyTest, CanonMultiline) {
   constexpr char kInput2[] = "BLUE\n";
 
   // Write both lines.
-  ASSERT_THAT(WriteFd(main_.get(), kInput1, sizeof(kInput1) - 1),
+  ASSERT_THAT(WriteFd(master_.get(), kInput1, sizeof(kInput1) - 1),
               SyscallSucceedsWithValue(sizeof(kInput1) - 1));
-  ASSERT_THAT(WriteFd(main_.get(), kInput2, sizeof(kInput2) - 1),
+  ASSERT_THAT(WriteFd(master_.get(), kInput2, sizeof(kInput2) - 1),
               SyscallSucceedsWithValue(sizeof(kInput2) - 1));
 
   // Get the first line.
@@ -1117,9 +1118,9 @@ TEST_F(PtyTest, SwitchNoncanonToCanonMultiline) {
   constexpr char kExpected[] = "GO\nBLUE\n";
 
   // Write both lines.
-  ASSERT_THAT(WriteFd(main_.get(), kInput1, sizeof(kInput1) - 1),
+  ASSERT_THAT(WriteFd(master_.get(), kInput1, sizeof(kInput1) - 1),
               SyscallSucceedsWithValue(sizeof(kInput1) - 1));
-  ASSERT_THAT(WriteFd(main_.get(), kInput2, sizeof(kInput2) - 1),
+  ASSERT_THAT(WriteFd(master_.get(), kInput2, sizeof(kInput2) - 1),
               SyscallSucceedsWithValue(sizeof(kInput2) - 1));
 
   ASSERT_NO_ERRNO(
@@ -1140,7 +1141,7 @@ TEST_F(PtyTest, SwitchTwiceMultiline) {
 
   // Write each line.
   for (const std::string& input : kInputs) {
-    ASSERT_THAT(WriteFd(main_.get(), input.c_str(), input.size()),
+    ASSERT_THAT(WriteFd(master_.get(), input.c_str(), input.size()),
                 SyscallSucceedsWithValue(input.size()));
   }
 
@@ -1162,7 +1163,7 @@ TEST_F(PtyTest, SwitchTwiceMultiline) {
 TEST_F(PtyTest, QueueSize) {
   // Write the line.
   constexpr char kInput1[] = "GO\n";
-  ASSERT_THAT(WriteFd(main_.get(), kInput1, sizeof(kInput1) - 1),
+  ASSERT_THAT(WriteFd(master_.get(), kInput1, sizeof(kInput1) - 1),
               SyscallSucceedsWithValue(sizeof(kInput1) - 1));
   ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), sizeof(kInput1) - 1));
 
@@ -1170,7 +1171,7 @@ TEST_F(PtyTest, QueueSize) {
   // readable size.
   char input[kMaxLineSize];
   memset(input, 'M', kMaxLineSize);
-  ASSERT_THAT(WriteFd(main_.get(), input, kMaxLineSize),
+  ASSERT_THAT(WriteFd(master_.get(), input, kMaxLineSize),
               SyscallSucceedsWithValue(kMaxLineSize));
   int inputBufSize = ASSERT_NO_ERRNO_AND_VALUE(
       WaitUntilReceived(replica_.get(), sizeof(kInput1) - 1));
@@ -1192,10 +1193,11 @@ TEST_F(PtyTest, PartialBadBuffer) {
   // Leave only one free byte in the buffer.
   char* bad_buffer = buf + kPageSize - 1;
 
-  // Write to the main.
+  // Write to the master.
   constexpr char kBuf[] = "hello\n";
   constexpr size_t size = sizeof(kBuf) - 1;
-  EXPECT_THAT(WriteFd(main_.get(), kBuf, size), SyscallSucceedsWithValue(size));
+  EXPECT_THAT(WriteFd(master_.get(), kBuf, size),
+              SyscallSucceedsWithValue(size));
 
   // Read from the replica into bad_buffer.
   ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), size));
@@ -1207,14 +1209,14 @@ TEST_F(PtyTest, PartialBadBuffer) {
 
 TEST_F(PtyTest, SimpleEcho) {
   constexpr char kInput[] = "Mr. Eko";
-  EXPECT_THAT(WriteFd(main_.get(), kInput, strlen(kInput)),
+  EXPECT_THAT(WriteFd(master_.get(), kInput, strlen(kInput)),
               SyscallSucceedsWithValue(strlen(kInput)));
 
   char buf[100] = {};
-  ExpectReadable(main_, strlen(kInput), buf);
+  ExpectReadable(master_, strlen(kInput), buf);
 
   EXPECT_STREQ(buf, kInput);
-  ExpectFinished(main_);
+  ExpectFinished(master_);
 }
 
 TEST_F(PtyTest, GetWindowSize) {
@@ -1231,16 +1233,17 @@ TEST_F(PtyTest, SetReplicaWindowSize) {
   ASSERT_THAT(ioctl(replica_.get(), TIOCSWINSZ, &ws), SyscallSucceeds());
 
   struct winsize retrieved_ws = {};
-  ASSERT_THAT(ioctl(main_.get(), TIOCGWINSZ, &retrieved_ws), SyscallSucceeds());
+  ASSERT_THAT(ioctl(master_.get(), TIOCGWINSZ, &retrieved_ws),
+              SyscallSucceeds());
   EXPECT_EQ(retrieved_ws.ws_row, kRows);
   EXPECT_EQ(retrieved_ws.ws_col, kCols);
 }
 
-TEST_F(PtyTest, SetMainWindowSize) {
+TEST_F(PtyTest, SetMasterWindowSize) {
   constexpr uint16_t kRows = 343;
   constexpr uint16_t kCols = 2401;
   struct winsize ws = {.ws_row = kRows, .ws_col = kCols};
-  ASSERT_THAT(ioctl(main_.get(), TIOCSWINSZ, &ws), SyscallSucceeds());
+  ASSERT_THAT(ioctl(master_.get(), TIOCSWINSZ, &ws), SyscallSucceeds());
 
   struct winsize retrieved_ws = {};
   ASSERT_THAT(ioctl(replica_.get(), TIOCGWINSZ, &retrieved_ws),
@@ -1252,8 +1255,8 @@ TEST_F(PtyTest, SetMainWindowSize) {
 class JobControlTest : public ::testing::Test {
  protected:
   void SetUp() override {
-    main_ = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK));
-    replica_ = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(main_));
+    master_ = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK));
+    replica_ = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(master_));
 
     // Make this a session leader, which also drops the controlling terminal.
     // In the gVisor test environment, this test will be run as the session
@@ -1277,15 +1280,15 @@ class JobControlTest : public ::testing::Test {
     return PosixError(wstatus, "process returned");
   }
 
-  // Main and replica ends of the PTY. Non-blocking.
-  FileDescriptor main_;
+  // Master and replica ends of the PTY. Non-blocking.
+  FileDescriptor master_;
   FileDescriptor replica_;
 };
 
-TEST_F(JobControlTest, SetTTYMain) {
+TEST_F(JobControlTest, SetTTYMaster) {
   auto res = RunInChild([=]() {
     TEST_PCHECK(setsid() >= 0);
-    TEST_PCHECK(!ioctl(main_.get(), TIOCSCTTY, 0));
+    TEST_PCHECK(!ioctl(master_.get(), TIOCSCTTY, 0));
   });
   ASSERT_NO_ERRNO(res);
 }
@@ -1360,7 +1363,7 @@ TEST_F(JobControlTest, ReleaseWrongTTY) {
   auto res = RunInChild([=]() {
     TEST_PCHECK(setsid() >= 0);
     TEST_PCHECK(!ioctl(replica_.get(), TIOCSCTTY, 0));
-    TEST_PCHECK(ioctl(main_.get(), TIOCNOTTY) < 0 && errno == ENOTTY);
+    TEST_PCHECK(ioctl(master_.get(), TIOCNOTTY) < 0 && errno == ENOTTY);
   });
   ASSERT_NO_ERRNO(res);
 }
diff --git a/test/syscalls/linux/pty_root.cc b/test/syscalls/linux/pty_root.cc
index a534cf0bba..4ac648729c 100644
--- a/test/syscalls/linux/pty_root.cc
+++ b/test/syscalls/linux/pty_root.cc
@@ -48,9 +48,9 @@ TEST(JobControlRootTest, StealTTY) {
     ASSERT_THAT(setsid(), SyscallSucceeds());
   }
 
-  FileDescriptor main =
+  FileDescriptor master =
       ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK));
-  FileDescriptor replica = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(main));
+  FileDescriptor replica = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(master));
 
   // Make replica the controlling terminal.
   ASSERT_THAT(ioctl(replica.get(), TIOCSCTTY, 0), SyscallSucceeds());
diff --git a/test/util/pty_util.cc b/test/util/pty_util.cc
index 5fa6229220..2cf0bea742 100644
--- a/test/util/pty_util.cc
+++ b/test/util/pty_util.cc
@@ -23,25 +23,25 @@
 namespace gvisor {
 namespace testing {
 
-PosixErrorOr<FileDescriptor> OpenReplica(const FileDescriptor& main) {
-  PosixErrorOr<int> n = ReplicaID(main);
+PosixErrorOr<FileDescriptor> OpenReplica(const FileDescriptor& master) {
+  PosixErrorOr<int> n = ReplicaID(master);
   if (!n.ok()) {
     return PosixErrorOr<FileDescriptor>(n.error());
   }
   return Open(absl::StrCat("/dev/pts/", n.ValueOrDie()), O_RDWR | O_NONBLOCK);
 }
 
-PosixErrorOr<int> ReplicaID(const FileDescriptor& main) {
+PosixErrorOr<int> ReplicaID(const FileDescriptor& master) {
   // Get pty index.
   int n;
-  int ret = ioctl(main.get(), TIOCGPTN, &n);
+  int ret = ioctl(master.get(), TIOCGPTN, &n);
   if (ret < 0) {
     return PosixError(errno, "ioctl(TIOCGPTN) failed");
   }
 
   // Unlock pts.
   int unlock = 0;
-  ret = ioctl(main.get(), TIOCSPTLCK, &unlock);
+  ret = ioctl(master.get(), TIOCSPTLCK, &unlock);
   if (ret < 0) {
     return PosixError(errno, "ioctl(TIOSPTLCK) failed");
   }
diff --git a/test/util/pty_util.h b/test/util/pty_util.h
index dff6adab53..ed7658868f 100644
--- a/test/util/pty_util.h
+++ b/test/util/pty_util.h
@@ -21,11 +21,11 @@
 namespace gvisor {
 namespace testing {
 
-// Opens the replica end of the passed main as R/W and nonblocking.
-PosixErrorOr<FileDescriptor> OpenReplica(const FileDescriptor& main);
+// Opens the replica end of the passed master as R/W and nonblocking.
+PosixErrorOr<FileDescriptor> OpenReplica(const FileDescriptor& master);
 
-// Get the number of the replica end of the main.
-PosixErrorOr<int> ReplicaID(const FileDescriptor& main);
+// Get the number of the replica end of the master.
+PosixErrorOr<int> ReplicaID(const FileDescriptor& master);
 
 }  // namespace testing
 }  // namespace gvisor

From 574743ed7a1bcc6affdead0d19afb4c4ebc29362 Mon Sep 17 00:00:00 2001
From: Dean Deng <deandeng@google.com>
Date: Tue, 1 Sep 2020 17:19:26 -0700
Subject: [PATCH 130/211] Test opening file handles with different permissions.

These were problematic for vfs2 gofers before correctly implementing separate
read/write handles.

PiperOrigin-RevId: 329613261
---
 test/syscalls/linux/open_create.cc | 116 ++++++++++++++++++++++++++++-
 1 file changed, 113 insertions(+), 3 deletions(-)

diff --git a/test/syscalls/linux/open_create.cc b/test/syscalls/linux/open_create.cc
index 51eacf3f23..78c36f98f8 100644
--- a/test/syscalls/linux/open_create.cc
+++ b/test/syscalls/linux/open_create.cc
@@ -88,21 +88,21 @@ TEST(CreateTest, CreateExclusively) {
               SyscallFailsWithErrno(EEXIST));
 }
 
-TEST(CreateTeast, CreatWithOTrunc) {
+TEST(CreateTest, CreatWithOTrunc) {
   std::string dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncd");
   ASSERT_THAT(mkdir(dirpath.c_str(), 0777), SyscallSucceeds());
   ASSERT_THAT(open(dirpath.c_str(), O_CREAT | O_TRUNC, 0666),
               SyscallFailsWithErrno(EISDIR));
 }
 
-TEST(CreateTeast, CreatDirWithOTruncAndReadOnly) {
+TEST(CreateTest, CreatDirWithOTruncAndReadOnly) {
   std::string dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncd");
   ASSERT_THAT(mkdir(dirpath.c_str(), 0777), SyscallSucceeds());
   ASSERT_THAT(open(dirpath.c_str(), O_CREAT | O_TRUNC | O_RDONLY, 0666),
               SyscallFailsWithErrno(EISDIR));
 }
 
-TEST(CreateTeast, CreatFileWithOTruncAndReadOnly) {
+TEST(CreateTest, CreatFileWithOTruncAndReadOnly) {
   std::string dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncfile");
   int dirfd;
   ASSERT_THAT(dirfd = open(dirpath.c_str(), O_RDWR | O_CREAT, 0666),
@@ -149,6 +149,116 @@ TEST(CreateTest, OpenCreateROThenRW) {
   EXPECT_THAT(WriteFd(fd2.get(), &c, 1), SyscallSucceedsWithValue(1));
 }
 
+TEST(CreateTest, ChmodReadToWriteBetweenOpens_NoRandomSave) {
+  // Make sure we don't have CAP_DAC_OVERRIDE, since that allows the user to
+  // override file read/write permissions. CAP_DAC_READ_SEARCH needs to be
+  // cleared for the same reason.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  const TempPath file =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0400));
+
+  const FileDescriptor rfd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+  // Cannot restore after making permissions more restrictive.
+  const DisableSave ds;
+  ASSERT_THAT(fchmod(rfd.get(), 0200), SyscallSucceeds());
+
+  EXPECT_THAT(open(file.path().c_str(), O_RDONLY),
+              SyscallFailsWithErrno(EACCES));
+
+  const FileDescriptor wfd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_WRONLY));
+
+  char c = 'x';
+  EXPECT_THAT(write(wfd.get(), &c, 1), SyscallSucceedsWithValue(1));
+  c = 0;
+  EXPECT_THAT(read(rfd.get(), &c, 1), SyscallSucceedsWithValue(1));
+  EXPECT_EQ(c, 'x');
+}
+
+TEST(CreateTest, ChmodWriteToReadBetweenOpens_NoRandomSave) {
+  // Make sure we don't have CAP_DAC_OVERRIDE, since that allows the user to
+  // override file read/write permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+
+  const TempPath file =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0200));
+
+  const FileDescriptor wfd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_WRONLY));
+
+  // Cannot restore after making permissions more restrictive.
+  const DisableSave ds;
+  ASSERT_THAT(fchmod(wfd.get(), 0400), SyscallSucceeds());
+
+  EXPECT_THAT(open(file.path().c_str(), O_WRONLY),
+              SyscallFailsWithErrno(EACCES));
+
+  const FileDescriptor rfd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+  char c = 'x';
+  EXPECT_THAT(write(wfd.get(), &c, 1), SyscallSucceedsWithValue(1));
+  c = 0;
+  EXPECT_THAT(read(rfd.get(), &c, 1), SyscallSucceedsWithValue(1));
+  EXPECT_EQ(c, 'x');
+}
+
+TEST(CreateTest, CreateWithReadFlagNotAllowedByMode_NoRandomSave) {
+  // The only time we can open a file with flags forbidden by its permissions
+  // is when we are creating the file. We cannot re-open with the same flags,
+  // so we cannot restore an fd obtained from such an operation.
+  const DisableSave ds;
+
+  // Make sure we don't have CAP_DAC_OVERRIDE, since that allows the user to
+  // override file read/write permissions. CAP_DAC_READ_SEARCH needs to be
+  // cleared for the same reason.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  // Create and open a file with read flag but without read permissions.
+  const std::string path = NewTempAbsPath();
+  const FileDescriptor rfd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_CREAT | O_RDONLY, 0222));
+
+  EXPECT_THAT(open(path.c_str(), O_RDONLY), SyscallFailsWithErrno(EACCES));
+  const FileDescriptor wfd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_WRONLY));
+
+  char c = 'x';
+  EXPECT_THAT(write(wfd.get(), &c, 1), SyscallSucceedsWithValue(1));
+  c = 0;
+  EXPECT_THAT(read(rfd.get(), &c, 1), SyscallSucceedsWithValue(1));
+  EXPECT_EQ(c, 'x');
+}
+
+TEST(CreateTest, CreateWithWriteFlagNotAllowedByMode_NoRandomSave) {
+  // The only time we can open a file with flags forbidden by its permissions
+  // is when we are creating the file. We cannot re-open with the same flags,
+  // so we cannot restore an fd obtained from such an operation.
+  const DisableSave ds;
+
+  // Make sure we don't have CAP_DAC_OVERRIDE, since that allows the user to
+  // override file read/write permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+
+  // Create and open a file with write flag but without write permissions.
+  const std::string path = NewTempAbsPath();
+  const FileDescriptor wfd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_CREAT | O_WRONLY, 0444));
+
+  EXPECT_THAT(open(path.c_str(), O_WRONLY), SyscallFailsWithErrno(EACCES));
+  const FileDescriptor rfd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_RDONLY));
+
+  char c = 'x';
+  EXPECT_THAT(write(wfd.get(), &c, 1), SyscallSucceedsWithValue(1));
+  c = 0;
+  EXPECT_THAT(read(rfd.get(), &c, 1), SyscallSucceedsWithValue(1));
+  EXPECT_EQ(c, 'x');
+}
+
 }  // namespace
 
 }  // namespace testing

From 29306b3f646c57aaa475ffe224b04d4473e364c7 Mon Sep 17 00:00:00 2001
From: Mithun Iyer <iyerm@google.com>
Date: Tue, 1 Sep 2020 17:40:28 -0700
Subject: [PATCH 131/211] Fix handling of unacceptable ACKs during close.

On receiving an ACK with an unacceptable ACK number in a closing state,
TCP needs to reply with an ACK carrying the correct seq and ack numbers and
remain in the same state. This change follows RFC 793, page 37, except that
it does not apply to the ESTABLISHED state, just as in Linux.
Also add more tests to check for OTW (out-of-window) sequence numbers and
unacceptable ack numbers in these states.

Fixes #3785

PiperOrigin-RevId: 329616283
---
 pkg/tcpip/transport/tcp/rcv.go                |  37 ++-
 test/packetimpact/dut/posix_server.cc         |   9 +
 test/packetimpact/proto/posix_server.proto    |  11 +
 test/packetimpact/testbench/connections.go    |   2 +-
 test/packetimpact/testbench/dut.go            |  26 ++
 test/packetimpact/tests/BUILD                 |   4 +-
 .../tests/tcp_close_wait_ack_test.go          | 109 --------
 .../tests/tcp_unacc_seq_ack_test.go           | 234 ++++++++++++++++++
 8 files changed, 309 insertions(+), 123 deletions(-)
 delete mode 100644 test/packetimpact/tests/tcp_close_wait_ack_test.go
 create mode 100644 test/packetimpact/tests/tcp_unacc_seq_ack_test.go

diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go
index 5e0bfe585e..bc920a03b5 100644
--- a/pkg/tcpip/transport/tcp/rcv.go
+++ b/pkg/tcpip/transport/tcp/rcv.go
@@ -268,14 +268,7 @@ func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, clo
 	// If we are in one of the shutdown states then we need to do
 	// additional checks before we try and process the segment.
 	switch state {
-	case StateCloseWait:
-		// If the ACK acks something not yet sent then we send an ACK.
-		if r.ep.snd.sndNxt.LessThan(s.ackNumber) {
-			r.ep.snd.sendAck()
-			return true, nil
-		}
-		fallthrough
-	case StateClosing, StateLastAck:
+	case StateCloseWait, StateClosing, StateLastAck:
 		if !s.sequenceNumber.LessThanEq(r.rcvNxt) {
 			// Just drop the segment as we have
 			// already received a FIN and this
@@ -284,9 +277,31 @@ func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, clo
 			return true, nil
 		}
 		fallthrough
-	case StateFinWait1:
-		fallthrough
-	case StateFinWait2:
+	case StateFinWait1, StateFinWait2:
+		// If the ACK acks something not yet sent then we send an ACK.
+		//
+		// RFC793, page 37: If the connection is in a synchronized state,
+		// (ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK,
+		// TIME-WAIT), any unacceptable segment (out of window sequence number
+		// or unacceptable acknowledgment number) must elicit only an empty
+		// acknowledgment segment containing the current send-sequence number
+		// and an acknowledgment indicating the next sequence number expected
+		// to be received, and the connection remains in the same state.
+		//
+		// Just as on Linux, we do not apply this behavior when state is
+		// ESTABLISHED.
+		// Linux's receive processing for all states except ESTABLISHED and
+		// TIME_WAIT is linked below; there, if the ACK check fails, it
+		// replies with an ACK carrying the correct seq/ack numbers:
+		// https://github.com/torvalds/linux/blob/v5.8/net/ipv4/tcp_input.c#L6186
+		// Linux's ESTABLISHED state processing is linked below; there, if the
+		// ACK check fails, the packet is ignored:
+		// https://github.com/torvalds/linux/blob/v5.8/net/ipv4/tcp_input.c#L5591
+		if r.ep.snd.sndNxt.LessThan(s.ackNumber) {
+			r.ep.snd.sendAck()
+			return true, nil
+		}
+
 		// If we are closed for reads (either due to an
 		// incoming FIN or the user calling shutdown(..,
 		// SHUT_RD) then any data past the rcvNxt should
diff --git a/test/packetimpact/dut/posix_server.cc b/test/packetimpact/dut/posix_server.cc
index 2476998f85..de5b4be93b 100644
--- a/test/packetimpact/dut/posix_server.cc
+++ b/test/packetimpact/dut/posix_server.cc
@@ -336,6 +336,15 @@ class PosixImpl final : public posix_server::Posix::Service {
     return ::grpc::Status::OK;
   }
 
+  ::grpc::Status Shutdown(grpc_impl::ServerContext *context,
+                          const ::posix_server::ShutdownRequest *request,
+                          ::posix_server::ShutdownResponse *response) override {
+    if (shutdown(request->fd(), request->how()) < 0) {
+      response->set_errno_(errno);
+    }
+    return ::grpc::Status::OK;
+  }
+
   ::grpc::Status Recv(::grpc::ServerContext *context,
                       const ::posix_server::RecvRequest *request,
                       ::posix_server::RecvResponse *response) override {
diff --git a/test/packetimpact/proto/posix_server.proto b/test/packetimpact/proto/posix_server.proto
index ccd20b10d4..f32ed54eff 100644
--- a/test/packetimpact/proto/posix_server.proto
+++ b/test/packetimpact/proto/posix_server.proto
@@ -188,6 +188,15 @@ message SocketResponse {
   int32 errno_ = 2;  // "errno" may fail to compile in c++.
 }
 
+message ShutdownRequest {
+  int32 fd = 1;
+  int32 how = 2;
+}
+
+message ShutdownResponse {
+  int32 errno_ = 1;  // "errno" may fail to compile in c++.
+}
+
 message RecvRequest {
   int32 sockfd = 1;
   int32 len = 2;
@@ -225,6 +234,8 @@ service Posix {
   rpc SetSockOpt(SetSockOptRequest) returns (SetSockOptResponse);
   // Call socket() on the DUT.
   rpc Socket(SocketRequest) returns (SocketResponse);
+  // Call shutdown() on the DUT.
+  rpc Shutdown(ShutdownRequest) returns (ShutdownResponse);
   // Call recv() on the DUT.
   rpc Recv(RecvRequest) returns (RecvResponse);
 }
diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go
index 3af5f83fd8..a90046f69d 100644
--- a/test/packetimpact/testbench/connections.go
+++ b/test/packetimpact/testbench/connections.go
@@ -615,7 +615,7 @@ func (conn *Connection) ExpectFrame(t *testing.T, layers Layers, timeout time.Du
 			if errs == nil {
 				return nil, fmt.Errorf("got no frames matching %v during %s", layers, timeout)
 			}
-			return nil, fmt.Errorf("got no frames matching %v during %s: got %w", layers, timeout, errs)
+			return nil, fmt.Errorf("got frames %w want %v during %s", errs, layers, timeout)
 		}
 		if conn.match(layers, gotLayers) {
 			for i, s := range conn.layerStates {
diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go
index 73c532e75e..ff269d9495 100644
--- a/test/packetimpact/testbench/dut.go
+++ b/test/packetimpact/testbench/dut.go
@@ -700,3 +700,29 @@ func (dut *DUT) RecvWithErrno(ctx context.Context, t *testing.T, sockfd, len, fl
 	}
 	return resp.GetRet(), resp.GetBuf(), syscall.Errno(resp.GetErrno_())
 }
+
+// Shutdown calls shutdown on the DUT and causes a fatal test failure if the RPC
+// fails. The errno reported by the DUT is returned to the caller. If more
+// control over the timeout is needed, use ShutdownWithErrno.
+func (dut *DUT) Shutdown(t *testing.T, fd, how int32) error {
+	t.Helper()
+
+	ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout)
+	defer cancel()
+	return dut.ShutdownWithErrno(ctx, t, fd, how)
+}
+
+// ShutdownWithErrno calls shutdown on the DUT.
+func (dut *DUT) ShutdownWithErrno(ctx context.Context, t *testing.T, fd, how int32) error {
+	t.Helper()
+
+	req := pb.ShutdownRequest{
+		Fd:  fd,
+		How: how,
+	}
+	resp, err := dut.posixServer.Shutdown(ctx, &req)
+	if err != nil {
+		t.Fatalf("failed to call Shutdown: %s", err)
+	}
+	return syscall.Errno(resp.GetErrno_())
+}
diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD
index 74658fea03..e1ed0cc609 100644
--- a/test/packetimpact/tests/BUILD
+++ b/test/packetimpact/tests/BUILD
@@ -166,8 +166,8 @@ packetimpact_go_test(
 )
 
 packetimpact_go_test(
-    name = "tcp_close_wait_ack",
-    srcs = ["tcp_close_wait_ack_test.go"],
+    name = "tcp_unacc_seq_ack",
+    srcs = ["tcp_unacc_seq_ack_test.go"],
     deps = [
         "//pkg/tcpip/header",
         "//pkg/tcpip/seqnum",
diff --git a/test/packetimpact/tests/tcp_close_wait_ack_test.go b/test/packetimpact/tests/tcp_close_wait_ack_test.go
deleted file mode 100644
index e6a96f2146..0000000000
--- a/test/packetimpact/tests/tcp_close_wait_ack_test.go
+++ /dev/null
@@ -1,109 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package tcp_close_wait_ack_test
-
-import (
-	"flag"
-	"fmt"
-	"testing"
-	"time"
-
-	"golang.org/x/sys/unix"
-	"gvisor.dev/gvisor/pkg/tcpip/header"
-	"gvisor.dev/gvisor/pkg/tcpip/seqnum"
-	"gvisor.dev/gvisor/test/packetimpact/testbench"
-)
-
-func init() {
-	testbench.RegisterFlags(flag.CommandLine)
-}
-
-func TestCloseWaitAck(t *testing.T) {
-	for _, tt := range []struct {
-		description    string
-		makeTestingTCP func(t *testing.T, conn *testbench.TCPIPv4, seqNumOffset, windowSize seqnum.Size) testbench.TCP
-		seqNumOffset   seqnum.Size
-		expectAck      bool
-	}{
-		{"OTW", generateOTWSeqSegment, 0, false},
-		{"OTW", generateOTWSeqSegment, 1, true},
-		{"OTW", generateOTWSeqSegment, 2, true},
-		{"ACK", generateUnaccACKSegment, 0, false},
-		{"ACK", generateUnaccACKSegment, 1, true},
-		{"ACK", generateUnaccACKSegment, 2, true},
-	} {
-		t.Run(fmt.Sprintf("%s%d", tt.description, tt.seqNumOffset), func(t *testing.T) {
-			dut := testbench.NewDUT(t)
-			defer dut.TearDown()
-			listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
-			defer dut.Close(t, listenFd)
-			conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
-			defer conn.Close(t)
-
-			conn.Connect(t)
-			acceptFd, _ := dut.Accept(t, listenFd)
-
-			// Send a FIN to DUT to intiate the active close
-			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagFin)})
-			gotTCP, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second)
-			if err != nil {
-				t.Fatalf("expected an ACK for our fin and DUT should enter CLOSE_WAIT: %s", err)
-			}
-			windowSize := seqnum.Size(*gotTCP.WindowSize)
-
-			// Send a segment with OTW Seq / unacc ACK and expect an ACK back
-			conn.Send(t, tt.makeTestingTCP(t, &conn, tt.seqNumOffset, windowSize), &testbench.Payload{Bytes: []byte("Sample Data")})
-			gotAck, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second)
-			if tt.expectAck && err != nil {
-				t.Fatalf("expected an ack but got none: %s", err)
-			}
-			if !tt.expectAck && gotAck != nil {
-				t.Fatalf("expected no ack but got one: %s", gotAck)
-			}
-
-			// Now let's verify DUT is indeed in CLOSE_WAIT
-			dut.Close(t, acceptFd)
-			if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagFin)}, time.Second); err != nil {
-				t.Fatalf("expected DUT to send a FIN: %s", err)
-			}
-			// Ack the FIN from DUT
-			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
-			// Send some extra data to DUT
-			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: []byte("Sample Data")})
-			if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, time.Second); err != nil {
-				t.Fatalf("expected DUT to send an RST: %s", err)
-			}
-		})
-	}
-}
-
-// generateOTWSeqSegment generates an segment with
-// seqnum = RCV.NXT + RCV.WND + seqNumOffset, the generated segment is only
-// acceptable when seqNumOffset is 0, otherwise an ACK is expected from the
-// receiver.
-func generateOTWSeqSegment(t *testing.T, conn *testbench.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) testbench.TCP {
-	lastAcceptable := conn.LocalSeqNum(t).Add(windowSize)
-	otwSeq := uint32(lastAcceptable.Add(seqNumOffset))
-	return testbench.TCP{SeqNum: testbench.Uint32(otwSeq), Flags: testbench.Uint8(header.TCPFlagAck)}
-}
-
-// generateUnaccACKSegment generates an segment with
-// acknum = SND.NXT + seqNumOffset, the generated segment is only acceptable
-// when seqNumOffset is 0, otherwise an ACK is expected from the receiver.
-func generateUnaccACKSegment(t *testing.T, conn *testbench.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) testbench.TCP {
-	lastAcceptable := conn.RemoteSeqNum(t)
-	unaccAck := uint32(lastAcceptable.Add(seqNumOffset))
-	return testbench.TCP{AckNum: testbench.Uint32(unaccAck), Flags: testbench.Uint8(header.TCPFlagAck)}
-}
diff --git a/test/packetimpact/tests/tcp_unacc_seq_ack_test.go b/test/packetimpact/tests/tcp_unacc_seq_ack_test.go
new file mode 100644
index 0000000000..d078bbf15c
--- /dev/null
+++ b/test/packetimpact/tests/tcp_unacc_seq_ack_test.go
@@ -0,0 +1,234 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tcp_unacc_seq_ack_test
+
+import (
+	"flag"
+	"fmt"
+	"syscall"
+	"testing"
+	"time"
+
+	"golang.org/x/sys/unix"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
+	"gvisor.dev/gvisor/pkg/tcpip/seqnum"
+	"gvisor.dev/gvisor/test/packetimpact/testbench"
+)
+
+func init() {
+	testbench.RegisterFlags(flag.CommandLine)
+}
+
+func TestEstablishedUnaccSeqAck(t *testing.T) {
+	for _, tt := range []struct {
+		description    string
+		makeTestingTCP func(t *testing.T, conn *testbench.TCPIPv4, seqNumOffset, windowSize seqnum.Size) testbench.TCP
+		seqNumOffset   seqnum.Size
+		expectAck      bool
+		restoreSeq     bool
+	}{
+		{description: "OTWSeq", makeTestingTCP: generateOTWSeqSegment, seqNumOffset: 0, expectAck: true, restoreSeq: true},
+		{description: "OTWSeq", makeTestingTCP: generateOTWSeqSegment, seqNumOffset: 1, expectAck: true, restoreSeq: true},
+		{description: "OTWSeq", makeTestingTCP: generateOTWSeqSegment, seqNumOffset: 2, expectAck: true, restoreSeq: true},
+		{description: "UnaccAck", makeTestingTCP: generateUnaccACKSegment, seqNumOffset: 0, expectAck: true, restoreSeq: false},
+		{description: "UnaccAck", makeTestingTCP: generateUnaccACKSegment, seqNumOffset: 1, expectAck: false, restoreSeq: true},
+		{description: "UnaccAck", makeTestingTCP: generateUnaccACKSegment, seqNumOffset: 2, expectAck: false, restoreSeq: true},
+	} {
+		t.Run(fmt.Sprintf("%s:offset=%d", tt.description, tt.seqNumOffset), func(t *testing.T) {
+			dut := testbench.NewDUT(t)
+			defer dut.TearDown()
+			listenFD, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1 /*backlog*/)
+			defer dut.Close(t, listenFD)
+			conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
+			defer conn.Close(t)
+
+			conn.Connect(t)
+			dut.Accept(t, listenFD)
+
+			sampleData := []byte("Sample Data")
+			samplePayload := &testbench.Payload{Bytes: sampleData}
+
+			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload)
+			gotTCP, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second)
+			if err != nil {
+				t.Fatalf("expected ack %s", err)
+			}
+			windowSize := seqnum.Size(*gotTCP.WindowSize)
+
+			origSeq := *conn.LocalSeqNum(t)
+			// Send a segment with OTW Seq / unacc ACK.
+			conn.Send(t, tt.makeTestingTCP(t, &conn, tt.seqNumOffset, windowSize), samplePayload)
+			if tt.restoreSeq {
+				// Restore the local sequence number to ensure that the incoming
+				// ACK matches the TCP layer state.
+				*conn.LocalSeqNum(t) = origSeq
+			}
+			gotAck, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second)
+			if tt.expectAck && err != nil {
+				t.Fatalf("expected an ack but got none: %s", err)
+			}
+			if err == nil && !tt.expectAck && gotAck != nil {
+				t.Fatalf("expected no ack but got one: %s", gotAck)
+			}
+		})
+	}
+}
+
+func TestPassiveCloseUnaccSeqAck(t *testing.T) {
+	for _, tt := range []struct {
+		description    string
+		makeTestingTCP func(t *testing.T, conn *testbench.TCPIPv4, seqNumOffset, windowSize seqnum.Size) testbench.TCP
+		seqNumOffset   seqnum.Size
+		expectAck      bool
+	}{
+		{description: "OTWSeq", makeTestingTCP: generateOTWSeqSegment, seqNumOffset: 0, expectAck: false},
+		{description: "OTWSeq", makeTestingTCP: generateOTWSeqSegment, seqNumOffset: 1, expectAck: true},
+		{description: "OTWSeq", makeTestingTCP: generateOTWSeqSegment, seqNumOffset: 2, expectAck: true},
+		{description: "UnaccAck", makeTestingTCP: generateUnaccACKSegment, seqNumOffset: 0, expectAck: false},
+		{description: "UnaccAck", makeTestingTCP: generateUnaccACKSegment, seqNumOffset: 1, expectAck: true},
+		{description: "UnaccAck", makeTestingTCP: generateUnaccACKSegment, seqNumOffset: 2, expectAck: true},
+	} {
+		t.Run(fmt.Sprintf("%s:offset=%d", tt.description, tt.seqNumOffset), func(t *testing.T) {
+			dut := testbench.NewDUT(t)
+			defer dut.TearDown()
+			listenFD, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1 /*backlog*/)
+			defer dut.Close(t, listenFD)
+			conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
+			defer conn.Close(t)
+
+			conn.Connect(t)
+			acceptFD, _ := dut.Accept(t, listenFD)
+
+			// Send a FIN to DUT to initiate the passive close.
+			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagFin)})
+			gotTCP, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second)
+			if err != nil {
+				t.Fatalf("expected an ACK for our fin and DUT should enter CLOSE_WAIT: %s", err)
+			}
+			windowSize := seqnum.Size(*gotTCP.WindowSize)
+
+			sampleData := []byte("Sample Data")
+			samplePayload := &testbench.Payload{Bytes: sampleData}
+
+			// Send a segment with OTW Seq / unacc ACK.
+			conn.Send(t, tt.makeTestingTCP(t, &conn, tt.seqNumOffset, windowSize), samplePayload)
+			gotAck, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second)
+			if tt.expectAck && err != nil {
+				t.Errorf("expected an ack but got none: %s", err)
+			}
+			if err == nil && !tt.expectAck && gotAck != nil {
+				t.Errorf("expected no ack but got one: %s", gotAck)
+			}
+
+			// Now let's verify DUT is indeed in CLOSE_WAIT
+			dut.Close(t, acceptFD)
+			if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagFin)}, time.Second); err != nil {
+				t.Fatalf("expected DUT to send a FIN: %s", err)
+			}
+			// Ack the FIN from DUT
+			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+			// Send some extra data to DUT
+			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, samplePayload)
+			if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, time.Second); err != nil {
+				t.Fatalf("expected DUT to send an RST: %s", err)
+			}
+		})
+	}
+}
+
+func TestActiveCloseUnaccpSeqAck(t *testing.T) {
+	for _, tt := range []struct {
+		description    string
+		makeTestingTCP func(t *testing.T, conn *testbench.TCPIPv4, seqNumOffset, windowSize seqnum.Size) testbench.TCP
+		seqNumOffset   seqnum.Size
+		restoreSeq     bool
+	}{
+		{description: "OTWSeq", makeTestingTCP: generateOTWSeqSegment, seqNumOffset: 0, restoreSeq: true},
+		{description: "OTWSeq", makeTestingTCP: generateOTWSeqSegment, seqNumOffset: 1, restoreSeq: true},
+		{description: "OTWSeq", makeTestingTCP: generateOTWSeqSegment, seqNumOffset: 2, restoreSeq: true},
+		{description: "UnaccAck", makeTestingTCP: generateUnaccACKSegment, seqNumOffset: 0, restoreSeq: false},
+		{description: "UnaccAck", makeTestingTCP: generateUnaccACKSegment, seqNumOffset: 1, restoreSeq: true},
+		{description: "UnaccAck", makeTestingTCP: generateUnaccACKSegment, seqNumOffset: 2, restoreSeq: true},
+	} {
+		t.Run(fmt.Sprintf("%s:offset=%d", tt.description, tt.seqNumOffset), func(t *testing.T) {
+			dut := testbench.NewDUT(t)
+			defer dut.TearDown()
+			listenFD, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1 /*backlog*/)
+			defer dut.Close(t, listenFD)
+			conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
+			defer conn.Close(t)
+
+			conn.Connect(t)
+			acceptFD, _ := dut.Accept(t, listenFD)
+
+			// Trigger active close.
+			dut.Shutdown(t, acceptFD, syscall.SHUT_WR)
+
+			// Get to FIN_WAIT2
+			gotTCP, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second)
+			if err != nil {
+				t.Fatalf("expected a FIN: %s", err)
+			}
+			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+
+			sendUnaccSeqAck := func(state string) {
+				t.Helper()
+				sampleData := []byte("Sample Data")
+				samplePayload := &testbench.Payload{Bytes: sampleData}
+
+				origSeq := *conn.LocalSeqNum(t)
+				// Send a segment with OTW Seq / unacc ACK.
+				conn.Send(t, tt.makeTestingTCP(t, &conn, tt.seqNumOffset, seqnum.Size(*gotTCP.WindowSize)), samplePayload)
+				if tt.restoreSeq {
+					// Restore the local sequence number to ensure that the
+					// incoming ACK matches the TCP layer state.
+					*conn.LocalSeqNum(t) = origSeq
+				}
+				if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil {
+					t.Errorf("expected an ack in %s state, but got none: %s", state, err)
+				}
+			}
+
+			sendUnaccSeqAck("FIN_WAIT2")
+
+			// Send a FIN to DUT to get to TIME_WAIT
+			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)})
+			if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil {
+				t.Fatalf("expected an ACK for our fin and DUT should enter TIME_WAIT: %s", err)
+			}
+
+			sendUnaccSeqAck("TIME_WAIT")
+		})
+	}
+}
+
+// generateOTWSeqSegment generates a segment with
+// seqnum = RCV.NXT + RCV.WND + seqNumOffset. The generated segment is only
+// acceptable when seqNumOffset is 0; otherwise an ACK is expected from the
+// receiver.
+func generateOTWSeqSegment(t *testing.T, conn *testbench.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) testbench.TCP {
+	lastAcceptable := conn.LocalSeqNum(t).Add(windowSize)
+	otwSeq := uint32(lastAcceptable.Add(seqNumOffset))
+	return testbench.TCP{SeqNum: testbench.Uint32(otwSeq), Flags: testbench.Uint8(header.TCPFlagAck)}
+}
+
+// generateUnaccACKSegment generates a segment with
+// acknum = SND.NXT + seqNumOffset. The generated segment is only acceptable
+// when seqNumOffset is 0; otherwise an ACK is expected from the receiver.
+func generateUnaccACKSegment(t *testing.T, conn *testbench.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) testbench.TCP {
+	lastAcceptable := conn.RemoteSeqNum(t)
+	unaccAck := uint32(lastAcceptable.Add(seqNumOffset))
+	return testbench.TCP{AckNum: testbench.Uint32(unaccAck), Flags: testbench.Uint8(header.TCPFlagAck)}
+}

From 8a8f457862e093f8d513b92769ebdf637929891a Mon Sep 17 00:00:00 2001
From: Fabricio Voznika <fvoznika@google.com>
Date: Tue, 1 Sep 2020 19:20:37 -0700
Subject: [PATCH 132/211] Implement setattr+clunk in 9P

This is to cover the common pattern: open->read/write->close,
where SetAttr needs to be called to update atime/mtime before
the file is closed.

Benchmark results:

BM_OpenReadClose/10240 CPU
setattr+clunk: 63783 ns
VFS2:          68109 ns
VFS1:          72507 ns

Updates #1198

PiperOrigin-RevId: 329628461
---
 pkg/p9/client_file.go                        |  38 ++++-
 pkg/p9/file.go                               |  24 +++
 pkg/p9/handlers.go                           |  31 ++++
 pkg/p9/messages.go                           |  60 +++++++
 pkg/p9/messages_test.go                      |  24 +++
 pkg/p9/p9.go                                 | 162 ++++++++++---------
 pkg/p9/p9test/client_test.go                 |  23 ++-
 pkg/p9/version.go                            |   8 +-
 pkg/sentry/fsimpl/gofer/gofer.go             |  40 +++--
 pkg/sentry/fsimpl/gofer/p9file.go            |   7 +
 runsc/fsgofer/fsgofer.go                     |   2 +
 test/perf/linux/BUILD                        |  16 ++
 test/perf/linux/open_read_close_benchmark.cc |  61 +++++++
 13 files changed, 389 insertions(+), 107 deletions(-)
 create mode 100644 test/perf/linux/open_read_close_benchmark.cc

diff --git a/pkg/p9/client_file.go b/pkg/p9/client_file.go
index 2ee07b664f..28fe081d6a 100644
--- a/pkg/p9/client_file.go
+++ b/pkg/p9/client_file.go
@@ -54,6 +54,8 @@ func (c *Client) newFile(fid FID) *clientFile {
 //
 // This proxies all of the interfaces found in file.go.
 type clientFile struct {
+	DisallowServerCalls
+
 	// client is the originating client.
 	client *Client
 
@@ -283,6 +285,39 @@ func (c *clientFile) Close() error {
 	return nil
 }
 
+// SetAttrClose implements File.SetAttrClose.
+func (c *clientFile) SetAttrClose(valid SetAttrMask, attr SetAttr) error {
+	if !versionSupportsTsetattrclunk(c.client.version) {
+		setAttrErr := c.SetAttr(valid, attr)
+
+		// Try to close file even in case of failure above. Since the state of the
+		// file is unknown to the caller, it will not attempt to close the file
+		// again.
+		if err := c.Close(); err != nil {
+			return err
+		}
+
+		return setAttrErr
+	}
+
+	// Avoid double close.
+	if !atomic.CompareAndSwapUint32(&c.closed, 0, 1) {
+		return syscall.EBADF
+	}
+
+	// Send the message.
+	if err := c.client.sendRecv(&Tsetattrclunk{FID: c.fid, Valid: valid, SetAttr: attr}, &Rsetattrclunk{}); err != nil {
+		// If an error occurred, we toss away the FID. This isn't ideal,
+		// but I'm not sure what else makes sense in this context.
+		log.Warningf("Tsetattrclunk failed, losing FID %v: %v", c.fid, err)
+		return err
+	}
+
+	// Return the FID to the pool.
+	c.client.fidPool.Put(uint64(c.fid))
+	return nil
+}
+
 // Open implements File.Open.
 func (c *clientFile) Open(flags OpenFlags) (*fd.FD, QID, uint32, error) {
 	if atomic.LoadUint32(&c.closed) != 0 {
@@ -681,6 +716,3 @@ func (c *clientFile) Flush() error {
 
 	return c.client.sendRecv(&Tflushf{FID: c.fid}, &Rflushf{})
 }
-
-// Renamed implements File.Renamed.
-func (c *clientFile) Renamed(newDir File, newName string) {}
diff --git a/pkg/p9/file.go b/pkg/p9/file.go
index cab35896f1..c2e3a3f98e 100644
--- a/pkg/p9/file.go
+++ b/pkg/p9/file.go
@@ -135,6 +135,14 @@ type File interface {
 	// On the server, Close has no concurrency guarantee.
 	Close() error
 
+	// SetAttrClose is the equivalent of calling SetAttr() followed by Close().
+	// This can be used to set file times before closing the file in a single
+	// operation.
+	//
+	// On the server, SetAttr has a write concurrency guarantee.
+	// On the server, Close has no concurrency guarantee.
+	SetAttrClose(valid SetAttrMask, attr SetAttr) error
+
 	// Open must be called prior to using Read, Write or Readdir. Once Open
 	// is called, some operations, such as Walk, will no longer work.
 	//
@@ -286,3 +294,19 @@ type DefaultWalkGetAttr struct{}
 func (DefaultWalkGetAttr) WalkGetAttr([]string) ([]QID, File, AttrMask, Attr, error) {
 	return nil, nil, AttrMask{}, Attr{}, syscall.ENOSYS
 }
+
+// DisallowClientCalls panics if a client-only function is called.
+type DisallowClientCalls struct{}
+
+// SetAttrClose implements File.SetAttrClose.
+func (DisallowClientCalls) SetAttrClose(SetAttrMask, SetAttr) error {
+	panic("SetAttrClose should not be called on the server")
+}
+
+// DisallowServerCalls panics if a server-only function is called.
+type DisallowServerCalls struct{}
+
+// Renamed implements File.Renamed. It is server-only, so it always panics.
+func (DisallowServerCalls) Renamed(File, string) {
+	panic("Renamed should not be called on the client")
+}
diff --git a/pkg/p9/handlers.go b/pkg/p9/handlers.go
index 1db5797dda..abd237f460 100644
--- a/pkg/p9/handlers.go
+++ b/pkg/p9/handlers.go
@@ -123,6 +123,37 @@ func (t *Tclunk) handle(cs *connState) message {
 	return &Rclunk{}
 }
 
+// handle implements handler.handle.
+func (t *Tsetattrclunk) handle(cs *connState) message {
+	ref, ok := cs.LookupFID(t.FID)
+	if !ok {
+		return newErr(syscall.EBADF)
+	}
+	defer ref.DecRef()
+
+	setAttrErr := ref.safelyWrite(func() error {
+		// We don't allow setattr on files that have been deleted.
+		// This might be technically incorrect, as it's possible that
+		// there were multiple links and you can still change the
+		// corresponding inode information.
+		if ref.isDeleted() {
+			return syscall.EINVAL
+		}
+		// Set the attributes.
+		return ref.file.SetAttr(t.Valid, t.SetAttr)
+	})
+
+	// Try to delete FID even in case of failure above. Since the state of the
+	// file is unknown to the caller, it will not attempt to close the file again.
+	if !cs.DeleteFID(t.FID) {
+		return newErr(syscall.EBADF)
+	}
+	if setAttrErr != nil {
+		return newErr(setAttrErr)
+	}
+	return &Rsetattrclunk{}
+}
+
 // handle implements handler.handle.
 func (t *Tremove) handle(cs *connState) message {
 	ref, ok := cs.LookupFID(t.FID)
diff --git a/pkg/p9/messages.go b/pkg/p9/messages.go
index 2cb59f9344..cf13cbb69d 100644
--- a/pkg/p9/messages.go
+++ b/pkg/p9/messages.go
@@ -317,6 +317,64 @@ func (r *Rclunk) String() string {
 	return "Rclunk{}"
 }
 
+// Tsetattrclunk is a setattr+close request.
+type Tsetattrclunk struct {
+	// FID is the FID to change.
+	FID FID
+
+	// Valid is the set of bits which will be used.
+	Valid SetAttrMask
+
+	// SetAttr is the set request.
+	SetAttr SetAttr
+}
+
+// decode implements encoder.decode.
+func (t *Tsetattrclunk) decode(b *buffer) {
+	t.FID = b.ReadFID()
+	t.Valid.decode(b)
+	t.SetAttr.decode(b)
+}
+
+// encode implements encoder.encode.
+func (t *Tsetattrclunk) encode(b *buffer) {
+	b.WriteFID(t.FID)
+	t.Valid.encode(b)
+	t.SetAttr.encode(b)
+}
+
+// Type implements message.Type.
+func (*Tsetattrclunk) Type() MsgType {
+	return MsgTsetattrclunk
+}
+
+// String implements fmt.Stringer.
+func (t *Tsetattrclunk) String() string {
+	return fmt.Sprintf("Tsetattrclunk{FID: %d, Valid: %v, SetAttr: %s}", t.FID, t.Valid, t.SetAttr)
+}
+
+// Rsetattrclunk is a setattr+close response.
+type Rsetattrclunk struct {
+}
+
+// decode implements encoder.decode.
+func (*Rsetattrclunk) decode(*buffer) {
+}
+
+// encode implements encoder.encode.
+func (*Rsetattrclunk) encode(*buffer) {
+}
+
+// Type implements message.Type.
+func (*Rsetattrclunk) Type() MsgType {
+	return MsgRsetattrclunk
+}
+
+// String implements fmt.Stringer.
+func (r *Rsetattrclunk) String() string {
+	return "Rsetattrclunk{}"
+}
+
 // Tremove is a remove request.
 //
 // This will eventually be replaced by Tunlinkat.
@@ -2657,6 +2715,8 @@ func init() {
 	msgRegistry.register(MsgRlconnect, func() message { return &Rlconnect{} })
 	msgRegistry.register(MsgTallocate, func() message { return &Tallocate{} })
 	msgRegistry.register(MsgRallocate, func() message { return &Rallocate{} })
+	msgRegistry.register(MsgTsetattrclunk, func() message { return &Tsetattrclunk{} })
+	msgRegistry.register(MsgRsetattrclunk, func() message { return &Rsetattrclunk{} })
 	msgRegistry.register(MsgTchannel, func() message { return &Tchannel{} })
 	msgRegistry.register(MsgRchannel, func() message { return &Rchannel{} })
 }
diff --git a/pkg/p9/messages_test.go b/pkg/p9/messages_test.go
index 7facc9f5e1..bfeb6c2369 100644
--- a/pkg/p9/messages_test.go
+++ b/pkg/p9/messages_test.go
@@ -376,6 +376,30 @@ func TestEncodeDecode(t *testing.T) {
 		&Rumknod{
 			Rmknod{QID: QID{Type: 1}},
 		},
+		&Tsetattrclunk{
+			FID: 1,
+			Valid: SetAttrMask{
+				Permissions:        true,
+				UID:                true,
+				GID:                true,
+				Size:               true,
+				ATime:              true,
+				MTime:              true,
+				CTime:              true,
+				ATimeNotSystemTime: true,
+				MTimeNotSystemTime: true,
+			},
+			SetAttr: SetAttr{
+				Permissions:      1,
+				UID:              2,
+				GID:              3,
+				Size:             4,
+				ATimeSeconds:     5,
+				ATimeNanoSeconds: 6,
+				MTimeSeconds:     7,
+				MTimeNanoSeconds: 8,
+			},
+		},
 	}
 
 	for _, enc := range objs {
diff --git a/pkg/p9/p9.go b/pkg/p9/p9.go
index 122c457d20..2235f8968b 100644
--- a/pkg/p9/p9.go
+++ b/pkg/p9/p9.go
@@ -315,86 +315,88 @@ type MsgType uint8
 
 // MsgType declarations.
 const (
-	MsgTlerror      MsgType = 6
-	MsgRlerror              = 7
-	MsgTstatfs              = 8
-	MsgRstatfs              = 9
-	MsgTlopen               = 12
-	MsgRlopen               = 13
-	MsgTlcreate             = 14
-	MsgRlcreate             = 15
-	MsgTsymlink             = 16
-	MsgRsymlink             = 17
-	MsgTmknod               = 18
-	MsgRmknod               = 19
-	MsgTrename              = 20
-	MsgRrename              = 21
-	MsgTreadlink            = 22
-	MsgRreadlink            = 23
-	MsgTgetattr             = 24
-	MsgRgetattr             = 25
-	MsgTsetattr             = 26
-	MsgRsetattr             = 27
-	MsgTlistxattr           = 28
-	MsgRlistxattr           = 29
-	MsgTxattrwalk           = 30
-	MsgRxattrwalk           = 31
-	MsgTxattrcreate         = 32
-	MsgRxattrcreate         = 33
-	MsgTgetxattr            = 34
-	MsgRgetxattr            = 35
-	MsgTsetxattr            = 36
-	MsgRsetxattr            = 37
-	MsgTremovexattr         = 38
-	MsgRremovexattr         = 39
-	MsgTreaddir             = 40
-	MsgRreaddir             = 41
-	MsgTfsync               = 50
-	MsgRfsync               = 51
-	MsgTlink                = 70
-	MsgRlink                = 71
-	MsgTmkdir               = 72
-	MsgRmkdir               = 73
-	MsgTrenameat            = 74
-	MsgRrenameat            = 75
-	MsgTunlinkat            = 76
-	MsgRunlinkat            = 77
-	MsgTversion             = 100
-	MsgRversion             = 101
-	MsgTauth                = 102
-	MsgRauth                = 103
-	MsgTattach              = 104
-	MsgRattach              = 105
-	MsgTflush               = 108
-	MsgRflush               = 109
-	MsgTwalk                = 110
-	MsgRwalk                = 111
-	MsgTread                = 116
-	MsgRread                = 117
-	MsgTwrite               = 118
-	MsgRwrite               = 119
-	MsgTclunk               = 120
-	MsgRclunk               = 121
-	MsgTremove              = 122
-	MsgRremove              = 123
-	MsgTflushf              = 124
-	MsgRflushf              = 125
-	MsgTwalkgetattr         = 126
-	MsgRwalkgetattr         = 127
-	MsgTucreate             = 128
-	MsgRucreate             = 129
-	MsgTumkdir              = 130
-	MsgRumkdir              = 131
-	MsgTumknod              = 132
-	MsgRumknod              = 133
-	MsgTusymlink            = 134
-	MsgRusymlink            = 135
-	MsgTlconnect            = 136
-	MsgRlconnect            = 137
-	MsgTallocate            = 138
-	MsgRallocate            = 139
-	MsgTchannel             = 250
-	MsgRchannel             = 251
+	MsgTlerror       MsgType = 6
+	MsgRlerror       MsgType = 7
+	MsgTstatfs       MsgType = 8
+	MsgRstatfs       MsgType = 9
+	MsgTlopen        MsgType = 12
+	MsgRlopen        MsgType = 13
+	MsgTlcreate      MsgType = 14
+	MsgRlcreate      MsgType = 15
+	MsgTsymlink      MsgType = 16
+	MsgRsymlink      MsgType = 17
+	MsgTmknod        MsgType = 18
+	MsgRmknod        MsgType = 19
+	MsgTrename       MsgType = 20
+	MsgRrename       MsgType = 21
+	MsgTreadlink     MsgType = 22
+	MsgRreadlink     MsgType = 23
+	MsgTgetattr      MsgType = 24
+	MsgRgetattr      MsgType = 25
+	MsgTsetattr      MsgType = 26
+	MsgRsetattr      MsgType = 27
+	MsgTlistxattr    MsgType = 28
+	MsgRlistxattr    MsgType = 29
+	MsgTxattrwalk    MsgType = 30
+	MsgRxattrwalk    MsgType = 31
+	MsgTxattrcreate  MsgType = 32
+	MsgRxattrcreate  MsgType = 33
+	MsgTgetxattr     MsgType = 34
+	MsgRgetxattr     MsgType = 35
+	MsgTsetxattr     MsgType = 36
+	MsgRsetxattr     MsgType = 37
+	MsgTremovexattr  MsgType = 38
+	MsgRremovexattr  MsgType = 39
+	MsgTreaddir      MsgType = 40
+	MsgRreaddir      MsgType = 41
+	MsgTfsync        MsgType = 50
+	MsgRfsync        MsgType = 51
+	MsgTlink         MsgType = 70
+	MsgRlink         MsgType = 71
+	MsgTmkdir        MsgType = 72
+	MsgRmkdir        MsgType = 73
+	MsgTrenameat     MsgType = 74
+	MsgRrenameat     MsgType = 75
+	MsgTunlinkat     MsgType = 76
+	MsgRunlinkat     MsgType = 77
+	MsgTversion      MsgType = 100
+	MsgRversion      MsgType = 101
+	MsgTauth         MsgType = 102
+	MsgRauth         MsgType = 103
+	MsgTattach       MsgType = 104
+	MsgRattach       MsgType = 105
+	MsgTflush        MsgType = 108
+	MsgRflush        MsgType = 109
+	MsgTwalk         MsgType = 110
+	MsgRwalk         MsgType = 111
+	MsgTread         MsgType = 116
+	MsgRread         MsgType = 117
+	MsgTwrite        MsgType = 118
+	MsgRwrite        MsgType = 119
+	MsgTclunk        MsgType = 120
+	MsgRclunk        MsgType = 121
+	MsgTremove       MsgType = 122
+	MsgRremove       MsgType = 123
+	MsgTflushf       MsgType = 124
+	MsgRflushf       MsgType = 125
+	MsgTwalkgetattr  MsgType = 126
+	MsgRwalkgetattr  MsgType = 127
+	MsgTucreate      MsgType = 128
+	MsgRucreate      MsgType = 129
+	MsgTumkdir       MsgType = 130
+	MsgRumkdir       MsgType = 131
+	MsgTumknod       MsgType = 132
+	MsgRumknod       MsgType = 133
+	MsgTusymlink     MsgType = 134
+	MsgRusymlink     MsgType = 135
+	MsgTlconnect     MsgType = 136
+	MsgRlconnect     MsgType = 137
+	MsgTallocate     MsgType = 138
+	MsgRallocate     MsgType = 139
+	MsgTsetattrclunk MsgType = 140
+	MsgRsetattrclunk MsgType = 141
+	MsgTchannel      MsgType = 250
+	MsgRchannel      MsgType = 251
 )
 
 // QIDType represents the file type for QIDs.
diff --git a/pkg/p9/p9test/client_test.go b/pkg/p9/p9test/client_test.go
index 6e7bb3db21..6e605b14cc 100644
--- a/pkg/p9/p9test/client_test.go
+++ b/pkg/p9/p9test/client_test.go
@@ -1225,22 +1225,31 @@ func TestOpen(t *testing.T) {
 func TestClose(t *testing.T) {
 	type closeTest struct {
 		name    string
-		closeFn func(backend *Mock, f p9.File)
+		closeFn func(backend *Mock, f p9.File) error
 	}
 
 	cases := []closeTest{
 		{
 			name: "close",
-			closeFn: func(_ *Mock, f p9.File) {
-				f.Close()
+			closeFn: func(_ *Mock, f p9.File) error {
+				return f.Close()
 			},
 		},
 		{
 			name: "remove",
-			closeFn: func(backend *Mock, f p9.File) {
+			closeFn: func(backend *Mock, f p9.File) error {
 				// Allow the rename call in the parent, automatically translated.
 				backend.parent.EXPECT().UnlinkAt(gomock.Any(), gomock.Any()).Times(1)
-				f.(deprecatedRemover).Remove()
+				return f.(deprecatedRemover).Remove()
+			},
+		},
+		{
+			name: "setAttrClose",
+			closeFn: func(backend *Mock, f p9.File) error {
+				valid := p9.SetAttrMask{ATime: true}
+				attr := p9.SetAttr{ATimeSeconds: 1, ATimeNanoSeconds: 2}
+				backend.EXPECT().SetAttr(valid, attr).Times(1)
+				return f.SetAttrClose(valid, attr)
 			},
 		},
 	}
@@ -1258,7 +1267,9 @@ func TestClose(t *testing.T) {
 				_, backend, f := walkHelper(h, name, root)
 
 				// Close via the prescribed method.
-				tc.closeFn(backend, f)
+				if err := tc.closeFn(backend, f); err != nil {
+					t.Fatalf("closeFn failed: %v", err)
+				}
 
 				// Everything should fail with EBADF.
 				if _, _, err := f.Walk(nil); err != syscall.EBADF {
diff --git a/pkg/p9/version.go b/pkg/p9/version.go
index 09cde9f5a0..8d7168ef5f 100644
--- a/pkg/p9/version.go
+++ b/pkg/p9/version.go
@@ -26,7 +26,7 @@ const (
 	//
 	// Clients are expected to start requesting this version number and
 	// to continuously decrement it until a Tversion request succeeds.
-	highestSupportedVersion uint32 = 11
+	highestSupportedVersion uint32 = 12
 
 	// lowestSupportedVersion is the lowest supported version X in a
 	// version string of the format 9P2000.L.Google.X.
@@ -173,3 +173,9 @@ func versionSupportsGetSetXattr(v uint32) bool {
 func versionSupportsListRemoveXattr(v uint32) bool {
 	return v >= 11
 }
+
+// versionSupportsTsetattrclunk returns true if version v supports
+// the Tsetattrclunk message.
+func versionSupportsTsetattrclunk(v uint32) bool {
+	return v >= 12
+}
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index 57bff17894..73d9e772d1 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -1300,30 +1300,36 @@ func (d *dentry) destroyLocked(ctx context.Context) {
 	d.handleMu.Unlock()
 
 	if !d.file.isNil() {
+		valid := p9.SetAttrMask{}
+		attr := p9.SetAttr{}
 		if !d.isDeleted() {
 			// Write dirty timestamps back to the remote filesystem.
-			atimeDirty := atomic.LoadUint32(&d.atimeDirty) != 0
-			mtimeDirty := atomic.LoadUint32(&d.mtimeDirty) != 0
-			if atimeDirty || mtimeDirty {
+			if atomic.LoadUint32(&d.atimeDirty) != 0 {
+				valid.ATime = true
+				valid.ATimeNotSystemTime = true
 				atime := atomic.LoadInt64(&d.atime)
+				attr.ATimeSeconds = uint64(atime / 1e9)
+				attr.ATimeNanoSeconds = uint64(atime % 1e9)
+			}
+			if atomic.LoadUint32(&d.mtimeDirty) != 0 {
+				valid.MTime = true
+				valid.MTimeNotSystemTime = true
 				mtime := atomic.LoadInt64(&d.mtime)
-				if err := d.file.setAttr(ctx, p9.SetAttrMask{
-					ATime:              atimeDirty,
-					ATimeNotSystemTime: atimeDirty,
-					MTime:              mtimeDirty,
-					MTimeNotSystemTime: mtimeDirty,
-				}, p9.SetAttr{
-					ATimeSeconds:     uint64(atime / 1e9),
-					ATimeNanoSeconds: uint64(atime % 1e9),
-					MTimeSeconds:     uint64(mtime / 1e9),
-					MTimeNanoSeconds: uint64(mtime % 1e9),
-				}); err != nil {
-					log.Warningf("gofer.dentry.destroyLocked: failed to write dirty timestamps back: %v", err)
-				}
+				attr.MTimeSeconds = uint64(mtime / 1e9)
+				attr.MTimeNanoSeconds = uint64(mtime % 1e9)
+			}
+		}
+
+		// Check if attributes need to be changed before closing the file.
+		if valid.ATime || valid.MTime {
+			if err := d.file.setAttrClose(ctx, valid, attr); err != nil {
+				log.Warningf("gofer.dentry.destroyLocked: failed to close file with write dirty timestamps: %v", err)
 			}
+		} else if err := d.file.close(ctx); err != nil {
+			log.Warningf("gofer.dentry.destroyLocked: failed to close file: %v", err)
 		}
-		d.file.close(ctx)
 		d.file = p9file{}
+
 		// Remove d from the set of syncable dentries.
 		d.fs.syncMu.Lock()
 		delete(d.fs.syncableDentries, d)
diff --git a/pkg/sentry/fsimpl/gofer/p9file.go b/pkg/sentry/fsimpl/gofer/p9file.go
index 87f0b877f3..21b4a96fef 100644
--- a/pkg/sentry/fsimpl/gofer/p9file.go
+++ b/pkg/sentry/fsimpl/gofer/p9file.go
@@ -127,6 +127,13 @@ func (f p9file) close(ctx context.Context) error {
 	return err
 }
 
+func (f p9file) setAttrClose(ctx context.Context, valid p9.SetAttrMask, attr p9.SetAttr) error {
+	ctx.UninterruptibleSleepStart(false)
+	err := f.file.SetAttrClose(valid, attr)
+	ctx.UninterruptibleSleepFinish(false)
+	return err
+}
+
 func (f p9file) open(ctx context.Context, flags p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) {
 	ctx.UninterruptibleSleepStart(false)
 	fdobj, qid, iounit, err := f.file.Open(flags)
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index b0788bd231..4268d97a15 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -181,6 +181,8 @@ func (a *attachPoint) makeQID(stat unix.Stat_t) p9.QID {
 // The few exceptions where it cannot be done are: utimensat on symlinks, and
 // Connect() for the socket address.
 type localFile struct {
+	p9.DisallowClientCalls
+
 	// attachPoint is the attachPoint that serves this localFile.
 	attachPoint *attachPoint
 
diff --git a/test/perf/linux/BUILD b/test/perf/linux/BUILD
index b4e9078269..dd1d2438cb 100644
--- a/test/perf/linux/BUILD
+++ b/test/perf/linux/BUILD
@@ -354,3 +354,19 @@ cc_binary(
         "//test/util:test_util",
     ],
 )
+
+cc_binary(
+    name = "open_read_close_benchmark",
+    testonly = 1,
+    srcs = [
+        "open_read_close_benchmark.cc",
+    ],
+    deps = [
+        gbenchmark,
+        gtest,
+        "//test/util:fs_util",
+        "//test/util:logging",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+    ],
+)
diff --git a/test/perf/linux/open_read_close_benchmark.cc b/test/perf/linux/open_read_close_benchmark.cc
new file mode 100644
index 0000000000..8b023a3d81
--- /dev/null
+++ b/test/perf/linux/open_read_close_benchmark.cc
@@ -0,0 +1,61 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "benchmark/benchmark.h"
+#include "test/util/fs_util.h"
+#include "test/util/logging.h"
+#include "test/util/temp_path.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+void BM_OpenReadClose(benchmark::State& state) {
+  const int size = state.range(0);
+  std::vector<TempPath> cache;
+  for (int i = 0; i < size; i++) {
+    auto path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+        GetAbsoluteTestTmpdir(), "some content", 0644));
+    cache.emplace_back(std::move(path));
+  }
+  // Each benchmark iteration opens a random file, reads one byte, closes it.
+  char buf[1];
+  unsigned int seed = 1;
+  for (auto _ : state) {
+    const int chosen = rand_r(&seed) % size;
+    int fd = open(cache[chosen].path().c_str(), O_RDONLY);
+    TEST_CHECK(fd != -1);
+    TEST_CHECK(read(fd, buf, 1) == 1);
+    close(fd);
+  }
+}
+
+// Gofer dentry cache is 1000 by default. Go over it to force files to be closed
+// for real.
+BENCHMARK(BM_OpenReadClose)->Range(1000, 16384)->UseRealTime();
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor

From d19d0d44f6b910f777d3ddd880cef21324758bf2 Mon Sep 17 00:00:00 2001
From: Zach Koopmans <zkoopmans@google.com>
Date: Tue, 1 Sep 2020 21:01:33 -0700
Subject: [PATCH 133/211] Fix statfs test for opensource.

PiperOrigin-RevId: 329638946
---
 test/syscalls/linux/statfs.cc | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/test/syscalls/linux/statfs.cc b/test/syscalls/linux/statfs.cc
index 49f2f156c7..99ab280fd2 100644
--- a/test/syscalls/linux/statfs.cc
+++ b/test/syscalls/linux/statfs.cc
@@ -43,9 +43,6 @@ TEST(StatfsTest, InternalTmpfs) {
 
   struct statfs st;
   EXPECT_THAT(statfs(temp_file.path().c_str(), &st), SyscallSucceeds());
-  // Note: We could be an overlay or goferfs on some configurations.
-  EXPECT_TRUE(st.f_type == TMPFS_MAGIC || st.f_type == OVERLAYFS_SUPER_MAGIC ||
-              st.f_type == V9FS_MAGIC);
 }
 
 TEST(StatfsTest, InternalDevShm) {

From e56e26eda28c14a3237678ce5610f03989b19736 Mon Sep 17 00:00:00 2001
From: Tiwei Bie <tiwei.btw@antgroup.com>
Date: Tue, 1 Sep 2020 14:55:57 +0800
Subject: [PATCH 134/211] Dup stdio FDs for VFS2 when starting a child
 container

Currently the stdio FDs are not dupped and will be closed
unexpectedly in VFS2 when starting a child container. This
patch fixes this issue.

Fixes: #3821

Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
---
 runsc/boot/loader.go | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index c3c754046c..882cf270b3 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -689,9 +689,18 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid strin
 		return fmt.Errorf("creating new process: %v", err)
 	}
 
-	// setupContainerFS() dups stdioFDs, so we don't need to dup them here.
+	// VFS1 dups stdioFDs, so we don't need to dup them here. VFS2 takes
+	// ownership of the passed FDs, and we need to dup them here.
 	for _, f := range files[:3] {
-		info.stdioFDs = append(info.stdioFDs, int(f.Fd()))
+		if !kernel.VFS2Enabled {
+			info.stdioFDs = append(info.stdioFDs, int(f.Fd()))
+		} else {
+			fd, err := unix.Dup(int(f.Fd()))
+			if err != nil {
+				return fmt.Errorf("failed to dup file: %v", err)
+			}
+			info.stdioFDs = append(info.stdioFDs, fd)
+		}
 	}
 
 	// Can't take ownership away from os.File. dup them to get a new FDs.

From 1b5bbc0946aca8cc9bdf92aa66e1c6d09d10168a Mon Sep 17 00:00:00 2001
From: Ayush Ranjan <ayushranjan@google.com>
Date: Wed, 2 Sep 2020 11:11:23 -0700
Subject: [PATCH 135/211] [runtime tests] Exclude flaky nodejs test

PiperOrigin-RevId: 329749191
---
 test/runtimes/exclude_nodejs12.4.0.csv | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/test/runtimes/exclude_nodejs12.4.0.csv b/test/runtimes/exclude_nodejs12.4.0.csv
index 5866ee56dd..1740dbb76b 100644
--- a/test/runtimes/exclude_nodejs12.4.0.csv
+++ b/test/runtimes/exclude_nodejs12.4.0.csv
@@ -11,8 +11,9 @@ parallel/test-dgram-socket-buffer-size.js,b/68847921,
 parallel/test-dns-channel-timeout.js,b/161893056,
 parallel/test-fs-access.js,,
 parallel/test-fs-watchfile.js,,Flaky - File already exists error
-parallel/test-fs-write-stream.js,,Flaky
-parallel/test-fs-write-stream-throw-type-error.js,b/110226209,
+parallel/test-fs-write-stream.js,b/166819807,Flaky
+parallel/test-fs-write-stream-double-close.js,b/166819807,Flaky
+parallel/test-fs-write-stream-throw-type-error.js,b/166819807,Flaky
 parallel/test-http-writable-true-after-close.js,,Flaky - Mismatched <anonymous> function calls. Expected exactly 1 actual 2
 parallel/test-os.js,b/63997097,
 parallel/test-net-server-listen-options.js,,Flaky - EADDRINUSE

From 661a08e188b1d8d7bf4d5ad4c2eafa61057ffd6a Mon Sep 17 00:00:00 2001
From: Zach Koopmans <zkoopmans@google.com>
Date: Wed, 2 Sep 2020 11:20:45 -0700
Subject: [PATCH 136/211] Add Docs to nginx benchmark.

Adds docs to nginx and refactors both Httpd and Nginx benchmarks.

Key changes:
- Add docs and make nginx tests the same as httpd (reverse, all docs, etc.).
- Make requests scale on c * b.N -> a request per thread. This works well
with both --test.benchtime=10m (do a run that lasts at least 10m) and
--test.benchtime=10x (do b.N = 10).
- Remove a doc from both tests (1000Kb) as 1024Kb exists.

PiperOrigin-RevId: 329751091
---
 images/benchmarks/httpd/Dockerfile       |   2 +-
 images/benchmarks/nginx/Dockerfile       |  10 ++
 images/benchmarks/nginx/nginx.conf       |  19 ++++
 test/benchmarks/network/BUILD            |  11 ++-
 test/benchmarks/network/httpd_test.go    | 110 +++-------------------
 test/benchmarks/network/nginx_test.go    | 113 ++++++++++-------------
 test/benchmarks/network/static_server.go |  88 ++++++++++++++++++
 7 files changed, 193 insertions(+), 160 deletions(-)
 create mode 100644 images/benchmarks/nginx/nginx.conf
 create mode 100644 test/benchmarks/network/static_server.go

diff --git a/images/benchmarks/httpd/Dockerfile b/images/benchmarks/httpd/Dockerfile
index b724060126..e95538a403 100644
--- a/images/benchmarks/httpd/Dockerfile
+++ b/images/benchmarks/httpd/Dockerfile
@@ -8,7 +8,7 @@ RUN set -x \
 
 # Generate a bunch of relevant files.
 RUN mkdir -p /local && \
-        for size in 1 10 100 1000 1024 10240; do \
+        for size in 1 10 100 1024 10240; do \
                 dd if=/dev/zero of=/local/latin${size}k.txt count=${size} bs=1024; \
         done
 
diff --git a/images/benchmarks/nginx/Dockerfile b/images/benchmarks/nginx/Dockerfile
index b64eb52aed..2444d04b19 100644
--- a/images/benchmarks/nginx/Dockerfile
+++ b/images/benchmarks/nginx/Dockerfile
@@ -1 +1,11 @@
 FROM nginx:1.15.10
+
+# Generate a bunch of relevant files.
+RUN mkdir -p /local && \
+        for size in 1 10 100 1024 10240; do \
+                dd if=/dev/zero of=/local/latin${size}k.txt count=${size} bs=1024; \
+        done
+
+RUN touch /local/index.html
+
+COPY ./nginx.conf /etc/nginx/nginx.conf
diff --git a/images/benchmarks/nginx/nginx.conf b/images/benchmarks/nginx/nginx.conf
new file mode 100644
index 0000000000..2c43c0cda8
--- /dev/null
+++ b/images/benchmarks/nginx/nginx.conf
@@ -0,0 +1,19 @@
+user  nginx;
+worker_processes  1;
+daemon off;
+
+error_log  /var/log/nginx/error.log warn;
+pid        /var/run/nginx.pid;
+
+events {
+    worker_connections  1024;
+}
+
+
+http {
+    server {
+      location / {
+        root /tmp/html;
+      }
+    }
+}
diff --git a/test/benchmarks/network/BUILD b/test/benchmarks/network/BUILD
index bd3f6245c9..472b5c3872 100644
--- a/test/benchmarks/network/BUILD
+++ b/test/benchmarks/network/BUILD
@@ -5,8 +5,15 @@ package(licenses = ["notice"])
 go_library(
     name = "network",
     testonly = 1,
-    srcs = ["network.go"],
-    deps = ["//test/benchmarks/harness"],
+    srcs = [
+        "network.go",
+        "static_server.go",
+    ],
+    deps = [
+        "//pkg/test/dockerutil",
+        "//test/benchmarks/harness",
+        "//test/benchmarks/tools",
+    ],
 )
 
 go_test(
diff --git a/test/benchmarks/network/httpd_test.go b/test/benchmarks/network/httpd_test.go
index 336e04c919..369ab326e2 100644
--- a/test/benchmarks/network/httpd_test.go
+++ b/test/benchmarks/network/httpd_test.go
@@ -14,22 +14,19 @@
 package network
 
 import (
-	"context"
 	"fmt"
 	"testing"
 
 	"gvisor.dev/gvisor/pkg/test/dockerutil"
-	"gvisor.dev/gvisor/test/benchmarks/harness"
 	"gvisor.dev/gvisor/test/benchmarks/tools"
 )
 
 // see Dockerfile '//images/benchmarks/httpd'.
-var docs = map[string]string{
+var httpdDocs = map[string]string{
 	"notfound": "notfound",
 	"1Kb":      "latin1k.txt",
 	"10Kb":     "latin10k.txt",
 	"100Kb":    "latin100k.txt",
-	"1000Kb":   "latin1000k.txt",
 	"1Mb":      "latin1024k.txt",
 	"10Mb":     "latin10240k.txt",
 }
@@ -37,30 +34,17 @@ var docs = map[string]string{
 // BenchmarkHttpdConcurrency iterates the concurrency argument and tests
 // how well the runtime under test handles requests in parallel.
 func BenchmarkHttpdConcurrency(b *testing.B) {
-	// Grab a machine for the client and server.
-	clientMachine, err := h.GetMachine()
-	if err != nil {
-		b.Fatalf("failed to get client: %v", err)
-	}
-	defer clientMachine.CleanUp()
-
-	serverMachine, err := h.GetMachine()
-	if err != nil {
-		b.Fatalf("failed to get server: %v", err)
-	}
-	defer serverMachine.CleanUp()
-
 	// The test iterates over client concurrency, so set other parameters.
 	concurrency := []int{1, 25, 50, 100, 1000}
 
 	for _, c := range concurrency {
 		b.Run(fmt.Sprintf("%d", c), func(b *testing.B) {
 			hey := &tools.Hey{
-				Requests:    10000,
+				Requests:    c * b.N,
 				Concurrency: c,
-				Doc:         docs["10Kb"],
+				Doc:         httpdDocs["10Kb"],
 			}
-			runHttpd(b, clientMachine, serverMachine, hey, false /* reverse */)
+			runHttpd(b, hey, false /* reverse */)
 		})
 	}
 }
@@ -77,57 +61,30 @@ func BenchmarkReverseHttpdDocSize(b *testing.B) {
 	benchmarkHttpdDocSize(b, true /* reverse */)
 }
 
+// benchmarkHttpdDocSize iterates through all doc sizes, running subbenchmarks
+// for each size.
 func benchmarkHttpdDocSize(b *testing.B, reverse bool) {
 	b.Helper()
-
-	clientMachine, err := h.GetMachine()
-	if err != nil {
-		b.Fatalf("failed to get machine: %v", err)
-	}
-	defer clientMachine.CleanUp()
-
-	serverMachine, err := h.GetMachine()
-	if err != nil {
-		b.Fatalf("failed to get machine: %v", err)
-	}
-	defer serverMachine.CleanUp()
-
-	for name, filename := range docs {
+	for name, filename := range httpdDocs {
 		concurrency := []int{1, 25, 50, 100, 1000}
 		for _, c := range concurrency {
 			b.Run(fmt.Sprintf("%s_%d", name, c), func(b *testing.B) {
 				hey := &tools.Hey{
-					Requests:    10000,
+					Requests:    c * b.N,
 					Concurrency: c,
 					Doc:         filename,
 				}
-				runHttpd(b, clientMachine, serverMachine, hey, reverse)
+				runHttpd(b, hey, reverse)
 			})
 		}
 	}
 }
 
-// runHttpd runs a single test run.
-func runHttpd(b *testing.B, clientMachine, serverMachine harness.Machine, hey *tools.Hey, reverse bool) {
-	b.Helper()
-
-	// Grab a container from the server.
-	ctx := context.Background()
-	var server *dockerutil.Container
-	if reverse {
-		server = serverMachine.GetNativeContainer(ctx, b)
-	} else {
-		server = serverMachine.GetContainer(ctx, b)
-	}
-
-	defer server.CleanUp(ctx)
-
-	// Copy the docs to /tmp and serve from there.
-	cmd := "mkdir -p /tmp/html; cp -r /local/* /tmp/html/.; apache2 -X"
+// runHttpd configures the static serving methods to run httpd.
+func runHttpd(b *testing.B, hey *tools.Hey, reverse bool) {
+	// httpd runs on port 80.
 	port := 80
-
-	// Start the server.
-	if err := server.Spawn(ctx, dockerutil.RunOpts{
+	httpdRunOpts := dockerutil.RunOpts{
 		Image: "benchmarks/httpd",
 		Ports: []int{port},
 		Env: []string{
@@ -138,44 +95,7 @@ func runHttpd(b *testing.B, clientMachine, serverMachine harness.Machine, hey *t
 			"APACHE_LOG_DIR=/tmp",
 			"APACHE_PID_FILE=/tmp/apache.pid",
 		},
-	}, "sh", "-c", cmd); err != nil {
-		b.Fatalf("failed to start server: %v", err)
-	}
-
-	ip, err := serverMachine.IPAddress()
-	if err != nil {
-		b.Fatalf("failed to find server ip: %v", err)
-	}
-
-	servingPort, err := server.FindPort(ctx, port)
-	if err != nil {
-		b.Fatalf("failed to find server port %d: %v", port, err)
-	}
-
-	// Check the server is serving.
-	harness.WaitUntilServing(ctx, clientMachine, ip, servingPort)
-
-	var client *dockerutil.Container
-	// Grab a client.
-	if reverse {
-		client = clientMachine.GetContainer(ctx, b)
-	} else {
-		client = clientMachine.GetNativeContainer(ctx, b)
-	}
-	defer client.CleanUp(ctx)
-
-	b.ResetTimer()
-	server.RestartProfiles()
-	for i := 0; i < b.N; i++ {
-		out, err := client.Run(ctx, dockerutil.RunOpts{
-			Image: "benchmarks/hey",
-		}, hey.MakeCmd(ip, servingPort)...)
-		if err != nil {
-			b.Fatalf("run failed with: %v", err)
-		}
-
-		b.StopTimer()
-		hey.Report(b, out)
-		b.StartTimer()
 	}
+	httpdCmd := []string{"sh", "-c", "mkdir -p /tmp/html; cp -r /local/* /tmp/html/.; apache2 -X"}
+	runStaticServer(b, httpdRunOpts, httpdCmd, port, hey, reverse)
 }
diff --git a/test/benchmarks/network/nginx_test.go b/test/benchmarks/network/nginx_test.go
index 2bf1a36245..036fd666fe 100644
--- a/test/benchmarks/network/nginx_test.go
+++ b/test/benchmarks/network/nginx_test.go
@@ -14,91 +14,80 @@
 package network
 
 import (
-	"context"
 	"fmt"
 	"testing"
 
 	"gvisor.dev/gvisor/pkg/test/dockerutil"
-	"gvisor.dev/gvisor/test/benchmarks/harness"
 	"gvisor.dev/gvisor/test/benchmarks/tools"
 )
 
+// see Dockerfile '//images/benchmarks/nginx'.
+var nginxDocs = map[string]string{
+	"notfound": "notfound",
+	"1Kb":      "latin1k.txt",
+	"10Kb":     "latin10k.txt",
+	"100Kb":    "latin100k.txt",
+	"1Mb":      "latin1024k.txt",
+	"10Mb":     "latin10240k.txt",
+}
+
 // BenchmarkNginxConcurrency iterates the concurrency argument and tests
 // how well the runtime under test handles requests in parallel.
-// TODO(gvisor.dev/issue/3536): Update with different doc sizes like Httpd.
 func BenchmarkNginxConcurrency(b *testing.B) {
-	// Grab a machine for the client and server.
-	clientMachine, err := h.GetMachine()
-	if err != nil {
-		b.Fatalf("failed to get client: %v", err)
-	}
-	defer clientMachine.CleanUp()
-
-	serverMachine, err := h.GetMachine()
-	if err != nil {
-		b.Fatalf("failed to get server: %v", err)
-	}
-	defer serverMachine.CleanUp()
-
-	concurrency := []int{1, 5, 10, 25}
+	concurrency := []int{1, 25, 100, 1000}
 	for _, c := range concurrency {
 		b.Run(fmt.Sprintf("%d", c), func(b *testing.B) {
 			hey := &tools.Hey{
-				Requests:    10000,
+				Requests:    c * b.N,
 				Concurrency: c,
+				Doc:         nginxDocs["10Kb"], // see Dockerfile '//images/benchmarks/nginx' and httpd_test.
 			}
-			runNginx(b, clientMachine, serverMachine, hey)
+			runNginx(b, hey, false /* reverse */)
 		})
 	}
 }
 
-// runHttpd runs a single test run.
-func runNginx(b *testing.B, clientMachine, serverMachine harness.Machine, hey *tools.Hey) {
-	b.Helper()
-
-	// Grab a container from the server.
-	ctx := context.Background()
-	server := serverMachine.GetContainer(ctx, b)
-	defer server.CleanUp(ctx)
+// BenchmarkNginxDocSize iterates over different sized payloads, testing how
+// well the runtime handles sending different payload sizes from nginx.
+func BenchmarkNginxDocSize(b *testing.B) {
+	benchmarkNginxDocSize(b, false /* reverse */)
+}
 
-	port := 80
-	// Start the server.
-	if err := server.Spawn(ctx,
-		dockerutil.RunOpts{
-			Image: "benchmarks/nginx",
-			Ports: []int{port},
-		}); err != nil {
-		b.Fatalf("server failed to start: %v", err)
-	}
+// BenchmarkReverseNginxDocSize iterates over different sized payloads, testing
+// how well the runtime handles receiving different payload sizes from nginx.
+func BenchmarkReverseNginxDocSize(b *testing.B) {
+	benchmarkNginxDocSize(b, true /* reverse */)
+}
 
-	ip, err := serverMachine.IPAddress()
-	if err != nil {
-		b.Fatalf("failed to find server ip: %v", err)
+// benchmarkNginxDocSize iterates through all doc sizes, running subbenchmarks
+// for each size.
+func benchmarkNginxDocSize(b *testing.B, reverse bool) {
+	b.Helper()
+	for name, filename := range nginxDocs {
+		concurrency := []int{1, 25, 50, 100, 1000}
+		for _, c := range concurrency {
+			b.Run(fmt.Sprintf("%s_%d", name, c), func(b *testing.B) {
+				hey := &tools.Hey{
+					Requests:    c * b.N,
+					Concurrency: c,
+					Doc:         filename,
+				}
+				runNginx(b, hey, reverse)
+			})
+		}
 	}
+}
 
-	servingPort, err := server.FindPort(ctx, port)
-	if err != nil {
-		b.Fatalf("failed to find server port %d: %v", port, err)
+// runNginx configures the static serving methods to run nginx.
+func runNginx(b *testing.B, hey *tools.Hey, reverse bool) {
+	// nginx runs on port 80.
+	port := 80
+	nginxRunOpts := dockerutil.RunOpts{
+		Image: "benchmarks/nginx",
+		Ports: []int{port},
 	}
 
-	// Check the server is serving.
-	harness.WaitUntilServing(ctx, clientMachine, ip, servingPort)
-
-	// Grab a client.
-	client := clientMachine.GetNativeContainer(ctx, b)
-	defer client.CleanUp(ctx)
-
-	b.ResetTimer()
-	server.RestartProfiles()
-	for i := 0; i < b.N; i++ {
-		out, err := client.Run(ctx, dockerutil.RunOpts{
-			Image: "benchmarks/hey",
-		}, hey.MakeCmd(ip, servingPort)...)
-		if err != nil {
-			b.Fatalf("run failed with: %v", err)
-		}
-		b.StopTimer()
-		hey.Report(b, out)
-		b.StartTimer()
-	}
+	// Command copies nginxDocs to tmpfs serving directory and runs nginx.
+	nginxCmd := []string{"sh", "-c", "mkdir -p /tmp/html && cp -a /local/* /tmp/html && nginx"}
+	runStaticServer(b, nginxRunOpts, nginxCmd, port, hey, reverse)
 }
diff --git a/test/benchmarks/network/static_server.go b/test/benchmarks/network/static_server.go
new file mode 100644
index 0000000000..3ef62a71fa
--- /dev/null
+++ b/test/benchmarks/network/static_server.go
@@ -0,0 +1,88 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package network
+
+import (
+	"context"
+	"testing"
+
+	"gvisor.dev/gvisor/pkg/test/dockerutil"
+	"gvisor.dev/gvisor/test/benchmarks/harness"
+	"gvisor.dev/gvisor/test/benchmarks/tools"
+)
+
+// runStaticServer runs static serving workloads (httpd, nginx).
+func runStaticServer(b *testing.B, serverOpts dockerutil.RunOpts, serverCmd []string, port int, hey *tools.Hey, reverse bool) {
+	b.Helper()
+	ctx := context.Background()
+
+	// Get two machines: a client and server.
+	clientMachine, err := h.GetMachine()
+	if err != nil {
+		b.Fatalf("failed to get machine: %v", err)
+	}
+	defer clientMachine.CleanUp()
+
+	serverMachine, err := h.GetMachine()
+	if err != nil {
+		b.Fatalf("failed to get machine: %v", err)
+	}
+	defer serverMachine.CleanUp()
+
+	// Make the containers. 'reverse=true' specifies that the client should use the
+	// runtime under test.
+	var client, server *dockerutil.Container
+	if reverse {
+		client = clientMachine.GetContainer(ctx, b)
+		server = serverMachine.GetNativeContainer(ctx, b)
+	} else {
+		client = clientMachine.GetNativeContainer(ctx, b)
+		server = serverMachine.GetContainer(ctx, b)
+	}
+	defer client.CleanUp(ctx)
+	defer server.CleanUp(ctx)
+
+	// Start the server.
+	if err := server.Spawn(ctx, serverOpts, serverCmd...); err != nil {
+		b.Fatalf("failed to start server: %v", err)
+	}
+
+	// Get its IP.
+	ip, err := serverMachine.IPAddress()
+	if err != nil {
+		b.Fatalf("failed to find server ip: %v", err)
+	}
+
+	// Get the published port.
+	servingPort, err := server.FindPort(ctx, port)
+	if err != nil {
+		b.Fatalf("failed to find server port %d: %v", port, err)
+	}
+
+	// Make sure the server is serving.
+	harness.WaitUntilServing(ctx, clientMachine, ip, servingPort)
+	b.ResetTimer()
+	server.RestartProfiles()
+	out, err := client.Run(ctx, dockerutil.RunOpts{
+		Image: "benchmarks/hey",
+	}, hey.MakeCmd(ip, servingPort)...)
+	if err != nil {
+		b.Fatalf("run failed with: %v", err)
+	}
+
+	b.StopTimer()
+	hey.Report(b, out)
+	b.StartTimer()
+}

From 095c3ad9b1c64b36578c2ead86b62dcbafc268e2 Mon Sep 17 00:00:00 2001
From: Jamie Liu <jamieliu@google.com>
Date: Wed, 2 Sep 2020 11:35:18 -0700
Subject: [PATCH 137/211] Improve sync.SeqCount performance.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Make sync.SeqCountEpoch not a struct. This allows sync.SeqCount.BeginRead()
  to be inlined.

- Mark sync.SeqAtomicLoad<T> nosplit to mitigate the Go compiler's refusal to
  inline it. (Best I could get was "cost 92 exceeds budget 80".)

- Use runtime-guided spinning in SeqCount.BeginRead().

Benchmarks:
name                               old time/op  new time/op   delta
pkg:pkg/sync/sync goos:linux goarch:amd64
SeqCountWriteUncontended-12        8.24ns ± 0%  11.40ns ± 0%  +38.35%  (p=0.000 n=10+10)
SeqCountReadUncontended-12         0.33ns ± 0%   0.14ns ± 3%  -57.77%  (p=0.000 n=7+8)
pkg:pkg/sync/seqatomictest/seqatomic goos:linux goarch:amd64
SeqAtomicLoadIntUncontended-12     0.64ns ± 1%   0.41ns ± 1%  -36.40%  (p=0.000 n=10+8)
SeqAtomicTryLoadIntUncontended-12  0.18ns ± 4%   0.18ns ± 1%     ~     (p=0.206 n=10+8)
AtomicValueLoadIntUncontended-12   0.27ns ± 3%   0.27ns ± 0%   -1.77%  (p=0.000 n=10+8)

(atomic.Value.Load is, of course, inlined. We would expect an uncontended
inline SeqAtomicLoad<int> to perform identically to SeqAtomicTryLoad<int>.) The
"regression" in BenchmarkSeqCountWriteUncontended, despite this CL changing
nothing in that path, is attributed to microarchitectural subtlety; the
benchmark loop is unchanged except for its address:

Before this CL:
  :0                    0x4e62d1                48ffc2                  INCQ DX
  :0                    0x4e62d4                48399110010000          CMPQ DX, 0x110(CX)
  :0                    0x4e62db                7e26                    JLE 0x4e6303
  :0                    0x4e62dd                90                      NOPL
  :0                    0x4e62de                bb01000000              MOVL $0x1, BX
  :0                    0x4e62e3                f00fc118                LOCK XADDL BX, 0(AX)
  :0                    0x4e62e7                ffc3                    INCL BX
  :0                    0x4e62e9                0fbae300                BTL $0x0, BX
  :0                    0x4e62ed                733a                    JAE 0x4e6329
  :0                    0x4e62ef                90                      NOPL
  :0                    0x4e62f0                bb01000000              MOVL $0x1, BX
  :0                    0x4e62f5                f00fc118                LOCK XADDL BX, 0(AX)
  :0                    0x4e62f9                ffc3                    INCL BX
  :0                    0x4e62fb                0fbae300                BTL $0x0, BX
  :0                    0x4e62ff                73d0                    JAE 0x4e62d1

After this CL:
  :0                    0x4e6361                48ffc2                  INCQ DX
  :0                    0x4e6364                48399110010000          CMPQ DX, 0x110(CX)
  :0                    0x4e636b                7e26                    JLE 0x4e6393
  :0                    0x4e636d                90                      NOPL
  :0                    0x4e636e                bb01000000              MOVL $0x1, BX
  :0                    0x4e6373                f00fc118                LOCK XADDL BX, 0(AX)
  :0                    0x4e6377                ffc3                    INCL BX
  :0                    0x4e6379                0fbae300                BTL $0x0, BX
  :0                    0x4e637d                733a                    JAE 0x4e63b9
  :0                    0x4e637f                90                      NOPL
  :0                    0x4e6380                bb01000000              MOVL $0x1, BX
  :0                    0x4e6385                f00fc118                LOCK XADDL BX, 0(AX)
  :0                    0x4e6389                ffc3                    INCL BX
  :0                    0x4e638b                0fbae300                BTL $0x0, BX
  :0                    0x4e638f                73d0                    JAE 0x4e6361

PiperOrigin-RevId: 329754148
---
 pkg/sync/BUILD               |  1 +
 pkg/sync/seqatomic_unsafe.go | 40 +++++++++++++++---------------------
 pkg/sync/seqcount.go         | 30 ++++++++++++++++++---------
 pkg/sync/spin_unsafe.go      | 24 ++++++++++++++++++++++
 4 files changed, 62 insertions(+), 33 deletions(-)
 create mode 100644 pkg/sync/spin_unsafe.go

diff --git a/pkg/sync/BUILD b/pkg/sync/BUILD
index 4d47207f79..68535c3b15 100644
--- a/pkg/sync/BUILD
+++ b/pkg/sync/BUILD
@@ -38,6 +38,7 @@ go_library(
         "race_unsafe.go",
         "rwmutex_unsafe.go",
         "seqcount.go",
+        "spin_unsafe.go",
         "sync.go",
     ],
     marshal = False,
diff --git a/pkg/sync/seqatomic_unsafe.go b/pkg/sync/seqatomic_unsafe.go
index eda6fb131f..2184cb5ab2 100644
--- a/pkg/sync/seqatomic_unsafe.go
+++ b/pkg/sync/seqatomic_unsafe.go
@@ -25,41 +25,35 @@ import (
 type Value struct{}
 
 // SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race
-// with any writer critical sections in sc.
-func SeqAtomicLoad(sc *sync.SeqCount, ptr *Value) Value {
-	// This function doesn't use SeqAtomicTryLoad because doing so is
-	// measurably, significantly (~20%) slower; Go is awful at inlining.
-	var val Value
+// with any writer critical sections in seq.
+//
+//go:nosplit
+func SeqAtomicLoad(seq *sync.SeqCount, ptr *Value) Value {
 	for {
-		epoch := sc.BeginRead()
-		if sync.RaceEnabled {
-			// runtime.RaceDisable() doesn't actually stop the race detector,
-			// so it can't help us here. Instead, call runtime.memmove
-			// directly, which is not instrumented by the race detector.
-			sync.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val))
-		} else {
-			// This is ~40% faster for short reads than going through memmove.
-			val = *ptr
-		}
-		if sc.ReadOk(epoch) {
-			break
+		if val, ok := SeqAtomicTryLoad(seq, seq.BeginRead(), ptr); ok {
+			return val
 		}
 	}
-	return val
 }
 
 // SeqAtomicTryLoad returns a copy of *ptr while in a reader critical section
-// in sc initiated by a call to sc.BeginRead() that returned epoch. If the read
-// would race with a writer critical section, SeqAtomicTryLoad returns
+// in seq initiated by a call to seq.BeginRead() that returned epoch. If the
+// read would race with a writer critical section, SeqAtomicTryLoad returns
 // (unspecified, false).
-func SeqAtomicTryLoad(sc *sync.SeqCount, epoch sync.SeqCountEpoch, ptr *Value) (Value, bool) {
-	var val Value
+//
+//go:nosplit
+func SeqAtomicTryLoad(seq *sync.SeqCount, epoch sync.SeqCountEpoch, ptr *Value) (val Value, ok bool) {
 	if sync.RaceEnabled {
+		// runtime.RaceDisable() doesn't actually stop the race detector, so it
+		// can't help us here. Instead, call runtime.memmove directly, which is
+		// not instrumented by the race detector.
 		sync.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val))
 	} else {
+		// This is ~40% faster for short reads than going through memmove.
 		val = *ptr
 	}
-	return val, sc.ReadOk(epoch)
+	ok = seq.ReadOk(epoch)
+	return
 }
 
 func init() {
diff --git a/pkg/sync/seqcount.go b/pkg/sync/seqcount.go
index a1e8953521..2c5d3df99b 100644
--- a/pkg/sync/seqcount.go
+++ b/pkg/sync/seqcount.go
@@ -8,7 +8,6 @@ package sync
 import (
 	"fmt"
 	"reflect"
-	"runtime"
 	"sync/atomic"
 )
 
@@ -43,9 +42,7 @@ type SeqCount struct {
 }
 
 // SeqCountEpoch tracks writer critical sections in a SeqCount.
-type SeqCountEpoch struct {
-	val uint32
-}
+type SeqCountEpoch uint32
 
 // We assume that:
 //
@@ -83,12 +80,25 @@ type SeqCountEpoch struct {
 // using this pattern. Most users of SeqCount will need to use the
 // SeqAtomicLoad function template in seqatomic.go.
 func (s *SeqCount) BeginRead() SeqCountEpoch {
-	epoch := atomic.LoadUint32(&s.epoch)
-	for epoch&1 != 0 {
-		runtime.Gosched()
-		epoch = atomic.LoadUint32(&s.epoch)
+	if epoch := atomic.LoadUint32(&s.epoch); epoch&1 == 0 {
+		return SeqCountEpoch(epoch)
+	}
+	return s.beginReadSlow()
+}
+
+func (s *SeqCount) beginReadSlow() SeqCountEpoch {
+	i := 0
+	for {
+		if canSpin(i) {
+			i++
+			doSpin()
+		} else {
+			goyield()
+		}
+		if epoch := atomic.LoadUint32(&s.epoch); epoch&1 == 0 {
+			return SeqCountEpoch(epoch)
+		}
 	}
-	return SeqCountEpoch{epoch}
 }
 
 // ReadOk returns true if the reader critical section initiated by a previous
@@ -99,7 +109,7 @@ func (s *SeqCount) BeginRead() SeqCountEpoch {
 // Reader critical sections do not need to be explicitly terminated; the last
 // call to ReadOk is implicitly the end of the reader critical section.
 func (s *SeqCount) ReadOk(epoch SeqCountEpoch) bool {
-	return atomic.LoadUint32(&s.epoch) == epoch.val
+	return atomic.LoadUint32(&s.epoch) == uint32(epoch)
 }
 
 // BeginWrite indicates the beginning of a writer critical section.
diff --git a/pkg/sync/spin_unsafe.go b/pkg/sync/spin_unsafe.go
new file mode 100644
index 0000000000..f721449e33
--- /dev/null
+++ b/pkg/sync/spin_unsafe.go
@@ -0,0 +1,24 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build go1.13
+// +build !go1.16
+
+// Check go:linkname function signatures when updating Go version.
+
+package sync
+
+import (
+	_ "unsafe" // for go:linkname
+)
+
+//go:linkname canSpin sync.runtime_canSpin
+func canSpin(i int) bool
+
+//go:linkname doSpin sync.runtime_doSpin
+func doSpin()
+
+//go:linkname goyield runtime.goyield
+func goyield()

From 9b7f3ce38ac818dcb9edde7eb7288de665d3de10 Mon Sep 17 00:00:00 2001
From: Jamie Liu <jamieliu@google.com>
Date: Wed, 2 Sep 2020 15:35:34 -0700
Subject: [PATCH 138/211] Update Go version constraint on sync/spin_unsafe.go.

PiperOrigin-RevId: 329801584
---
 pkg/sync/spin_unsafe.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/sync/spin_unsafe.go b/pkg/sync/spin_unsafe.go
index f721449e33..cafb2d0654 100644
--- a/pkg/sync/spin_unsafe.go
+++ b/pkg/sync/spin_unsafe.go
@@ -4,7 +4,7 @@
 // license that can be found in the LICENSE file.
 
 // +build go1.13
-// +build !go1.16
+// +build !go1.17
 
 // Check go:linkname function signatures when updating Go version.
 

From 3e87c8e14d1552ec918256639797b07aa999bd0a Mon Sep 17 00:00:00 2001
From: Ayush Ranjan <ayushranjan@google.com>
Date: Wed, 2 Sep 2020 15:39:51 -0700
Subject: [PATCH 139/211] [vfs] Fix error handling in overlayfs OpenAt.

Updates #1199

PiperOrigin-RevId: 329802274
---
 pkg/sentry/fsimpl/overlay/filesystem.go | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go
index e720bfb0be..63df86481a 100644
--- a/pkg/sentry/fsimpl/overlay/filesystem.go
+++ b/pkg/sentry/fsimpl/overlay/filesystem.go
@@ -743,6 +743,9 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
 
 	start := rp.Start().Impl().(*dentry)
 	if rp.Done() {
+		if mayCreate && rp.MustBeDir() {
+			return nil, syserror.EISDIR
+		}
 		if mustCreate {
 			return nil, syserror.EEXIST
 		}
@@ -766,6 +769,10 @@ afterTrailingSymlink:
 	if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
 		return nil, err
 	}
+	// Reject attempts to open directories with O_CREAT.
+	if mayCreate && rp.MustBeDir() {
+		return nil, syserror.EISDIR
+	}
 	// Determine whether or not we need to create a file.
 	parent.dirMu.Lock()
 	child, err := fs.stepLocked(ctx, rp, parent, false /* mayFollowSymlinks */, &ds)
@@ -774,12 +781,11 @@ afterTrailingSymlink:
 		parent.dirMu.Unlock()
 		return fd, err
 	}
+	parent.dirMu.Unlock()
 	if err != nil {
-		parent.dirMu.Unlock()
 		return nil, err
 	}
 	// Open existing child or follow symlink.
-	parent.dirMu.Unlock()
 	if mustCreate {
 		return nil, syserror.EEXIST
 	}
@@ -794,6 +800,9 @@ afterTrailingSymlink:
 		start = parent
 		goto afterTrailingSymlink
 	}
+	if rp.MustBeDir() && !child.isDir() {
+		return nil, syserror.ENOTDIR
+	}
 	if mayWrite {
 		if err := child.copyUpLocked(ctx); err != nil {
 			return nil, err

From f33077c875523d5f33edf8c395300e29e7c9e609 Mon Sep 17 00:00:00 2001
From: Ayush Ranjan <ayushranjan@google.com>
Date: Wed, 2 Sep 2020 17:56:30 -0700
Subject: [PATCH 140/211] [vfs] Implement xattr for overlayfs.

PiperOrigin-RevId: 329825497
---
 pkg/sentry/fs/inode_overlay.go          |   9 +-
 pkg/sentry/fsimpl/gofer/gofer.go        |  42 ++++-----
 pkg/sentry/fsimpl/overlay/copy_up.go    |  52 +++++++++-
 pkg/sentry/fsimpl/overlay/filesystem.go | 120 +++++++++++++++++++++---
 pkg/sentry/fsimpl/overlay/overlay.go    |  36 +++++++
 pkg/sentry/fsimpl/tmpfs/tmpfs.go        |  46 ++-------
 pkg/sentry/vfs/permissions.go           |  38 ++++++++
 7 files changed, 263 insertions(+), 80 deletions(-)

diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go
index dc2e353d92..0a2d64e3a6 100644
--- a/pkg/sentry/fs/inode_overlay.go
+++ b/pkg/sentry/fs/inode_overlay.go
@@ -16,7 +16,6 @@ package fs
 
 import (
 	"fmt"
-	"strings"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
@@ -539,7 +538,7 @@ func overlayGetXattr(ctx context.Context, o *overlayEntry, name string, size uin
 
 	// Don't forward the value of the extended attribute if it would
 	// unexpectedly change the behavior of a wrapping overlay layer.
-	if strings.HasPrefix(XattrOverlayPrefix, name) {
+	if isXattrOverlay(name) {
 		return "", syserror.ENODATA
 	}
 
@@ -555,7 +554,7 @@ func overlayGetXattr(ctx context.Context, o *overlayEntry, name string, size uin
 
 func overlaySetxattr(ctx context.Context, o *overlayEntry, d *Dirent, name, value string, flags uint32) error {
 	// Don't allow changes to overlay xattrs through a setxattr syscall.
-	if strings.HasPrefix(XattrOverlayPrefix, name) {
+	if isXattrOverlay(name) {
 		return syserror.EPERM
 	}
 
@@ -578,7 +577,7 @@ func overlayListXattr(ctx context.Context, o *overlayEntry, size uint64) (map[st
 	for name := range names {
 		// Same as overlayGetXattr, we shouldn't forward along
 		// overlay attributes.
-		if strings.HasPrefix(XattrOverlayPrefix, name) {
+		if isXattrOverlay(name) {
 			delete(names, name)
 		}
 	}
@@ -587,7 +586,7 @@ func overlayListXattr(ctx context.Context, o *overlayEntry, size uint64) (map[st
 
 func overlayRemoveXattr(ctx context.Context, o *overlayEntry, d *Dirent, name string) error {
 	// Don't allow changes to overlay xattrs through a removexattr syscall.
-	if strings.HasPrefix(XattrOverlayPrefix, name) {
+	if isXattrOverlay(name) {
 		return syserror.EPERM
 	}
 
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index 73d9e772d1..78b07f1b3d 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -1067,6 +1067,21 @@ func (d *dentry) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes)
 	return vfs.GenericCheckPermissions(creds, ats, linux.FileMode(atomic.LoadUint32(&d.mode)), auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid)))
 }
 
+func (d *dentry) checkXattrPermissions(creds *auth.Credentials, name string, ats vfs.AccessTypes) error {
+	// We only support xattrs prefixed with "user." (see b/148380782). Currently,
+	// there is no need to expose any other xattrs through a gofer.
+	if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) {
+		return syserror.EOPNOTSUPP
+	}
+	mode := linux.FileMode(atomic.LoadUint32(&d.mode))
+	kuid := auth.KUID(atomic.LoadUint32(&d.uid))
+	kgid := auth.KGID(atomic.LoadUint32(&d.gid))
+	if err := vfs.GenericCheckPermissions(creds, ats, mode, kuid, kgid); err != nil {
+		return err
+	}
+	return vfs.CheckXattrPermissions(creds, ats, mode, kuid, name)
+}
+
 func (d *dentry) mayDelete(creds *auth.Credentials, child *dentry) error {
 	return vfs.CheckDeleteSticky(creds, linux.FileMode(atomic.LoadUint32(&d.mode)), auth.KUID(atomic.LoadUint32(&child.uid)))
 }
@@ -1357,8 +1372,6 @@ func (d *dentry) setDeleted() {
 	atomic.StoreUint32(&d.deleted, 1)
 }
 
-// We only support xattrs prefixed with "user." (see b/148380782). Currently,
-// there is no need to expose any other xattrs through a gofer.
 func (d *dentry) listxattr(ctx context.Context, creds *auth.Credentials, size uint64) ([]string, error) {
 	if d.file.isNil() || !d.userXattrSupported() {
 		return nil, nil
@@ -1369,6 +1382,7 @@ func (d *dentry) listxattr(ctx context.Context, creds *auth.Credentials, size ui
 	}
 	xattrs := make([]string, 0, len(xattrMap))
 	for x := range xattrMap {
+		// We only support xattrs in the user.* namespace.
 		if strings.HasPrefix(x, linux.XATTR_USER_PREFIX) {
 			xattrs = append(xattrs, x)
 		}
@@ -1380,15 +1394,9 @@ func (d *dentry) getxattr(ctx context.Context, creds *auth.Credentials, opts *vf
 	if d.file.isNil() {
 		return "", syserror.ENODATA
 	}
-	if err := d.checkPermissions(creds, vfs.MayRead); err != nil {
+	if err := d.checkXattrPermissions(creds, opts.Name, vfs.MayRead); err != nil {
 		return "", err
 	}
-	if !strings.HasPrefix(opts.Name, linux.XATTR_USER_PREFIX) {
-		return "", syserror.EOPNOTSUPP
-	}
-	if !d.userXattrSupported() {
-		return "", syserror.ENODATA
-	}
 	return d.file.getXattr(ctx, opts.Name, opts.Size)
 }
 
@@ -1396,15 +1404,9 @@ func (d *dentry) setxattr(ctx context.Context, creds *auth.Credentials, opts *vf
 	if d.file.isNil() {
 		return syserror.EPERM
 	}
-	if err := d.checkPermissions(creds, vfs.MayWrite); err != nil {
+	if err := d.checkXattrPermissions(creds, opts.Name, vfs.MayWrite); err != nil {
 		return err
 	}
-	if !strings.HasPrefix(opts.Name, linux.XATTR_USER_PREFIX) {
-		return syserror.EOPNOTSUPP
-	}
-	if !d.userXattrSupported() {
-		return syserror.EPERM
-	}
 	return d.file.setXattr(ctx, opts.Name, opts.Value, opts.Flags)
 }
 
@@ -1412,15 +1414,9 @@ func (d *dentry) removexattr(ctx context.Context, creds *auth.Credentials, name
 	if d.file.isNil() {
 		return syserror.EPERM
 	}
-	if err := d.checkPermissions(creds, vfs.MayWrite); err != nil {
+	if err := d.checkXattrPermissions(creds, name, vfs.MayWrite); err != nil {
 		return err
 	}
-	if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) {
-		return syserror.EOPNOTSUPP
-	}
-	if !d.userXattrSupported() {
-		return syserror.EPERM
-	}
 	return d.file.removeXattr(ctx, name)
 }
 
diff --git a/pkg/sentry/fsimpl/overlay/copy_up.go b/pkg/sentry/fsimpl/overlay/copy_up.go
index 13735eb052..ba7b8495a7 100644
--- a/pkg/sentry/fsimpl/overlay/copy_up.go
+++ b/pkg/sentry/fsimpl/overlay/copy_up.go
@@ -91,6 +91,10 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
 		if err != nil {
 			ctx.Warningf("Unrecoverable overlayfs inconsistency: failed to delete upper layer file after copy-up error: %v", err)
 		}
+		if d.upperVD.Ok() {
+			d.upperVD.DecRef(ctx)
+			d.upperVD = vfs.VirtualDentry{}
+		}
 	}
 	switch ftype {
 	case linux.S_IFREG:
@@ -234,7 +238,10 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
 		panic(fmt.Sprintf("unexpected file type %o", ftype))
 	}
 
-	// TODO(gvisor.dev/issue/1199): copy up xattrs
+	if err := d.copyXattrsLocked(ctx); err != nil {
+		cleanupUndoCopyUp()
+		return err
+	}
 
 	// Update the dentry's device and inode numbers (except for directories,
 	// for which these remain overlay-assigned).
@@ -246,14 +253,10 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
 			Mask: linux.STATX_INO,
 		})
 		if err != nil {
-			d.upperVD.DecRef(ctx)
-			d.upperVD = vfs.VirtualDentry{}
 			cleanupUndoCopyUp()
 			return err
 		}
 		if upperStat.Mask&linux.STATX_INO == 0 {
-			d.upperVD.DecRef(ctx)
-			d.upperVD = vfs.VirtualDentry{}
 			cleanupUndoCopyUp()
 			return syserror.EREMOTE
 		}
@@ -265,3 +268,42 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
 	atomic.StoreUint32(&d.copiedUp, 1)
 	return nil
 }
+
+// copyXattrsLocked copies a subset of lower's extended attributes to upper.
+// Attributes that configure an overlay in the lower are not copied up.
+//
+// Preconditions: d.copyMu must be locked for writing.
+func (d *dentry) copyXattrsLocked(ctx context.Context) error {
+	vfsObj := d.fs.vfsfs.VirtualFilesystem()
+	lowerPop := &vfs.PathOperation{Root: d.lowerVDs[0], Start: d.lowerVDs[0]}
+	upperPop := &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD}
+
+	lowerXattrs, err := vfsObj.ListxattrAt(ctx, d.fs.creds, lowerPop, 0)
+	if err != nil {
+		if err == syserror.EOPNOTSUPP {
+			// There are no guarantees as to the contents of lowerXattrs.
+			return nil
+		}
+		ctx.Warningf("failed to copy up xattrs because ListxattrAt failed: %v", err)
+		return err
+	}
+
+	for _, name := range lowerXattrs {
+		// Do not copy up overlay attributes.
+		if isOverlayXattr(name) {
+			continue
+		}
+
+		value, err := vfsObj.GetxattrAt(ctx, d.fs.creds, lowerPop, &vfs.GetxattrOptions{Name: name, Size: 0})
+		if err != nil {
+			ctx.Warningf("failed to copy up xattrs because GetxattrAt failed: %v", err)
+			return err
+		}
+
+		if err := vfsObj.SetxattrAt(ctx, d.fs.creds, upperPop, &vfs.SetxattrOptions{Name: name, Value: value}); err != nil {
+			ctx.Warningf("failed to copy up xattrs because SetxattrAt failed: %v", err)
+			return err
+		}
+	}
+	return nil
+}
diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go
index 63df86481a..46528c99c4 100644
--- a/pkg/sentry/fsimpl/overlay/filesystem.go
+++ b/pkg/sentry/fsimpl/overlay/filesystem.go
@@ -15,6 +15,7 @@
 package overlay
 
 import (
+	"strings"
 	"sync/atomic"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
@@ -27,10 +28,15 @@ import (
 	"gvisor.dev/gvisor/pkg/syserror"
 )
 
+// _OVL_XATTR_PREFIX is an extended attribute key prefix to identify overlayfs
+// attributes.
+// Linux: fs/overlayfs/overlayfs.h:OVL_XATTR_PREFIX
+const _OVL_XATTR_PREFIX = linux.XATTR_TRUSTED_PREFIX + "overlay."
+
 // _OVL_XATTR_OPAQUE is an extended attribute key whose value is set to "y" for
 // opaque directories.
 // Linux: fs/overlayfs/overlayfs.h:OVL_XATTR_OPAQUE
-const _OVL_XATTR_OPAQUE = linux.XATTR_TRUSTED_PREFIX + "overlay.opaque"
+const _OVL_XATTR_OPAQUE = _OVL_XATTR_PREFIX + "opaque"
 
 func isWhiteout(stat *linux.Statx) bool {
 	return stat.Mode&linux.S_IFMT == linux.S_IFCHR && stat.RdevMajor == 0 && stat.RdevMinor == 0
@@ -1347,18 +1353,42 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
 	return nil
 }
 
+// isOverlayXattr returns whether the given extended attribute configures the
+// overlay.
+func isOverlayXattr(name string) bool {
+	return strings.HasPrefix(name, _OVL_XATTR_PREFIX)
+}
+
 // ListxattrAt implements vfs.FilesystemImpl.ListxattrAt.
 func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
 	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
-	_, err := fs.resolveLocked(ctx, rp, &ds)
+	d, err := fs.resolveLocked(ctx, rp, &ds)
 	if err != nil {
 		return nil, err
 	}
-	// TODO(gvisor.dev/issue/1199): Linux overlayfs actually allows listxattr,
-	// but not any other xattr syscalls. For now we just reject all of them.
-	return nil, syserror.ENOTSUP
+
+	return fs.listXattr(ctx, d, size)
+}
+
+func (fs *filesystem) listXattr(ctx context.Context, d *dentry, size uint64) ([]string, error) {
+	vfsObj := d.fs.vfsfs.VirtualFilesystem()
+	top := d.topLayer()
+	names, err := vfsObj.ListxattrAt(ctx, fs.creds, &vfs.PathOperation{Root: top, Start: top}, size)
+	if err != nil {
+		return nil, err
+	}
+
+	// Filter out all overlay attributes.
+	n := 0
+	for _, name := range names {
+		if !isOverlayXattr(name) {
+			names[n] = name
+			n++
+		}
+	}
+	return names[:n], err
 }
 
 // GetxattrAt implements vfs.FilesystemImpl.GetxattrAt.
@@ -1366,11 +1396,29 @@ func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
 	var ds *[]*dentry
 	fs.renameMu.RLock()
 	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
-	_, err := fs.resolveLocked(ctx, rp, &ds)
+	d, err := fs.resolveLocked(ctx, rp, &ds)
 	if err != nil {
 		return "", err
 	}
-	return "", syserror.ENOTSUP
+
+	return fs.getXattr(ctx, d, rp.Credentials(), &opts)
+}
+
+func (fs *filesystem) getXattr(ctx context.Context, d *dentry, creds *auth.Credentials, opts *vfs.GetxattrOptions) (string, error) {
+	if err := d.checkXattrPermissions(creds, opts.Name, vfs.MayRead); err != nil {
+		return "", err
+	}
+
+	// Return EOPNOTSUPP when fetching an overlay attribute.
+	// See fs/overlayfs/super.c:ovl_own_xattr_get().
+	if isOverlayXattr(opts.Name) {
+		return "", syserror.EOPNOTSUPP
+	}
+
+	// Analogous to fs/overlayfs/super.c:ovl_other_xattr_get().
+	vfsObj := d.fs.vfsfs.VirtualFilesystem()
+	top := d.topLayer()
+	return vfsObj.GetxattrAt(ctx, fs.creds, &vfs.PathOperation{Root: top, Start: top}, opts)
 }
 
 // SetxattrAt implements vfs.FilesystemImpl.SetxattrAt.
@@ -1378,11 +1426,36 @@ func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
 	var ds *[]*dentry
 	fs.renameMu.RLock()
 	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
-	_, err := fs.resolveLocked(ctx, rp, &ds)
+	d, err := fs.resolveLocked(ctx, rp, &ds)
 	if err != nil {
 		return err
 	}
-	return syserror.ENOTSUP
+
+	return fs.setXattrLocked(ctx, d, rp.Mount(), rp.Credentials(), &opts)
+}
+
+// Precondition: fs.renameMu must be locked.
+func (fs *filesystem) setXattrLocked(ctx context.Context, d *dentry, mnt *vfs.Mount, creds *auth.Credentials, opts *vfs.SetxattrOptions) error {
+	if err := d.checkXattrPermissions(creds, opts.Name, vfs.MayWrite); err != nil {
+		return err
+	}
+
+	// Return EOPNOTSUPP when setting an overlay attribute.
+	// See fs/overlayfs/super.c:ovl_own_xattr_set().
+	if isOverlayXattr(opts.Name) {
+		return syserror.EOPNOTSUPP
+	}
+
+	// Analogous to fs/overlayfs/super.c:ovl_other_xattr_set().
+	if err := mnt.CheckBeginWrite(); err != nil {
+		return err
+	}
+	defer mnt.EndWrite()
+	if err := d.copyUpLocked(ctx); err != nil {
+		return err
+	}
+	vfsObj := d.fs.vfsfs.VirtualFilesystem()
+	return vfsObj.SetxattrAt(ctx, fs.creds, &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD}, opts)
 }
 
 // RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt.
@@ -1390,11 +1463,36 @@ func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath,
 	var ds *[]*dentry
 	fs.renameMu.RLock()
 	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
-	_, err := fs.resolveLocked(ctx, rp, &ds)
+	d, err := fs.resolveLocked(ctx, rp, &ds)
 	if err != nil {
 		return err
 	}
-	return syserror.ENOTSUP
+
+	return fs.removeXattrLocked(ctx, d, rp.Mount(), rp.Credentials(), name)
+}
+
+// Precondition: fs.renameMu must be locked.
+func (fs *filesystem) removeXattrLocked(ctx context.Context, d *dentry, mnt *vfs.Mount, creds *auth.Credentials, name string) error {
+	if err := d.checkXattrPermissions(creds, name, vfs.MayWrite); err != nil {
+		return err
+	}
+
+	// Like SetxattrAt, return EOPNOTSUPP when removing an overlay attribute.
+	// Linux passes the remove request to xattr_handler->set.
+	// See fs/xattr.c:vfs_removexattr().
+	if isOverlayXattr(name) {
+		return syserror.EOPNOTSUPP
+	}
+
+	if err := mnt.CheckBeginWrite(); err != nil {
+		return err
+	}
+	defer mnt.EndWrite()
+	if err := d.copyUpLocked(ctx); err != nil {
+		return err
+	}
+	vfsObj := d.fs.vfsfs.VirtualFilesystem()
+	return vfsObj.RemovexattrAt(ctx, fs.creds, &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD}, name)
 }
 
 // PrependPath implements vfs.FilesystemImpl.PrependPath.
diff --git a/pkg/sentry/fsimpl/overlay/overlay.go b/pkg/sentry/fsimpl/overlay/overlay.go
index 00562667fa..e706f9d4ea 100644
--- a/pkg/sentry/fsimpl/overlay/overlay.go
+++ b/pkg/sentry/fsimpl/overlay/overlay.go
@@ -570,6 +570,16 @@ func (d *dentry) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes)
 	return vfs.GenericCheckPermissions(creds, ats, linux.FileMode(atomic.LoadUint32(&d.mode)), auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid)))
 }
 
+func (d *dentry) checkXattrPermissions(creds *auth.Credentials, name string, ats vfs.AccessTypes) error {
+	mode := linux.FileMode(atomic.LoadUint32(&d.mode))
+	kuid := auth.KUID(atomic.LoadUint32(&d.uid))
+	kgid := auth.KGID(atomic.LoadUint32(&d.gid))
+	if err := vfs.GenericCheckPermissions(creds, ats, mode, kuid, kgid); err != nil {
+		return err
+	}
+	return vfs.CheckXattrPermissions(creds, ats, mode, kuid, name)
+}
+
 // statInternalMask is the set of stat fields that is set by
 // dentry.statInternalTo().
 const statInternalMask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO
@@ -622,6 +632,32 @@ func (fd *fileDescription) dentry() *dentry {
 	return fd.vfsfd.Dentry().Impl().(*dentry)
 }
 
+// Listxattr implements vfs.FileDescriptionImpl.Listxattr.
+func (fd *fileDescription) Listxattr(ctx context.Context, size uint64) ([]string, error) {
+	return fd.filesystem().listXattr(ctx, fd.dentry(), size)
+}
+
+// Getxattr implements vfs.FileDescriptionImpl.Getxattr.
+func (fd *fileDescription) Getxattr(ctx context.Context, opts vfs.GetxattrOptions) (string, error) {
+	return fd.filesystem().getXattr(ctx, fd.dentry(), auth.CredentialsFromContext(ctx), &opts)
+}
+
+// Setxattr implements vfs.FileDescriptionImpl.Setxattr.
+func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOptions) error {
+	fs := fd.filesystem()
+	fs.renameMu.RLock()
+	defer fs.renameMu.RUnlock()
+	return fs.setXattrLocked(ctx, fd.dentry(), fd.vfsfd.Mount(), auth.CredentialsFromContext(ctx), &opts)
+}
+
+// Removexattr implements vfs.FileDescriptionImpl.Removexattr.
+func (fd *fileDescription) Removexattr(ctx context.Context, name string) error {
+	fs := fd.filesystem()
+	fs.renameMu.RLock()
+	defer fs.renameMu.RUnlock()
+	return fs.removeXattrLocked(ctx, fd.dentry(), fd.vfsfd.Mount(), auth.CredentialsFromContext(ctx), name)
+}
+
 // LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
 func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
 	return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index c4cec41308..d6074f20f5 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -652,44 +652,18 @@ func (i *inode) removexattr(creds *auth.Credentials, name string) error {
 }
 
 func (i *inode) checkXattrPermissions(creds *auth.Credentials, name string, ats vfs.AccessTypes) error {
-	switch {
-	case ats&vfs.MayRead == vfs.MayRead:
-		if err := i.checkPermissions(creds, vfs.MayRead); err != nil {
-			return err
-		}
-	case ats&vfs.MayWrite == vfs.MayWrite:
-		if err := i.checkPermissions(creds, vfs.MayWrite); err != nil {
-			return err
-		}
-	default:
-		panic(fmt.Sprintf("checkXattrPermissions called with impossible AccessTypes: %v", ats))
+	// We currently only support extended attributes in the user.* and
+	// trusted.* namespaces. See b/148380782.
+	if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) && !strings.HasPrefix(name, linux.XATTR_TRUSTED_PREFIX) {
+		return syserror.EOPNOTSUPP
 	}
-
-	switch {
-	case strings.HasPrefix(name, linux.XATTR_TRUSTED_PREFIX):
-		// The trusted.* namespace can only be accessed by privileged
-		// users.
-		if creds.HasCapability(linux.CAP_SYS_ADMIN) {
-			return nil
-		}
-		if ats&vfs.MayWrite == vfs.MayWrite {
-			return syserror.EPERM
-		}
-		return syserror.ENODATA
-	case strings.HasPrefix(name, linux.XATTR_USER_PREFIX):
-		// Extended attributes in the user.* namespace are only
-		// supported for regular files and directories.
-		filetype := linux.S_IFMT & atomic.LoadUint32(&i.mode)
-		if filetype == linux.S_IFREG || filetype == linux.S_IFDIR {
-			return nil
-		}
-		if ats&vfs.MayWrite == vfs.MayWrite {
-			return syserror.EPERM
-		}
-		return syserror.ENODATA
-
+	mode := linux.FileMode(atomic.LoadUint32(&i.mode))
+	kuid := auth.KUID(atomic.LoadUint32(&i.uid))
+	kgid := auth.KGID(atomic.LoadUint32(&i.gid))
+	if err := vfs.GenericCheckPermissions(creds, ats, mode, kuid, kgid); err != nil {
+		return err
 	}
-	return syserror.EOPNOTSUPP
+	return vfs.CheckXattrPermissions(creds, ats, mode, kuid, name)
 }
 
 // fileDescription is embedded by tmpfs implementations of
diff --git a/pkg/sentry/vfs/permissions.go b/pkg/sentry/vfs/permissions.go
index 014b928eda..00eeb8842d 100644
--- a/pkg/sentry/vfs/permissions.go
+++ b/pkg/sentry/vfs/permissions.go
@@ -16,6 +16,7 @@ package vfs
 
 import (
 	"math"
+	"strings"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
@@ -284,3 +285,40 @@ func CheckLimit(ctx context.Context, offset, size int64) (int64, error) {
 	}
 	return size, nil
 }
+
+// CheckXattrPermissions checks permissions for extended attribute access.
+// This is analogous to fs/xattr.c:xattr_permission(). Some key differences:
+// * Does not check for read-only filesystem property.
+// * Does not check inode immutability or append only mode. In both cases EPERM
+//   must be returned by filesystem implementations.
+// * Does not do inode permission checks. Filesystem implementations should
+//   handle inode permission checks as they may differ across implementations.
+func CheckXattrPermissions(creds *auth.Credentials, ats AccessTypes, mode linux.FileMode, kuid auth.KUID, name string) error {
+	switch {
+	case strings.HasPrefix(name, linux.XATTR_TRUSTED_PREFIX):
+		// The trusted.* namespace can only be accessed by privileged
+		// users.
+		if creds.HasCapability(linux.CAP_SYS_ADMIN) {
+			return nil
+		}
+		if ats.MayWrite() {
+			return syserror.EPERM
+		}
+		return syserror.ENODATA
+	case strings.HasPrefix(name, linux.XATTR_USER_PREFIX):
+		// In the user.* namespace, only regular files and directories can have
+		// extended attributes. For sticky directories, only the owner and
+		// privileged users can write attributes.
+		filetype := mode.FileType()
+		if filetype != linux.ModeRegular && filetype != linux.ModeDirectory {
+			if ats.MayWrite() {
+				return syserror.EPERM
+			}
+			return syserror.ENODATA
+		}
+		if filetype == linux.ModeDirectory && mode&linux.ModeSticky != 0 && ats.MayWrite() && !CanActAsOwner(creds, kuid) {
+			return syserror.EPERM
+		}
+	}
+	return nil
+}

From 89185098fae9cfe2725e7645aeb2ebd57aa26320 Mon Sep 17 00:00:00 2001
From: Bhasker Hariharan <bhaskerh@google.com>
Date: Wed, 2 Sep 2020 18:19:50 -0700
Subject: [PATCH 141/211] Fix Accept to not return error for sockets in accept
 queue.

Accept on gVisor returns an error if a socket in the accept queue was closed
before Accept() was called. Linux returns the new fd even if the returned
socket has already been closed by the peer, e.g. due to a RST sent by the peer.

This seems to be intentional in Linux; see the GitHub issue for more details.

Fixes #3780

PiperOrigin-RevId: 329828404
---
 pkg/sentry/socket/netstack/netstack.go        | 22 ++---
 pkg/sentry/socket/netstack/netstack_vfs2.go   | 16 ++--
 .../socket/unix/transport/connectioned.go     | 14 ++-
 .../socket/unix/transport/connectionless.go   |  4 +-
 pkg/sentry/socket/unix/transport/unix.go      |  5 +-
 pkg/sentry/socket/unix/unix.go                | 22 ++---
 pkg/sentry/socket/unix/unix_vfs2.go           | 22 ++---
 pkg/tcpip/adapters/gonet/gonet.go             |  4 +-
 pkg/tcpip/sample/tun_tcp_echo/main.go         |  2 +-
 pkg/tcpip/stack/transport_test.go             |  4 +-
 pkg/tcpip/tcpip.go                            |  5 +-
 pkg/tcpip/transport/icmp/endpoint.go          |  2 +-
 pkg/tcpip/transport/packet/endpoint.go        | 20 ++--
 pkg/tcpip/transport/raw/endpoint.go           |  8 +-
 pkg/tcpip/transport/tcp/dual_stack_test.go    | 30 +++---
 pkg/tcpip/transport/tcp/endpoint.go           | 13 ++-
 pkg/tcpip/transport/tcp/tcp_test.go           | 96 +++++++++----------
 .../transport/tcp/testing/context/context.go  |  4 +-
 pkg/tcpip/transport/udp/endpoint.go           |  2 +-
 test/syscalls/linux/socket_inet_loopback.cc   | 20 ++--
 20 files changed, 163 insertions(+), 152 deletions(-)

diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 36c17d1baa..91790834b4 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -830,7 +830,7 @@ func (s *socketOpsCommon) Listen(t *kernel.Task, backlog int) *syserr.Error {
 
 // blockingAccept implements a blocking version of accept(2), that is, if no
 // connections are ready to be accept, it will block until one becomes ready.
-func (s *socketOpsCommon) blockingAccept(t *kernel.Task) (tcpip.Endpoint, *waiter.Queue, *syserr.Error) {
+func (s *socketOpsCommon) blockingAccept(t *kernel.Task, peerAddr *tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, *syserr.Error) {
 	// Register for notifications.
 	e, ch := waiter.NewChannelEntry(nil)
 	s.EventRegister(&e, waiter.EventIn)
@@ -839,7 +839,7 @@ func (s *socketOpsCommon) blockingAccept(t *kernel.Task) (tcpip.Endpoint, *waite
 	// Try to accept the connection again; if it fails, then wait until we
 	// get a notification.
 	for {
-		if ep, wq, err := s.Endpoint.Accept(); err != tcpip.ErrWouldBlock {
+		if ep, wq, err := s.Endpoint.Accept(peerAddr); err != tcpip.ErrWouldBlock {
 			return ep, wq, syserr.TranslateNetstackError(err)
 		}
 
@@ -852,15 +852,18 @@ func (s *socketOpsCommon) blockingAccept(t *kernel.Task) (tcpip.Endpoint, *waite
 // Accept implements the linux syscall accept(2) for sockets backed by
 // tcpip.Endpoint.
 func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error) {
-	// Issue the accept request to get the new endpoint.
-	ep, wq, terr := s.Endpoint.Accept()
+	var peerAddr *tcpip.FullAddress
+	if peerRequested {
+		peerAddr = &tcpip.FullAddress{}
+	}
+	ep, wq, terr := s.Endpoint.Accept(peerAddr)
 	if terr != nil {
 		if terr != tcpip.ErrWouldBlock || !blocking {
 			return 0, nil, 0, syserr.TranslateNetstackError(terr)
 		}
 
 		var err *syserr.Error
-		ep, wq, err = s.blockingAccept(t)
+		ep, wq, err = s.blockingAccept(t, peerAddr)
 		if err != nil {
 			return 0, nil, 0, err
 		}
@@ -880,13 +883,8 @@ func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int,
 
 	var addr linux.SockAddr
 	var addrLen uint32
-	if peerRequested {
-		// Get address of the peer and write it to peer slice.
-		var err *syserr.Error
-		addr, addrLen, err = ns.FileOperations.(*SocketOperations).GetPeerName(t)
-		if err != nil {
-			return 0, nil, 0, err
-		}
+	if peerAddr != nil {
+		addr, addrLen = ConvertAddress(s.family, *peerAddr)
 	}
 
 	fd, e := t.NewFDFrom(0, ns, kernel.FDFlags{
diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go
index 1f7d17f5fc..0f342e6553 100644
--- a/pkg/sentry/socket/netstack/netstack_vfs2.go
+++ b/pkg/sentry/socket/netstack/netstack_vfs2.go
@@ -151,14 +151,18 @@ func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs
 // tcpip.Endpoint.
 func (s *SocketVFS2) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error) {
 	// Issue the accept request to get the new endpoint.
-	ep, wq, terr := s.Endpoint.Accept()
+	var peerAddr *tcpip.FullAddress
+	if peerRequested {
+		peerAddr = &tcpip.FullAddress{}
+	}
+	ep, wq, terr := s.Endpoint.Accept(peerAddr)
 	if terr != nil {
 		if terr != tcpip.ErrWouldBlock || !blocking {
 			return 0, nil, 0, syserr.TranslateNetstackError(terr)
 		}
 
 		var err *syserr.Error
-		ep, wq, err = s.blockingAccept(t)
+		ep, wq, err = s.blockingAccept(t, peerAddr)
 		if err != nil {
 			return 0, nil, 0, err
 		}
@@ -176,13 +180,9 @@ func (s *SocketVFS2) Accept(t *kernel.Task, peerRequested bool, flags int, block
 
 	var addr linux.SockAddr
 	var addrLen uint32
-	if peerRequested {
+	if peerAddr != nil {
 		// Get address of the peer and write it to peer slice.
-		var err *syserr.Error
-		addr, addrLen, err = ns.Impl().(*SocketVFS2).GetPeerName(t)
-		if err != nil {
-			return 0, nil, 0, err
-		}
+		addr, addrLen = ConvertAddress(s.family, *peerAddr)
 	}
 
 	fd, e := t.NewFDFromVFS2(0, ns, kernel.FDFlags{
diff --git a/pkg/sentry/socket/unix/transport/connectioned.go b/pkg/sentry/socket/unix/transport/connectioned.go
index e3a75b519a..aa4f3c04d5 100644
--- a/pkg/sentry/socket/unix/transport/connectioned.go
+++ b/pkg/sentry/socket/unix/transport/connectioned.go
@@ -391,7 +391,7 @@ func (e *connectionedEndpoint) Listen(backlog int) *syserr.Error {
 }
 
 // Accept accepts a new connection.
-func (e *connectionedEndpoint) Accept() (Endpoint, *syserr.Error) {
+func (e *connectionedEndpoint) Accept(peerAddr *tcpip.FullAddress) (Endpoint, *syserr.Error) {
 	e.Lock()
 	defer e.Unlock()
 
@@ -401,6 +401,18 @@ func (e *connectionedEndpoint) Accept() (Endpoint, *syserr.Error) {
 
 	select {
 	case ne := <-e.acceptedChan:
+		if peerAddr != nil {
+			ne.Lock()
+			c := ne.connected
+			ne.Unlock()
+			if c != nil {
+				addr, err := c.GetLocalAddress()
+				if err != nil {
+					return nil, syserr.TranslateNetstackError(err)
+				}
+				*peerAddr = addr
+			}
+		}
 		return ne, nil
 
 	default:
diff --git a/pkg/sentry/socket/unix/transport/connectionless.go b/pkg/sentry/socket/unix/transport/connectionless.go
index 4751b2fd87..f8aacca13c 100644
--- a/pkg/sentry/socket/unix/transport/connectionless.go
+++ b/pkg/sentry/socket/unix/transport/connectionless.go
@@ -144,12 +144,12 @@ func (e *connectionlessEndpoint) Connect(ctx context.Context, server BoundEndpoi
 }
 
 // Listen starts listening on the connection.
-func (e *connectionlessEndpoint) Listen(int) *syserr.Error {
+func (*connectionlessEndpoint) Listen(int) *syserr.Error {
 	return syserr.ErrNotSupported
 }
 
 // Accept accepts a new connection.
-func (e *connectionlessEndpoint) Accept() (Endpoint, *syserr.Error) {
+func (*connectionlessEndpoint) Accept(*tcpip.FullAddress) (Endpoint, *syserr.Error) {
 	return nil, syserr.ErrNotSupported
 }
 
diff --git a/pkg/sentry/socket/unix/transport/unix.go b/pkg/sentry/socket/unix/transport/unix.go
index 1200cf9bb8..cbbdd000f1 100644
--- a/pkg/sentry/socket/unix/transport/unix.go
+++ b/pkg/sentry/socket/unix/transport/unix.go
@@ -151,7 +151,10 @@ type Endpoint interface {
 	// block if no new connections are available.
 	//
 	// The returned Queue is the wait queue for the newly created endpoint.
-	Accept() (Endpoint, *syserr.Error)
+	//
+	// peerAddr if not nil will be populated with the address of the connected
+	// peer on a successful accept.
+	Accept(peerAddr *tcpip.FullAddress) (Endpoint, *syserr.Error)
 
 	// Bind binds the endpoint to a specific local address and port.
 	// Specifying a NIC is optional.
diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go
index 0a7a26495b..616530eb6a 100644
--- a/pkg/sentry/socket/unix/unix.go
+++ b/pkg/sentry/socket/unix/unix.go
@@ -205,7 +205,7 @@ func (s *socketOpsCommon) Listen(t *kernel.Task, backlog int) *syserr.Error {
 
 // blockingAccept implements a blocking version of accept(2), that is, if no
 // connections are ready to be accept, it will block until one becomes ready.
-func (s *SocketOperations) blockingAccept(t *kernel.Task) (transport.Endpoint, *syserr.Error) {
+func (s *SocketOperations) blockingAccept(t *kernel.Task, peerAddr *tcpip.FullAddress) (transport.Endpoint, *syserr.Error) {
 	// Register for notifications.
 	e, ch := waiter.NewChannelEntry(nil)
 	s.EventRegister(&e, waiter.EventIn)
@@ -214,7 +214,7 @@ func (s *SocketOperations) blockingAccept(t *kernel.Task) (transport.Endpoint, *
 	// Try to accept the connection; if it fails, then wait until we get a
 	// notification.
 	for {
-		if ep, err := s.ep.Accept(); err != syserr.ErrWouldBlock {
+		if ep, err := s.ep.Accept(peerAddr); err != syserr.ErrWouldBlock {
 			return ep, err
 		}
 
@@ -227,15 +227,18 @@ func (s *SocketOperations) blockingAccept(t *kernel.Task) (transport.Endpoint, *
 // Accept implements the linux syscall accept(2) for sockets backed by
 // a transport.Endpoint.
 func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error) {
-	// Issue the accept request to get the new endpoint.
-	ep, err := s.ep.Accept()
+	var peerAddr *tcpip.FullAddress
+	if peerRequested {
+		peerAddr = &tcpip.FullAddress{}
+	}
+	ep, err := s.ep.Accept(peerAddr)
 	if err != nil {
 		if err != syserr.ErrWouldBlock || !blocking {
 			return 0, nil, 0, err
 		}
 
 		var err *syserr.Error
-		ep, err = s.blockingAccept(t)
+		ep, err = s.blockingAccept(t, peerAddr)
 		if err != nil {
 			return 0, nil, 0, err
 		}
@@ -252,13 +255,8 @@ func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int,
 
 	var addr linux.SockAddr
 	var addrLen uint32
-	if peerRequested {
-		// Get address of the peer.
-		var err *syserr.Error
-		addr, addrLen, err = ns.FileOperations.(*SocketOperations).GetPeerName(t)
-		if err != nil {
-			return 0, nil, 0, err
-		}
+	if peerAddr != nil {
+		addr, addrLen = netstack.ConvertAddress(linux.AF_UNIX, *peerAddr)
 	}
 
 	fd, e := t.NewFDFrom(0, ns, kernel.FDFlags{
diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go
index 65a285b8ff..e25c7e84a2 100644
--- a/pkg/sentry/socket/unix/unix_vfs2.go
+++ b/pkg/sentry/socket/unix/unix_vfs2.go
@@ -96,7 +96,7 @@ func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.
 
 // blockingAccept implements a blocking version of accept(2), that is, if no
 // connections are ready to be accept, it will block until one becomes ready.
-func (s *SocketVFS2) blockingAccept(t *kernel.Task) (transport.Endpoint, *syserr.Error) {
+func (s *SocketVFS2) blockingAccept(t *kernel.Task, peerAddr *tcpip.FullAddress) (transport.Endpoint, *syserr.Error) {
 	// Register for notifications.
 	e, ch := waiter.NewChannelEntry(nil)
 	s.socketOpsCommon.EventRegister(&e, waiter.EventIn)
@@ -105,7 +105,7 @@ func (s *SocketVFS2) blockingAccept(t *kernel.Task) (transport.Endpoint, *syserr
 	// Try to accept the connection; if it fails, then wait until we get a
 	// notification.
 	for {
-		if ep, err := s.ep.Accept(); err != syserr.ErrWouldBlock {
+		if ep, err := s.ep.Accept(peerAddr); err != syserr.ErrWouldBlock {
 			return ep, err
 		}
 
@@ -118,15 +118,18 @@ func (s *SocketVFS2) blockingAccept(t *kernel.Task) (transport.Endpoint, *syserr
 // Accept implements the linux syscall accept(2) for sockets backed by
 // a transport.Endpoint.
 func (s *SocketVFS2) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error) {
-	// Issue the accept request to get the new endpoint.
-	ep, err := s.ep.Accept()
+	var peerAddr *tcpip.FullAddress
+	if peerRequested {
+		peerAddr = &tcpip.FullAddress{}
+	}
+	ep, err := s.ep.Accept(peerAddr)
 	if err != nil {
 		if err != syserr.ErrWouldBlock || !blocking {
 			return 0, nil, 0, err
 		}
 
 		var err *syserr.Error
-		ep, err = s.blockingAccept(t)
+		ep, err = s.blockingAccept(t, peerAddr)
 		if err != nil {
 			return 0, nil, 0, err
 		}
@@ -144,13 +147,8 @@ func (s *SocketVFS2) Accept(t *kernel.Task, peerRequested bool, flags int, block
 
 	var addr linux.SockAddr
 	var addrLen uint32
-	if peerRequested {
-		// Get address of the peer.
-		var err *syserr.Error
-		addr, addrLen, err = ns.Impl().(*SocketVFS2).GetPeerName(t)
-		if err != nil {
-			return 0, nil, 0, err
-		}
+	if peerAddr != nil {
+		addr, addrLen = netstack.ConvertAddress(linux.AF_UNIX, *peerAddr)
 	}
 
 	fd, e := t.NewFDFromVFS2(0, ns, kernel.FDFlags{
diff --git a/pkg/tcpip/adapters/gonet/gonet.go b/pkg/tcpip/adapters/gonet/gonet.go
index 68a954a101..4f551cd925 100644
--- a/pkg/tcpip/adapters/gonet/gonet.go
+++ b/pkg/tcpip/adapters/gonet/gonet.go
@@ -245,7 +245,7 @@ func NewTCPConn(wq *waiter.Queue, ep tcpip.Endpoint) *TCPConn {
 
 // Accept implements net.Conn.Accept.
 func (l *TCPListener) Accept() (net.Conn, error) {
-	n, wq, err := l.ep.Accept()
+	n, wq, err := l.ep.Accept(nil)
 
 	if err == tcpip.ErrWouldBlock {
 		// Create wait queue entry that notifies a channel.
@@ -254,7 +254,7 @@ func (l *TCPListener) Accept() (net.Conn, error) {
 		defer l.wq.EventUnregister(&waitEntry)
 
 		for {
-			n, wq, err = l.ep.Accept()
+			n, wq, err = l.ep.Accept(nil)
 
 			if err != tcpip.ErrWouldBlock {
 				break
diff --git a/pkg/tcpip/sample/tun_tcp_echo/main.go b/pkg/tcpip/sample/tun_tcp_echo/main.go
index 9e37cab187..3f58a15ea9 100644
--- a/pkg/tcpip/sample/tun_tcp_echo/main.go
+++ b/pkg/tcpip/sample/tun_tcp_echo/main.go
@@ -188,7 +188,7 @@ func main() {
 	defer wq.EventUnregister(&waitEntry)
 
 	for {
-		n, wq, err := ep.Accept()
+		n, wq, err := ep.Accept(nil)
 		if err != nil {
 			if err == tcpip.ErrWouldBlock {
 				<-notifyCh
diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go
index a1458c899e..9292bfccba 100644
--- a/pkg/tcpip/stack/transport_test.go
+++ b/pkg/tcpip/stack/transport_test.go
@@ -180,7 +180,7 @@ func (*fakeTransportEndpoint) Listen(int) *tcpip.Error {
 	return nil
 }
 
-func (f *fakeTransportEndpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) {
+func (f *fakeTransportEndpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) {
 	if len(f.acceptQueue) == 0 {
 		return nil, nil, nil
 	}
@@ -631,7 +631,7 @@ func TestTransportForwarding(t *testing.T) {
 		Data: req.ToVectorisedView(),
 	}))
 
-	aep, _, err := ep.Accept()
+	aep, _, err := ep.Accept(nil)
 	if err != nil || aep == nil {
 		t.Fatalf("Accept failed: %v, %v", aep, err)
 	}
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index b113d86134..8ba6155217 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -561,7 +561,10 @@ type Endpoint interface {
 	// block if no new connections are available.
 	//
 	// The returned Queue is the wait queue for the newly created endpoint.
-	Accept() (Endpoint, *waiter.Queue, *Error)
+	//
+	// If peerAddr is not nil then it is populated with the peer address of the
+	// returned endpoint.
+	Accept(peerAddr *FullAddress) (Endpoint, *waiter.Queue, *Error)
 
 	// Bind binds the endpoint to a specific local address and port.
 	// Specifying a NIC is optional.
diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go
index 346ca4bdaa..ad71ff3b67 100644
--- a/pkg/tcpip/transport/icmp/endpoint.go
+++ b/pkg/tcpip/transport/icmp/endpoint.go
@@ -597,7 +597,7 @@ func (*endpoint) Listen(int) *tcpip.Error {
 }
 
 // Accept is not supported by UDP, it just fails.
-func (*endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) {
+func (*endpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) {
 	return nil, nil, tcpip.ErrNotSupported
 }
 
diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go
index 81093e9ca6..8bd4e5e37e 100644
--- a/pkg/tcpip/transport/packet/endpoint.go
+++ b/pkg/tcpip/transport/packet/endpoint.go
@@ -192,13 +192,13 @@ func (ep *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMes
 	return ep.ReadPacket(addr, nil)
 }
 
-func (ep *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-chan struct{}, *tcpip.Error) {
+func (*endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-chan struct{}, *tcpip.Error) {
 	// TODO(gvisor.dev/issue/173): Implement.
 	return 0, nil, tcpip.ErrInvalidOptionValue
 }
 
 // Peek implements tcpip.Endpoint.Peek.
-func (ep *endpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) {
+func (*endpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) {
 	return 0, tcpip.ControlMessages{}, nil
 }
 
@@ -210,25 +210,25 @@ func (*endpoint) Disconnect() *tcpip.Error {
 
 // Connect implements tcpip.Endpoint.Connect. Packet sockets cannot be
 // connected, and this function always returnes tcpip.ErrNotSupported.
-func (ep *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
+func (*endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
 	return tcpip.ErrNotSupported
 }
 
 // Shutdown implements tcpip.Endpoint.Shutdown. Packet sockets cannot be used
 // with Shutdown, and this function always returns tcpip.ErrNotSupported.
-func (ep *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error {
+func (*endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error {
 	return tcpip.ErrNotSupported
 }
 
 // Listen implements tcpip.Endpoint.Listen. Packet sockets cannot be used with
 // Listen, and this function always returns tcpip.ErrNotSupported.
-func (ep *endpoint) Listen(backlog int) *tcpip.Error {
+func (*endpoint) Listen(backlog int) *tcpip.Error {
 	return tcpip.ErrNotSupported
 }
 
 // Accept implements tcpip.Endpoint.Accept. Packet sockets cannot be used with
 // Accept, and this function always returns tcpip.ErrNotSupported.
-func (ep *endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) {
+func (*endpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) {
 	return nil, nil, tcpip.ErrNotSupported
 }
 
@@ -267,12 +267,12 @@ func (ep *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error {
 }
 
 // GetLocalAddress implements tcpip.Endpoint.GetLocalAddress.
-func (ep *endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
+func (*endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
 	return tcpip.FullAddress{}, tcpip.ErrNotSupported
 }
 
 // GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress.
-func (ep *endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) {
+func (*endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) {
 	// Even a connected socket doesn't return a remote address.
 	return tcpip.FullAddress{}, tcpip.ErrNotConnected
 }
@@ -371,7 +371,7 @@ func (*endpoint) GetSockOpt(tcpip.GettableSocketOption) *tcpip.Error {
 }
 
 // GetSockOptBool implements tcpip.Endpoint.GetSockOptBool.
-func (ep *endpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) {
+func (*endpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) {
 	return false, tcpip.ErrNotSupported
 }
 
@@ -508,7 +508,7 @@ func (ep *endpoint) HandlePacket(nicID tcpip.NICID, localAddr tcpip.LinkAddress,
 }
 
 // State implements socket.Socket.State.
-func (ep *endpoint) State() uint32 {
+func (*endpoint) State() uint32 {
 	return 0
 }
 
diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go
index 71feeb748a..fb03e6047d 100644
--- a/pkg/tcpip/transport/raw/endpoint.go
+++ b/pkg/tcpip/transport/raw/endpoint.go
@@ -446,12 +446,12 @@ func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error {
 }
 
 // Listen implements tcpip.Endpoint.Listen.
-func (e *endpoint) Listen(backlog int) *tcpip.Error {
+func (*endpoint) Listen(backlog int) *tcpip.Error {
 	return tcpip.ErrNotSupported
 }
 
 // Accept implements tcpip.Endpoint.Accept.
-func (e *endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) {
+func (*endpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) {
 	return nil, nil, tcpip.ErrNotSupported
 }
 
@@ -482,12 +482,12 @@ func (e *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error {
 }
 
 // GetLocalAddress implements tcpip.Endpoint.GetLocalAddress.
-func (e *endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
+func (*endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
 	return tcpip.FullAddress{}, tcpip.ErrNotSupported
 }
 
 // GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress.
-func (e *endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) {
+func (*endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) {
 	// Even a connected socket doesn't return a remote address.
 	return tcpip.FullAddress{}, tcpip.ErrNotConnected
 }
diff --git a/pkg/tcpip/transport/tcp/dual_stack_test.go b/pkg/tcpip/transport/tcp/dual_stack_test.go
index 6074cc24e0..80e9dd4652 100644
--- a/pkg/tcpip/transport/tcp/dual_stack_test.go
+++ b/pkg/tcpip/transport/tcp/dual_stack_test.go
@@ -371,12 +371,12 @@ func testV4Accept(t *testing.T, c *context.Context) {
 	c.WQ.EventRegister(&we, waiter.EventIn)
 	defer c.WQ.EventUnregister(&we)
 
-	nep, _, err := c.EP.Accept()
+	nep, _, err := c.EP.Accept(nil)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			nep, _, err = c.EP.Accept()
+			nep, _, err = c.EP.Accept(nil)
 			if err != nil {
 				t.Fatalf("Accept failed: %v", err)
 			}
@@ -510,13 +510,13 @@ func TestV6AcceptOnV6(t *testing.T) {
 	we, ch := waiter.NewChannelEntry(nil)
 	c.WQ.EventRegister(&we, waiter.EventIn)
 	defer c.WQ.EventUnregister(&we)
-
-	nep, _, err := c.EP.Accept()
+	var addr tcpip.FullAddress
+	nep, _, err := c.EP.Accept(&addr)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			nep, _, err = c.EP.Accept()
+			nep, _, err = c.EP.Accept(&addr)
 			if err != nil {
 				t.Fatalf("Accept failed: %v", err)
 			}
@@ -526,20 +526,14 @@ func TestV6AcceptOnV6(t *testing.T) {
 		}
 	}
 
+	if addr.Addr != context.TestV6Addr {
+		t.Errorf("Unexpected remote address: got %s, want %s", addr.Addr, context.TestV6Addr)
+	}
+
 	// Make sure we can still query the v6 only status of the new endpoint,
 	// that is, that it is in fact a v6 socket.
 	if _, err := nep.GetSockOptBool(tcpip.V6OnlyOption); err != nil {
-		t.Fatalf("GetSockOpt failed failed: %v", err)
-	}
-
-	// Check the peer address.
-	addr, err := nep.GetRemoteAddress()
-	if err != nil {
-		t.Fatalf("GetRemoteAddress failed failed: %v", err)
-	}
-
-	if addr.Addr != context.TestV6Addr {
-		t.Fatalf("Unexpected remote address: got %v, want %v", addr.Addr, context.TestV6Addr)
+		t.Errorf("GetSockOptBool(tcpip.V6OnlyOption) failed: %s", err)
 	}
 }
 
@@ -610,12 +604,12 @@ func testV4ListenClose(t *testing.T, c *context.Context) {
 	we, ch := waiter.NewChannelEntry(nil)
 	c.WQ.EventRegister(&we, waiter.EventIn)
 	defer c.WQ.EventUnregister(&we)
-	nep, _, err := c.EP.Accept()
+	nep, _, err := c.EP.Accept(nil)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			nep, _, err = c.EP.Accept()
+			nep, _, err = c.EP.Accept(nil)
 			if err != nil {
 				t.Fatalf("Accept failed: %v", err)
 			}
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 3f18efeefc..4cf966b65f 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -2453,7 +2453,9 @@ func (e *endpoint) startAcceptedLoop() {
 
 // Accept returns a new endpoint if a peer has established a connection
 // to an endpoint previously set to listen mode.
-func (e *endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) {
+//
+// peerAddr, if not nil, will contain the peer address of the returned endpoint.
+func (e *endpoint) Accept(peerAddr *tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) {
 	e.LockUser()
 	defer e.UnlockUser()
 
@@ -2475,6 +2477,9 @@ func (e *endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) {
 	default:
 		return nil, nil, tcpip.ErrWouldBlock
 	}
+	if peerAddr != nil {
+		*peerAddr = n.getRemoteAddress()
+	}
 	return n, n.waiterQueue, nil
 }
 
@@ -2577,11 +2582,15 @@ func (e *endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) {
 		return tcpip.FullAddress{}, tcpip.ErrNotConnected
 	}
 
+	return e.getRemoteAddress(), nil
+}
+
+func (e *endpoint) getRemoteAddress() tcpip.FullAddress {
 	return tcpip.FullAddress{
 		Addr: e.ID.RemoteAddress,
 		Port: e.ID.RemotePort,
 		NIC:  e.boundNICID,
-	}, nil
+	}
 }
 
 func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) {
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index adb32e4288..3d09d6def6 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -291,12 +291,12 @@ func TestTCPResetSentForACKWhenNotUsingSynCookies(t *testing.T) {
 	wq.EventRegister(&we, waiter.EventIn)
 	defer wq.EventUnregister(&we)
 
-	c.EP, _, err = ep.Accept()
+	c.EP, _, err = ep.Accept(nil)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			c.EP, _, err = ep.Accept()
+			c.EP, _, err = ep.Accept(nil)
 			if err != nil {
 				t.Fatalf("Accept failed: %s", err)
 			}
@@ -2203,12 +2203,12 @@ func TestScaledWindowAccept(t *testing.T) {
 	wq.EventRegister(&we, waiter.EventIn)
 	defer wq.EventUnregister(&we)
 
-	c.EP, _, err = ep.Accept()
+	c.EP, _, err = ep.Accept(nil)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			c.EP, _, err = ep.Accept()
+			c.EP, _, err = ep.Accept(nil)
 			if err != nil {
 				t.Fatalf("Accept failed: %s", err)
 			}
@@ -2277,12 +2277,12 @@ func TestNonScaledWindowAccept(t *testing.T) {
 	wq.EventRegister(&we, waiter.EventIn)
 	defer wq.EventUnregister(&we)
 
-	c.EP, _, err = ep.Accept()
+	c.EP, _, err = ep.Accept(nil)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			c.EP, _, err = ep.Accept()
+			c.EP, _, err = ep.Accept(nil)
 			if err != nil {
 				t.Fatalf("Accept failed: %s", err)
 			}
@@ -2840,12 +2840,12 @@ func TestPassiveSendMSSLessThanMTU(t *testing.T) {
 	wq.EventRegister(&we, waiter.EventIn)
 	defer wq.EventUnregister(&we)
 
-	c.EP, _, err = ep.Accept()
+	c.EP, _, err = ep.Accept(nil)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			c.EP, _, err = ep.Accept()
+			c.EP, _, err = ep.Accept(nil)
 			if err != nil {
 				t.Fatalf("Accept failed: %s", err)
 			}
@@ -2895,12 +2895,12 @@ func TestSynCookiePassiveSendMSSLessThanMTU(t *testing.T) {
 	wq.EventRegister(&we, waiter.EventIn)
 	defer wq.EventUnregister(&we)
 
-	c.EP, _, err = ep.Accept()
+	c.EP, _, err = ep.Accept(nil)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			c.EP, _, err = ep.Accept()
+			c.EP, _, err = ep.Accept(nil)
 			if err != nil {
 				t.Fatalf("Accept failed: %s", err)
 			}
@@ -5135,12 +5135,12 @@ func TestListenBacklogFull(t *testing.T) {
 	defer c.WQ.EventUnregister(&we)
 
 	for i := 0; i < listenBacklog; i++ {
-		_, _, err = c.EP.Accept()
+		_, _, err = c.EP.Accept(nil)
 		if err == tcpip.ErrWouldBlock {
 			// Wait for connection to be established.
 			select {
 			case <-ch:
-				_, _, err = c.EP.Accept()
+				_, _, err = c.EP.Accept(nil)
 				if err != nil {
 					t.Fatalf("Accept failed: %s", err)
 				}
@@ -5152,7 +5152,7 @@ func TestListenBacklogFull(t *testing.T) {
 	}
 
 	// Now verify that there are no more connections that can be accepted.
-	_, _, err = c.EP.Accept()
+	_, _, err = c.EP.Accept(nil)
 	if err != tcpip.ErrWouldBlock {
 		select {
 		case <-ch:
@@ -5164,12 +5164,12 @@ func TestListenBacklogFull(t *testing.T) {
 	// Now a new handshake must succeed.
 	executeHandshake(t, c, context.TestPort+2, false /*synCookieInUse */)
 
-	newEP, _, err := c.EP.Accept()
+	newEP, _, err := c.EP.Accept(nil)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			newEP, _, err = c.EP.Accept()
+			newEP, _, err = c.EP.Accept(nil)
 			if err != nil {
 				t.Fatalf("Accept failed: %s", err)
 			}
@@ -5476,12 +5476,12 @@ func TestListenSynRcvdQueueFull(t *testing.T) {
 	c.WQ.EventRegister(&we, waiter.EventIn)
 	defer c.WQ.EventUnregister(&we)
 
-	newEP, _, err := c.EP.Accept()
+	newEP, _, err := c.EP.Accept(nil)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			newEP, _, err = c.EP.Accept()
+			newEP, _, err = c.EP.Accept(nil)
 			if err != nil {
 				t.Fatalf("Accept failed: %s", err)
 			}
@@ -5552,12 +5552,12 @@ func TestListenBacklogFullSynCookieInUse(t *testing.T) {
 	c.WQ.EventRegister(&we, waiter.EventIn)
 	defer c.WQ.EventUnregister(&we)
 
-	_, _, err = c.EP.Accept()
+	_, _, err = c.EP.Accept(nil)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			_, _, err = c.EP.Accept()
+			_, _, err = c.EP.Accept(nil)
 			if err != nil {
 				t.Fatalf("Accept failed: %s", err)
 			}
@@ -5568,7 +5568,7 @@ func TestListenBacklogFullSynCookieInUse(t *testing.T) {
 	}
 
 	// Now verify that there are no more connections that can be accepted.
-	_, _, err = c.EP.Accept()
+	_, _, err = c.EP.Accept(nil)
 	if err != tcpip.ErrWouldBlock {
 		select {
 		case <-ch:
@@ -5657,7 +5657,7 @@ func TestSynRcvdBadSeqNumber(t *testing.T) {
 		RcvWnd:  30000,
 	})
 
-	newEP, _, err := c.EP.Accept()
+	newEP, _, err := c.EP.Accept(nil)
 
 	if err != nil && err != tcpip.ErrWouldBlock {
 		t.Fatalf("Accept failed: %s", err)
@@ -5672,7 +5672,7 @@ func TestSynRcvdBadSeqNumber(t *testing.T) {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			newEP, _, err = c.EP.Accept()
+			newEP, _, err = c.EP.Accept(nil)
 			if err != nil {
 				t.Fatalf("Accept failed: %s", err)
 			}
@@ -5730,12 +5730,12 @@ func TestPassiveConnectionAttemptIncrement(t *testing.T) {
 	defer c.WQ.EventUnregister(&we)
 
 	// Verify that there is only one acceptable connection at this point.
-	_, _, err = c.EP.Accept()
+	_, _, err = c.EP.Accept(nil)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			_, _, err = c.EP.Accept()
+			_, _, err = c.EP.Accept(nil)
 			if err != nil {
 				t.Fatalf("Accept failed: %s", err)
 			}
@@ -5800,12 +5800,12 @@ func TestPassiveFailedConnectionAttemptIncrement(t *testing.T) {
 	defer c.WQ.EventUnregister(&we)
 
 	// Now check that there is one acceptable connections.
-	_, _, err = c.EP.Accept()
+	_, _, err = c.EP.Accept(nil)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			_, _, err = c.EP.Accept()
+			_, _, err = c.EP.Accept(nil)
 			if err != nil {
 				t.Fatalf("Accept failed: %s", err)
 			}
@@ -5853,12 +5853,12 @@ func TestEndpointBindListenAcceptState(t *testing.T) {
 	wq.EventRegister(&we, waiter.EventIn)
 	defer wq.EventUnregister(&we)
 
-	aep, _, err := ep.Accept()
+	aep, _, err := ep.Accept(nil)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			aep, _, err = ep.Accept()
+			aep, _, err = ep.Accept(nil)
 			if err != nil {
 				t.Fatalf("Accept failed: %s", err)
 			}
@@ -6293,12 +6293,12 @@ func TestTCPTimeWaitRSTIgnored(t *testing.T) {
 	wq.EventRegister(&we, waiter.EventIn)
 	defer wq.EventUnregister(&we)
 
-	c.EP, _, err = ep.Accept()
+	c.EP, _, err = ep.Accept(nil)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			c.EP, _, err = ep.Accept()
+			c.EP, _, err = ep.Accept(nil)
 			if err != nil {
 				t.Fatalf("Accept failed: %s", err)
 			}
@@ -6412,12 +6412,12 @@ func TestTCPTimeWaitOutOfOrder(t *testing.T) {
 	wq.EventRegister(&we, waiter.EventIn)
 	defer wq.EventUnregister(&we)
 
-	c.EP, _, err = ep.Accept()
+	c.EP, _, err = ep.Accept(nil)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			c.EP, _, err = ep.Accept()
+			c.EP, _, err = ep.Accept(nil)
 			if err != nil {
 				t.Fatalf("Accept failed: %s", err)
 			}
@@ -6519,12 +6519,12 @@ func TestTCPTimeWaitNewSyn(t *testing.T) {
 	wq.EventRegister(&we, waiter.EventIn)
 	defer wq.EventUnregister(&we)
 
-	c.EP, _, err = ep.Accept()
+	c.EP, _, err = ep.Accept(nil)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			c.EP, _, err = ep.Accept()
+			c.EP, _, err = ep.Accept(nil)
 			if err != nil {
 				t.Fatalf("Accept failed: %s", err)
 			}
@@ -6602,12 +6602,12 @@ func TestTCPTimeWaitNewSyn(t *testing.T) {
 	c.SendPacket(nil, ackHeaders)
 
 	// Try to accept the connection.
-	c.EP, _, err = ep.Accept()
+	c.EP, _, err = ep.Accept(nil)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			c.EP, _, err = ep.Accept()
+			c.EP, _, err = ep.Accept(nil)
 			if err != nil {
 				t.Fatalf("Accept failed: %s", err)
 			}
@@ -6675,12 +6675,12 @@ func TestTCPTimeWaitDuplicateFINExtendsTimeWait(t *testing.T) {
 	wq.EventRegister(&we, waiter.EventIn)
 	defer wq.EventUnregister(&we)
 
-	c.EP, _, err = ep.Accept()
+	c.EP, _, err = ep.Accept(nil)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			c.EP, _, err = ep.Accept()
+			c.EP, _, err = ep.Accept(nil)
 			if err != nil {
 				t.Fatalf("Accept failed: %s", err)
 			}
@@ -6824,12 +6824,12 @@ func TestTCPCloseWithData(t *testing.T) {
 	wq.EventRegister(&we, waiter.EventIn)
 	defer wq.EventUnregister(&we)
 
-	c.EP, _, err = ep.Accept()
+	c.EP, _, err = ep.Accept(nil)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			c.EP, _, err = ep.Accept()
+			c.EP, _, err = ep.Accept(nil)
 			if err != nil {
 				t.Fatalf("Accept failed: %s", err)
 			}
@@ -7271,8 +7271,8 @@ func TestTCPDeferAccept(t *testing.T) {
 
 	irs, iss := executeHandshake(t, c, context.TestPort, false /* synCookiesInUse */)
 
-	if _, _, err := c.EP.Accept(); err != tcpip.ErrWouldBlock {
-		t.Fatalf("c.EP.Accept() returned unexpected error got: %s, want: %s", err, tcpip.ErrWouldBlock)
+	if _, _, err := c.EP.Accept(nil); err != tcpip.ErrWouldBlock {
+		t.Fatalf("got c.EP.Accept(nil) = %s, want: %s", err, tcpip.ErrWouldBlock)
 	}
 
 	// Send data. This should result in an acceptable endpoint.
@@ -7293,9 +7293,9 @@ func TestTCPDeferAccept(t *testing.T) {
 
 	// Give a bit of time for the socket to be delivered to the accept queue.
 	time.Sleep(50 * time.Millisecond)
-	aep, _, err := c.EP.Accept()
+	aep, _, err := c.EP.Accept(nil)
 	if err != nil {
-		t.Fatalf("c.EP.Accept() returned unexpected error got: %s, want: nil", err)
+		t.Fatalf("got c.EP.Accept(nil) = %s, want: nil", err)
 	}
 
 	aep.Close()
@@ -7329,8 +7329,8 @@ func TestTCPDeferAcceptTimeout(t *testing.T) {
 
 	irs, iss := executeHandshake(t, c, context.TestPort, false /* synCookiesInUse */)
 
-	if _, _, err := c.EP.Accept(); err != tcpip.ErrWouldBlock {
-		t.Fatalf("c.EP.Accept() returned unexpected error got: %s, want: %s", err, tcpip.ErrWouldBlock)
+	if _, _, err := c.EP.Accept(nil); err != tcpip.ErrWouldBlock {
+		t.Fatalf("got c.EP.Accept(nil) = %s, want: %s", err, tcpip.ErrWouldBlock)
 	}
 
 	// Sleep for a little of the tcpDeferAccept timeout.
@@ -7362,9 +7362,9 @@ func TestTCPDeferAcceptTimeout(t *testing.T) {
 
 	// Give sometime for the endpoint to be delivered to the accept queue.
 	time.Sleep(50 * time.Millisecond)
-	aep, _, err := c.EP.Accept()
+	aep, _, err := c.EP.Accept(nil)
 	if err != nil {
-		t.Fatalf("c.EP.Accept() returned unexpected error got: %s, want: nil", err)
+		t.Fatalf("got c.EP.Accept(nil) = %s, want: nil", err)
 	}
 
 	aep.Close()
diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go
index 1f5340cd03..8bb5e5f6da 100644
--- a/pkg/tcpip/transport/tcp/testing/context/context.go
+++ b/pkg/tcpip/transport/tcp/testing/context/context.go
@@ -948,12 +948,12 @@ func (c *Context) AcceptWithOptions(wndScale int, synOptions header.TCPSynOption
 	wq.EventRegister(&we, waiter.EventIn)
 	defer wq.EventUnregister(&we)
 
-	c.EP, _, err = ep.Accept()
+	c.EP, _, err = ep.Accept(nil)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			c.EP, _, err = ep.Accept()
+			c.EP, _, err = ep.Accept(nil)
 			if err != nil {
 				c.t.Fatalf("Accept failed: %v", err)
 			}
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index c74bc4d946..2828b2c01d 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -1218,7 +1218,7 @@ func (*endpoint) Listen(int) *tcpip.Error {
 }
 
 // Accept is not supported by UDP, it just fails.
-func (*endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) {
+func (*endpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) {
 	return nil, nil, tcpip.ErrNotSupported
 }
 
diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc
index ffcd904752..54fee2e828 100644
--- a/test/syscalls/linux/socket_inet_loopback.cc
+++ b/test/syscalls/linux/socket_inet_loopback.cc
@@ -1161,30 +1161,26 @@ TEST_P(SocketInetLoopbackTest, TCPAcceptAfterReset) {
       SyscallSucceeds());
   ASSERT_THAT(close(conn_fd.release()), SyscallSucceeds());
 
-  // TODO(gvisor.dev/issue/3780): Remove this.
   if (IsRunningOnGvisor()) {
-    // Wait for the RST to be observed.
+    // Gvisor packet processing is asynchronous and can take a bit of time in
+    // some cases so we give it a bit of time to process the RST packet before
+    // calling accept.
+    //
+    // There is nothing to poll() on so we have no choice but to use a sleep
+    // here.
     absl::SleepFor(absl::Milliseconds(100));
   }
 
   sockaddr_storage accept_addr;
   socklen_t addrlen = sizeof(accept_addr);
 
-  // TODO(gvisor.dev/issue/3780): Remove this.
-  if (IsRunningOnGvisor()) {
-    ASSERT_THAT(accept(listen_fd.get(),
-                       reinterpret_cast<sockaddr*>(&accept_addr), &addrlen),
-                SyscallFailsWithErrno(ENOTCONN));
-    return;
-  }
-
-  conn_fd = ASSERT_NO_ERRNO_AND_VALUE(Accept(
+  auto accept_fd = ASSERT_NO_ERRNO_AND_VALUE(Accept(
       listen_fd.get(), reinterpret_cast<sockaddr*>(&accept_addr), &addrlen));
   ASSERT_EQ(addrlen, listener.addr_len);
 
   int err;
   socklen_t optlen = sizeof(err);
-  ASSERT_THAT(getsockopt(conn_fd.get(), SOL_SOCKET, SO_ERROR, &err, &optlen),
+  ASSERT_THAT(getsockopt(accept_fd.get(), SOL_SOCKET, SO_ERROR, &err, &optlen),
               SyscallSucceeds());
   ASSERT_EQ(err, ECONNRESET);
   ASSERT_EQ(optlen, sizeof(err));

From 6ff4234587a7509db17262c7a64db17daee12806 Mon Sep 17 00:00:00 2001
From: Zeling Feng <zeling@google.com>
Date: Wed, 2 Sep 2020 19:17:32 -0700
Subject: [PATCH 142/211] Add support to run packetimpact tests against Fuchsia

blaze test <test_name>_fuchsia_test will run the corresponding packetimpact
test against fuchsia.

PiperOrigin-RevId: 329835290
---
 images/Makefile                               |   4 +-
 images/packetimpact/Dockerfile                |   8 +-
 pkg/test/dockerutil/container.go              |  12 +-
 test/packetimpact/dut/BUILD                   |  10 +
 test/packetimpact/dut/posix_server.cc         |   5 +-
 test/packetimpact/runner/BUILD                |  26 +-
 test/packetimpact/runner/defs.bzl             |   5 +-
 test/packetimpact/runner/dut.go               | 438 ++++++++++++++++++
 test/packetimpact/runner/packetimpact_test.go | 359 +-------------
 9 files changed, 492 insertions(+), 375 deletions(-)
 create mode 100644 test/packetimpact/runner/dut.go

diff --git a/images/Makefile b/images/Makefile
index 278dec02f4..d183155a80 100644
--- a/images/Makefile
+++ b/images/Makefile
@@ -59,9 +59,9 @@ local_image = $(LOCAL_IMAGE_PREFIX)/$(subst _,/,$(1))
 # we need to explicitly repull the base layer in order to ensure that the
 # architecture is correct. Note that we use the term "rebuild" here to avoid
 # conflicting with the bazel "build" terminology, which is used elsewhere.
-rebuild-%: FROM=$(shell grep FROM $(call path,$*)/Dockerfile } cut -d' ' -f2)
+rebuild-%: FROM=$(shell grep FROM $(call path,$*)/Dockerfile | cut -d' ' -f2)
 rebuild-%: register-cross
-	$(foreach IMAGE,$(FROM),docker $(DOCKER_PLATFORM_ARGS) $(IMAGE); &&) true
+	$(foreach IMAGE,$(FROM),docker pull $(DOCKER_PLATFORM_ARGS) $(IMAGE) &&) true && \
 	T=$$(mktemp -d) && cp -a $(call path,$*)/* $$T && \
 		docker build $(DOCKER_PLATFORM_ARGS) -t $(call remote_image,$*) $$T && \
 		rm -rf $$T
diff --git a/images/packetimpact/Dockerfile b/images/packetimpact/Dockerfile
index 87aa99ef2e..82b7e8abd4 100644
--- a/images/packetimpact/Dockerfile
+++ b/images/packetimpact/Dockerfile
@@ -1,4 +1,4 @@
-FROM ubuntu:bionic
+FROM ubuntu:focal
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
         # iptables to disable OS native packet processing.
         iptables \
@@ -11,6 +11,10 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
         # tshark to log verbose packet sniffing.
         tshark \
         # killall for cleanup.
-        psmisc
+        psmisc \
+        # qemu-system-x86 to emulate fuchsia.
+        qemu-system-x86 \
+        # sha1sum to generate entropy.
+        libdigest-sha-perl
 RUN hash -r
 CMD /bin/bash
diff --git a/pkg/test/dockerutil/container.go b/pkg/test/dockerutil/container.go
index 052b6b99d6..64d17f661e 100644
--- a/pkg/test/dockerutil/container.go
+++ b/pkg/test/dockerutil/container.go
@@ -22,6 +22,7 @@ import (
 	"net"
 	"os"
 	"path"
+	"path/filepath"
 	"regexp"
 	"strconv"
 	"strings"
@@ -403,10 +404,13 @@ func (c *Container) CopyFiles(opts *RunOpts, target string, sources ...string) {
 		return
 	}
 	for _, name := range sources {
-		src, err := testutil.FindFile(name)
-		if err != nil {
-			c.copyErr = fmt.Errorf("testutil.FindFile(%q) failed: %v", name, err)
-			return
+		src := name
+		if !filepath.IsAbs(src) {
+			src, err = testutil.FindFile(name)
+			if err != nil {
+				c.copyErr = fmt.Errorf("testutil.FindFile(%q) failed: %w", name, err)
+				return
+			}
 		}
 		dst := path.Join(dir, path.Base(name))
 		if err := testutil.Copy(src, dst); err != nil {
diff --git a/test/packetimpact/dut/BUILD b/test/packetimpact/dut/BUILD
index 3ce63c2c6d..ccf1c735f4 100644
--- a/test/packetimpact/dut/BUILD
+++ b/test/packetimpact/dut/BUILD
@@ -16,3 +16,13 @@ cc_binary(
         "//test/packetimpact/proto:posix_server_cc_proto",
     ],
 )
+
+cc_binary(
+    name = "posix_server_dynamic",
+    srcs = ["posix_server.cc"],
+    deps = [
+        grpcpp,
+        "//test/packetimpact/proto:posix_server_cc_grpc_proto",
+        "//test/packetimpact/proto:posix_server_cc_proto",
+    ],
+)
diff --git a/test/packetimpact/dut/posix_server.cc b/test/packetimpact/dut/posix_server.cc
index de5b4be93b..2f3becfba4 100644
--- a/test/packetimpact/dut/posix_server.cc
+++ b/test/packetimpact/dut/posix_server.cc
@@ -21,6 +21,7 @@
 #include <string.h>
 #include <sys/socket.h>
 #include <sys/types.h>
+#include <time.h>
 #include <unistd.h>
 
 #include <iostream>
@@ -307,9 +308,9 @@ class PosixImpl final : public posix_server::Posix::Service {
         break;
       }
       case ::posix_server::SockOptVal::kTimeval: {
-        timeval tv = {.tv_sec = static_cast<__time_t>(
+        timeval tv = {.tv_sec = static_cast<time_t>(
                           request->optval().timeval().seconds()),
-                      .tv_usec = static_cast<__suseconds_t>(
+                      .tv_usec = static_cast<suseconds_t>(
                           request->optval().timeval().microseconds())};
         response->set_ret(setsockopt(request->sockfd(), request->level(),
                                      request->optname(), &tv, sizeof(tv)));
diff --git a/test/packetimpact/runner/BUILD b/test/packetimpact/runner/BUILD
index ff2be9b306..605dd49729 100644
--- a/test/packetimpact/runner/BUILD
+++ b/test/packetimpact/runner/BUILD
@@ -1,4 +1,4 @@
-load("//tools:defs.bzl", "bzl_library", "go_test")
+load("//tools:defs.bzl", "bzl_library", "go_library", "go_test")
 
 package(
     default_visibility = ["//test/packetimpact:__subpackages__"],
@@ -7,21 +7,31 @@ package(
 
 go_test(
     name = "packetimpact_test",
-    srcs = ["packetimpact_test.go"],
+    srcs = [
+        "packetimpact_test.go",
+    ],
     tags = [
         # Not intended to be run directly.
         "local",
         "manual",
     ],
-    deps = [
-        "//pkg/test/dockerutil",
-        "//test/packetimpact/netdevs",
-        "@com_github_docker_docker//api/types/mount:go_default_library",
-    ],
+    deps = [":runner"],
 )
 
 bzl_library(
     name = "defs_bzl",
     srcs = ["defs.bzl"],
-    visibility = ["//visibility:private"],
+    visibility = ["//test/packetimpact:__subpackages__"],
+)
+
+go_library(
+    name = "runner",
+    testonly = True,
+    srcs = ["dut.go"],
+    visibility = ["//test/packetimpact:__subpackages__"],
+    deps = [
+        "//pkg/test/dockerutil",
+        "//test/packetimpact/netdevs",
+        "@com_github_docker_docker//api/types/mount:go_default_library",
+    ],
 )
diff --git a/test/packetimpact/runner/defs.bzl b/test/packetimpact/runner/defs.bzl
index d72c63fe6d..f56d3c42e4 100644
--- a/test/packetimpact/runner/defs.bzl
+++ b/test/packetimpact/runner/defs.bzl
@@ -23,8 +23,9 @@ def _packetimpact_test_impl(ctx):
     transitive_files = []
     if hasattr(ctx.attr._test_runner, "data_runfiles"):
         transitive_files.append(ctx.attr._test_runner.data_runfiles.files)
+    files = [test_runner] + ctx.files.testbench_binary + ctx.files._posix_server
     runfiles = ctx.runfiles(
-        files = [test_runner] + ctx.files.testbench_binary + ctx.files._posix_server_binary,
+        files = files,
         transitive_files = depset(transitive = transitive_files),
         collect_default = True,
         collect_data = True,
@@ -38,7 +39,7 @@ _packetimpact_test = rule(
             cfg = "target",
             default = ":packetimpact_test",
         ),
-        "_posix_server_binary": attr.label(
+        "_posix_server": attr.label(
             cfg = "target",
             default = "//test/packetimpact/dut:posix_server",
         ),
diff --git a/test/packetimpact/runner/dut.go b/test/packetimpact/runner/dut.go
new file mode 100644
index 0000000000..be7b52f189
--- /dev/null
+++ b/test/packetimpact/runner/dut.go
@@ -0,0 +1,438 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package runner starts docker containers and networking for a packetimpact test.
+package runner
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"math/rand"
+	"net"
+	"os"
+	"os/exec"
+	"path"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/docker/docker/api/types/mount"
+	"gvisor.dev/gvisor/pkg/test/dockerutil"
+	"gvisor.dev/gvisor/test/packetimpact/netdevs"
+)
+
+// stringList is a flag.Value that collects every occurrence of a flag.
+type stringList []string
+
+// String implements flag.Value.String by joining the values with commas.
+func (l *stringList) String() string {
+	return strings.Join(*l, ",")
+}
+
+// Set implements flag.Value.Set by appending value; it never returns an error.
+func (l *stringList) Set(value string) error {
+	*l = append(*l, value)
+	return nil
+}
+
+var (
+	native          = false
+	testbenchBinary = ""
+	tshark          = false
+	extraTestArgs   = stringList{}
+	expectFailure   = false
+
+	// DutAddr is the IP address for DUT.
+	DutAddr       = net.IPv4(0, 0, 0, 10)
+	testbenchAddr = net.IPv4(0, 0, 0, 20)
+)
+
+// RegisterFlags defines the runner's flags on fs and binds them to the
+// package-level variables above. It should be called by tests in their init
+// functions.
+func RegisterFlags(fs *flag.FlagSet) {
+	fs.BoolVar(&native, "native", false, "whether the test should be run natively")
+	fs.StringVar(&testbenchBinary, "testbench_binary", "", "path to the testbench binary")
+	fs.BoolVar(&tshark, "tshark", false, "use more verbose tshark in logs instead of tcpdump")
+	fs.Var(&extraTestArgs, "extra_test_arg", "extra arguments to pass to the testbench")
+	fs.BoolVar(&expectFailure, "expect_failure", false, "expect that the test will fail when run")
+}
+
+// CtrlPort is the port that posix_server listens on.
+const CtrlPort = "40000"
+
+// logger implements testutil.Logger; its string value is the log label.
+//
+// Each message is prefixed with the label; continuation lines are indented.
+type logger string
+
+// Name implements testutil.Logger.Name by returning the label itself.
+func (l logger) Name() string {
+	return string(l)
+}
+
+// Logf implements testutil.Logger.Logf, emitting one log entry per line.
+func (l logger) Logf(format string, args ...interface{}) {
+	lines := strings.Split(fmt.Sprintf(format, args...), "\n")
+	log.Printf("%s: %s", l, lines[0])
+	for _, line := range lines[1:] {
+		log.Printf("%*s  %s", len(l), "", line)
+	}
+}
+
+// TestWithDUT runs a packetimpact test with the given information.
+func TestWithDUT(ctx context.Context, t *testing.T, mkDevice func(*dockerutil.Container) DUT, containerAddr net.IP) {
+	if testbenchBinary == "" {
+		t.Fatal("--testbench_binary is missing")
+	}
+	dockerutil.EnsureSupportedDockerVersion()
+
+	// Create the networks needed for the test. One control network is needed for
+	// the gRPC control packets and one test network on which to transmit the test
+	// packets.
+	ctrlNet := dockerutil.NewNetwork(ctx, logger("ctrlNet"))
+	testNet := dockerutil.NewNetwork(ctx, logger("testNet"))
+	for _, dn := range []*dockerutil.Network{ctrlNet, testNet} {
+		for {
+			if err := createDockerNetwork(ctx, dn); err != nil {
+				t.Log("creating docker network:", err)
+				const wait = 100 * time.Millisecond
+				t.Logf("sleeping %s and will try creating docker network again", wait)
+				// This can fail if another docker network claimed the same IP so we'll
+				// just try again.
+				time.Sleep(wait)
+				continue
+			}
+			break
+		}
+		dn := dn
+		t.Cleanup(func() {
+			if err := dn.Cleanup(ctx); err != nil {
+				t.Errorf("unable to cleanup container %s: %s", dn.Name, err)
+			}
+		})
+		// Sanity check.
+		if inspect, err := dn.Inspect(ctx); err != nil {
+			t.Fatalf("failed to inspect network %s: %v", dn.Name, err)
+		} else if inspect.Name != dn.Name {
+			t.Fatalf("name mismatch for network want: %s got: %s", dn.Name, inspect.Name)
+		}
+	}
+
+	tmpDir, err := ioutil.TempDir("", "container-output")
+	if err != nil {
+		t.Fatal("creating temp dir:", err)
+	}
+	t.Cleanup(func() {
+		if err := exec.Command("/bin/cp", "-r", tmpDir, os.Getenv("TEST_UNDECLARED_OUTPUTS_DIR")).Run(); err != nil {
+			t.Errorf("unable to copy container output files: %s", err)
+		}
+		if err := os.RemoveAll(tmpDir); err != nil {
+			t.Errorf("failed to remove tmpDir %s: %s", tmpDir, err)
+		}
+	})
+
+	const testOutputDir = "/tmp/testoutput"
+
+	// Create the Docker container for the DUT.
+	var dut *dockerutil.Container
+	if native {
+		dut = dockerutil.MakeNativeContainer(ctx, logger("dut"))
+	} else {
+		dut = dockerutil.MakeContainer(ctx, logger("dut"))
+	}
+	t.Cleanup(func() {
+		dut.CleanUp(ctx)
+	})
+
+	runOpts := dockerutil.RunOpts{
+		Image:  "packetimpact",
+		CapAdd: []string{"NET_ADMIN"},
+		Mounts: []mount.Mount{{
+			Type:     mount.TypeBind,
+			Source:   tmpDir,
+			Target:   testOutputDir,
+			ReadOnly: false,
+		}},
+	}
+
+	// Add ctrlNet as eth1 and testNet as eth2.
+	const testNetDev = "eth2"
+
+	device := mkDevice(dut)
+	remoteIPv6, remoteMAC, dutDeviceID := device.Prepare(ctx, t, runOpts, ctrlNet, testNet, containerAddr)
+
+	// Create the Docker container for the testbench.
+	testbench := dockerutil.MakeNativeContainer(ctx, logger("testbench"))
+
+	tbb := path.Base(testbenchBinary)
+	containerTestbenchBinary := filepath.Join("/packetimpact", tbb)
+	testbench.CopyFiles(&runOpts, "/packetimpact", filepath.Join("test/packetimpact/tests", tbb))
+
+	// Run tcpdump in the test bench unbuffered, without DNS resolution, just on
+	// the interface with the test packets.
+	snifferArgs := []string{
+		"tcpdump",
+		"-S", "-vvv", "-U", "-n",
+		"-i", testNetDev,
+		"-w", testOutputDir + "/dump.pcap",
+	}
+	snifferRegex := "tcpdump: listening.*\n"
+	if tshark {
+		// Run tshark in the test bench unbuffered, without DNS resolution, just on
+		// the interface with the test packets.
+		snifferArgs = []string{
+			"tshark", "-V", "-l", "-n", "-i", testNetDev,
+			"-o", "tcp.check_checksum:TRUE",
+			"-o", "udp.check_checksum:TRUE",
+		}
+		snifferRegex = "Capturing on.*\n"
+	}
+
+	if err := StartContainer(
+		ctx,
+		runOpts,
+		testbench,
+		testbenchAddr,
+		[]*dockerutil.Network{ctrlNet, testNet},
+		snifferArgs...,
+	); err != nil {
+		t.Fatalf("failed to start docker container for testbench sniffer: %s", err)
+	}
+	// Kill so that it will flush output.
+	t.Cleanup(func() {
+		time.Sleep(1 * time.Second)
+		testbench.Exec(ctx, dockerutil.ExecOpts{}, "killall", snifferArgs[0])
+	})
+
+	if _, err := testbench.WaitForOutput(ctx, snifferRegex, 60*time.Second); err != nil {
+		t.Fatalf("sniffer on %s never listened: %s", dut.Name, err)
+	}
+
+	// When the Linux kernel receives a SYN-ACK for a SYN it didn't send, it
+	// will respond with an RST. In most packetimpact tests, the SYN is sent
+	// by the raw socket and the kernel knows nothing about the connection, so
+	// this behavior would break many TCP-related packetimpact tests. To prevent
+	// this, we install the following iptables rules. The raw socket that
+	// packetimpact tests use will still be able to see everything.
+	for _, bin := range []string{"iptables", "ip6tables"} {
+		if logs, err := testbench.Exec(ctx, dockerutil.ExecOpts{}, bin, "-A", "INPUT", "-i", testNetDev, "-p", "tcp", "-j", "DROP"); err != nil {
+			t.Fatalf("unable to Exec %s on container %s: %s, logs from testbench:\n%s", bin, testbench.Name, err, logs)
+		}
+	}
+
+	// FIXME(b/156449515): Some piece of the system has a race. The old
+	// bash script version had a sleep, so we have one too. The race should
+	// be fixed and this sleep removed.
+	time.Sleep(time.Second)
+
+	// Start a packetimpact test on the test bench. The packetimpact test sends
+	// and receives packets and also sends POSIX socket commands to the
+	// posix_server to be executed on the DUT.
+	testArgs := []string{containerTestbenchBinary}
+	testArgs = append(testArgs, extraTestArgs...)
+	testArgs = append(testArgs,
+		"--posix_server_ip", AddressInSubnet(DutAddr, *ctrlNet.Subnet).String(),
+		"--posix_server_port", CtrlPort,
+		"--remote_ipv4", AddressInSubnet(DutAddr, *testNet.Subnet).String(),
+		"--local_ipv4", AddressInSubnet(testbenchAddr, *testNet.Subnet).String(),
+		"--remote_ipv6", remoteIPv6.String(),
+		"--remote_mac", remoteMAC.String(),
+		"--remote_interface_id", fmt.Sprintf("%d", dutDeviceID),
+		"--device", testNetDev,
+		fmt.Sprintf("--native=%t", native),
+	)
+	testbenchLogs, err := testbench.Exec(ctx, dockerutil.ExecOpts{}, testArgs...)
+	if (err != nil) != expectFailure {
+		var dutLogs string
+		if logs, err := device.Logs(ctx); err != nil {
+			dutLogs = fmt.Sprintf("failed to fetch DUT logs: %s", err)
+		} else {
+			dutLogs = logs
+		}
+
+		t.Errorf(`test error: %v, expect failure: %t
+
+%s
+
+====== Begin of Testbench Logs ======
+
+%s
+
+====== End of Testbench Logs ======`,
+			err, expectFailure, dutLogs, testbenchLogs)
+	}
+}
+
+// DUT describes how to setup/teardown the dut for packetimpact tests.
+type DUT interface {
+	// Prepare prepares the dut, starts posix_server and returns the IPv6, MAC
+	// address and the interface ID for the testNet on DUT.
+	Prepare(ctx context.Context, t *testing.T, runOpts dockerutil.RunOpts, ctrlNet, testNet *dockerutil.Network, containerAddr net.IP) (net.IP, net.HardwareAddr, uint32)
+	// Logs retrieves the logs from the dut.
+	Logs(ctx context.Context) (string, error)
+}
+
+// DockerDUT describes a docker based DUT.
+type DockerDUT struct {
+	c *dockerutil.Container
+}
+
+// NewDockerDUT creates a docker based DUT.
+func NewDockerDUT(c *dockerutil.Container) DUT {
+	return &DockerDUT{
+		c: c,
+	}
+}
+
+// Prepare implements DUT.Prepare.
+func (dut *DockerDUT) Prepare(ctx context.Context, t *testing.T, runOpts dockerutil.RunOpts, ctrlNet, testNet *dockerutil.Network, containerAddr net.IP) (net.IP, net.HardwareAddr, uint32) {
+	const containerPosixServerBinary = "/packetimpact/posix_server"
+	dut.c.CopyFiles(&runOpts, "/packetimpact", "test/packetimpact/dut/posix_server")
+
+	if err := StartContainer(
+		ctx,
+		runOpts,
+		dut.c,
+		containerAddr,
+		[]*dockerutil.Network{ctrlNet, testNet},
+		containerPosixServerBinary,
+		"--ip=0.0.0.0",
+		"--port="+CtrlPort,
+	); err != nil {
+		t.Fatalf("failed to start docker container for DUT: %s", err)
+	}
+
+	if _, err := dut.c.WaitForOutput(ctx, "Server listening.*\n", 60*time.Second); err != nil {
+		t.Fatalf("%s on container %s never listened: %s", containerPosixServerBinary, dut.c.Name, err)
+	}
+
+	dutTestDevice, dutDeviceInfo, err := deviceByIP(ctx, dut.c, AddressInSubnet(containerAddr, *testNet.Subnet))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	remoteMAC := dutDeviceInfo.MAC
+	remoteIPv6 := dutDeviceInfo.IPv6Addr
+	// Netstack as DUT doesn't assign IPv6 addresses automatically so do it if
+	// needed.
+	if remoteIPv6 == nil {
+		if _, err := dut.c.Exec(ctx, dockerutil.ExecOpts{}, "ip", "addr", "add", netdevs.MACToIP(remoteMAC).String(), "scope", "link", "dev", dutTestDevice); err != nil {
+			t.Fatalf("unable to ip addr add on container %s: %s", dut.c.Name, err)
+		}
+		// Now try again, to make sure that it worked.
+		_, dutDeviceInfo, err = deviceByIP(ctx, dut.c, AddressInSubnet(containerAddr, *testNet.Subnet))
+		if err != nil {
+			t.Fatal(err)
+		}
+		remoteIPv6 = dutDeviceInfo.IPv6Addr
+		if remoteIPv6 == nil {
+			t.Fatalf("unable to set IPv6 address on container %s", dut.c.Name)
+		}
+	}
+	return remoteIPv6, dutDeviceInfo.MAC, dutDeviceInfo.ID
+}
+
+// Logs implements DUT.Logs.
+func (dut *DockerDUT) Logs(ctx context.Context) (string, error) {
+	logs, err := dut.c.Logs(ctx)
+	if err != nil {
+		return "", err
+	}
+	return fmt.Sprintf(`====== Begin of DUT Logs ======
+
+%s
+
+====== End of DUT Logs ======`, logs), nil
+}
+
+// AddNetworks connects the container to each docker network with the IP derived from addr.
+func AddNetworks(ctx context.Context, d *dockerutil.Container, addr net.IP, networks []*dockerutil.Network) error {
+	for _, dn := range networks {
+		ip := AddressInSubnet(addr, *dn.Subnet)
+		// Connect to the network with the specified IP address.
+		if err := dn.Connect(ctx, d, ip.String(), ""); err != nil {
+			return fmt.Errorf("unable to connect container %s to network %s: %w", d.Name, dn.Name, err)
+		}
+	}
+	return nil
+}
+
+// AddressInSubnet combines the subnet provided with the address and returns a
+// new address. The return address bits come from the subnet where the mask is 1
+// and from the ip address where the mask is 0.
+func AddressInSubnet(addr net.IP, subnet net.IPNet) net.IP {
+	var octets []byte
+	for i := 0; i < 4; i++ {
+		octets = append(octets, (subnet.IP.To4()[i]&subnet.Mask[i])+(addr.To4()[i]&(^subnet.Mask[i])))
+	}
+	return net.IP(octets)
+}
+
+// deviceByIP finds a deviceInfo and device name from an IP address.
+func deviceByIP(ctx context.Context, d *dockerutil.Container, ip net.IP) (string, netdevs.DeviceInfo, error) {
+	out, err := d.Exec(ctx, dockerutil.ExecOpts{}, "ip", "addr", "show")
+	if err != nil {
+		return "", netdevs.DeviceInfo{}, fmt.Errorf("listing devices on %s container: %w\n%s", d.Name, err, out)
+	}
+	devs, err := netdevs.ParseDevices(out)
+	if err != nil {
+		return "", netdevs.DeviceInfo{}, fmt.Errorf("parsing devices from %s container: %w\n%s", d.Name, err, out)
+	}
+	testDevice, deviceInfo, err := netdevs.FindDeviceByIP(ip, devs)
+	if err != nil {
+		return "", netdevs.DeviceInfo{}, fmt.Errorf("can't find deviceInfo for container %s: %w", d.Name, err)
+	}
+	return testDevice, deviceInfo, nil
+}
+
+// createDockerNetwork assigns n a randomly chosen Class C subnet with its
+// default (/24) mask and then creates the network.
+func createDockerNetwork(ctx context.Context, n *dockerutil.Network) error {
+	randSource := rand.NewSource(time.Now().UnixNano())
+	r1 := rand.New(randSource)
+	// Class C, 192.0.0.0 to 223.255.255.255, traditionally has mask 24.
+	ip := net.IPv4(byte(r1.Intn(224-192)+192), byte(r1.Intn(256)), byte(r1.Intn(256)), 0)
+	n.Subnet = &net.IPNet{
+		IP:   ip,
+		Mask: ip.DefaultMask(),
+	}
+	return n.Create(ctx)
+}
+
+// StartContainer will create a container instance from runOpts, connect it
+// with the specified docker networks and start executing the specified cmd.
+func StartContainer(ctx context.Context, runOpts dockerutil.RunOpts, c *dockerutil.Container, containerAddr net.IP, ns []*dockerutil.Network, cmd ...string) error {
+	conf, hostconf, netconf := c.ConfigsFrom(runOpts, cmd...)
+	_ = netconf
+	hostconf.AutoRemove = true
+	hostconf.Sysctls = map[string]string{"net.ipv6.conf.all.disable_ipv6": "0"}
+
+	if err := c.CreateFrom(ctx, conf, hostconf, nil); err != nil {
+		return fmt.Errorf("unable to create container %s: %w", c.Name, err)
+	}
+
+	if err := AddNetworks(ctx, c, containerAddr, ns); err != nil {
+		return fmt.Errorf("unable to connect the container with the networks: %w", err)
+	}
+
+	if err := c.Start(ctx); err != nil {
+		return fmt.Errorf("unable to start container %s: %w", c.Name, err)
+	}
+	return nil
+}
diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go
index cb9bfd5b73..c598bfc29e 100644
--- a/test/packetimpact/runner/packetimpact_test.go
+++ b/test/packetimpact/runner/packetimpact_test.go
@@ -18,366 +18,15 @@ package packetimpact_test
 import (
 	"context"
 	"flag"
-	"fmt"
-	"io/ioutil"
-	"log"
-	"math/rand"
-	"net"
-	"os"
-	"os/exec"
-	"path"
-	"strings"
 	"testing"
-	"time"
 
-	"github.com/docker/docker/api/types/mount"
-	"gvisor.dev/gvisor/pkg/test/dockerutil"
-	"gvisor.dev/gvisor/test/packetimpact/netdevs"
+	"gvisor.dev/gvisor/test/packetimpact/runner"
 )
 
-// stringList implements flag.Value.
-type stringList []string
-
-// String implements flag.Value.String.
-func (l *stringList) String() string {
-	return strings.Join(*l, ",")
-}
-
-// Set implements flag.Value.Set.
-func (l *stringList) Set(value string) error {
-	*l = append(*l, value)
-	return nil
-}
-
-var (
-	native          = flag.Bool("native", false, "whether the test should be run natively")
-	testbenchBinary = flag.String("testbench_binary", "", "path to the testbench binary")
-	tshark          = flag.Bool("tshark", false, "use more verbose tshark in logs instead of tcpdump")
-	extraTestArgs   = stringList{}
-	expectFailure   = flag.Bool("expect_failure", false, "expect that the test will fail when run")
-
-	dutAddr       = net.IPv4(0, 0, 0, 10)
-	testbenchAddr = net.IPv4(0, 0, 0, 20)
-)
-
-const ctrlPort = "40000"
-
-// logger implements testutil.Logger.
-//
-// Labels logs based on their source and formats multi-line logs.
-type logger string
-
-// Name implements testutil.Logger.Name.
-func (l logger) Name() string {
-	return string(l)
-}
-
-// Logf implements testutil.Logger.Logf.
-func (l logger) Logf(format string, args ...interface{}) {
-	lines := strings.Split(fmt.Sprintf(format, args...), "\n")
-	log.Printf("%s: %s", l, lines[0])
-	for _, line := range lines[1:] {
-		log.Printf("%*s  %s", len(l), "", line)
-	}
+func init() {
+	runner.RegisterFlags(flag.CommandLine)
 }
 
 func TestOne(t *testing.T) {
-	flag.Var(&extraTestArgs, "extra_test_arg", "extra arguments to pass to the testbench")
-	flag.Parse()
-	if *testbenchBinary == "" {
-		t.Fatal("--testbench_binary is missing")
-	}
-	dockerutil.EnsureSupportedDockerVersion()
-	ctx := context.Background()
-
-	// Create the networks needed for the test. One control network is needed for
-	// the gRPC control packets and one test network on which to transmit the test
-	// packets.
-	ctrlNet := dockerutil.NewNetwork(ctx, logger("ctrlNet"))
-	testNet := dockerutil.NewNetwork(ctx, logger("testNet"))
-	for _, dn := range []*dockerutil.Network{ctrlNet, testNet} {
-		for {
-			if err := createDockerNetwork(ctx, dn); err != nil {
-				t.Log("creating docker network:", err)
-				const wait = 100 * time.Millisecond
-				t.Logf("sleeping %s and will try creating docker network again", wait)
-				// This can fail if another docker network claimed the same IP so we'll
-				// just try again.
-				time.Sleep(wait)
-				continue
-			}
-			break
-		}
-		defer func(dn *dockerutil.Network) {
-			if err := dn.Cleanup(ctx); err != nil {
-				t.Errorf("unable to cleanup container %s: %s", dn.Name, err)
-			}
-		}(dn)
-		// Sanity check.
-		inspect, err := dn.Inspect(ctx)
-		if err != nil {
-			t.Fatalf("failed to inspect network %s: %v", dn.Name, err)
-		} else if inspect.Name != dn.Name {
-			t.Fatalf("name mismatch for network want: %s got: %s", dn.Name, inspect.Name)
-		}
-
-	}
-
-	tmpDir, err := ioutil.TempDir("", "container-output")
-	if err != nil {
-		t.Fatal("creating temp dir:", err)
-	}
-	defer os.RemoveAll(tmpDir)
-
-	const testOutputDir = "/tmp/testoutput"
-
-	// Create the Docker container for the DUT.
-	var dut *dockerutil.Container
-	if *native {
-		dut = dockerutil.MakeNativeContainer(ctx, logger("dut"))
-	} else {
-		dut = dockerutil.MakeContainer(ctx, logger("dut"))
-	}
-
-	runOpts := dockerutil.RunOpts{
-		Image:  "packetimpact",
-		CapAdd: []string{"NET_ADMIN"},
-		Mounts: []mount.Mount{mount.Mount{
-			Type:     mount.TypeBind,
-			Source:   tmpDir,
-			Target:   testOutputDir,
-			ReadOnly: false,
-		}},
-	}
-
-	const containerPosixServerBinary = "/packetimpact/posix_server"
-	dut.CopyFiles(&runOpts, "/packetimpact", "/test/packetimpact/dut/posix_server")
-
-	conf, hostconf, _ := dut.ConfigsFrom(runOpts, containerPosixServerBinary, "--ip=0.0.0.0", "--port="+ctrlPort)
-	hostconf.AutoRemove = true
-	hostconf.Sysctls = map[string]string{"net.ipv6.conf.all.disable_ipv6": "0"}
-
-	if err := dut.CreateFrom(ctx, conf, hostconf, nil); err != nil {
-		t.Fatalf("unable to create container %s: %v", dut.Name, err)
-	}
-
-	defer dut.CleanUp(ctx)
-
-	// Add ctrlNet as eth1 and testNet as eth2.
-	const testNetDev = "eth2"
-	if err := addNetworks(ctx, dut, dutAddr, []*dockerutil.Network{ctrlNet, testNet}); err != nil {
-		t.Fatal(err)
-	}
-
-	if err := dut.Start(ctx); err != nil {
-		t.Fatalf("unable to start container %s: %s", dut.Name, err)
-	}
-
-	if _, err := dut.WaitForOutput(ctx, "Server listening.*\n", 60*time.Second); err != nil {
-		t.Fatalf("%s on container %s never listened: %s", containerPosixServerBinary, dut.Name, err)
-	}
-
-	dutTestDevice, dutDeviceInfo, err := deviceByIP(ctx, dut, addressInSubnet(dutAddr, *testNet.Subnet))
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	remoteMAC := dutDeviceInfo.MAC
-	remoteIPv6 := dutDeviceInfo.IPv6Addr
-	// Netstack as DUT doesn't assign IPv6 addresses automatically so do it if
-	// needed.
-	if remoteIPv6 == nil {
-		if _, err := dut.Exec(ctx, dockerutil.ExecOpts{}, "ip", "addr", "add", netdevs.MACToIP(remoteMAC).String(), "scope", "link", "dev", dutTestDevice); err != nil {
-			t.Fatalf("unable to ip addr add on container %s: %s", dut.Name, err)
-		}
-		// Now try again, to make sure that it worked.
-		_, dutDeviceInfo, err = deviceByIP(ctx, dut, addressInSubnet(dutAddr, *testNet.Subnet))
-		if err != nil {
-			t.Fatal(err)
-		}
-		remoteIPv6 = dutDeviceInfo.IPv6Addr
-		if remoteIPv6 == nil {
-			t.Fatal("unable to set IPv6 address on container", dut.Name)
-		}
-	}
-
-	// Create the Docker container for the testbench.
-	testbench := dockerutil.MakeNativeContainer(ctx, logger("testbench"))
-
-	tbb := path.Base(*testbenchBinary)
-	containerTestbenchBinary := "/packetimpact/" + tbb
-	runOpts = dockerutil.RunOpts{
-		Image:  "packetimpact",
-		CapAdd: []string{"NET_ADMIN"},
-		Mounts: []mount.Mount{mount.Mount{
-			Type:     mount.TypeBind,
-			Source:   tmpDir,
-			Target:   testOutputDir,
-			ReadOnly: false,
-		}},
-	}
-	testbench.CopyFiles(&runOpts, "/packetimpact", "/test/packetimpact/tests/"+tbb)
-
-	// Run tcpdump in the test bench unbuffered, without DNS resolution, just on
-	// the interface with the test packets.
-	snifferArgs := []string{
-		"tcpdump",
-		"-S", "-vvv", "-U", "-n",
-		"-i", testNetDev,
-		"-w", testOutputDir + "/dump.pcap",
-	}
-	snifferRegex := "tcpdump: listening.*\n"
-	if *tshark {
-		// Run tshark in the test bench unbuffered, without DNS resolution, just on
-		// the interface with the test packets.
-		snifferArgs = []string{
-			"tshark", "-V", "-l", "-n", "-i", testNetDev,
-			"-o", "tcp.check_checksum:TRUE",
-			"-o", "udp.check_checksum:TRUE",
-		}
-		snifferRegex = "Capturing on.*\n"
-	}
-
-	defer func() {
-		if err := exec.Command("/bin/cp", "-r", tmpDir, os.Getenv("TEST_UNDECLARED_OUTPUTS_DIR")).Run(); err != nil {
-			t.Error("unable to copy container output files:", err)
-		}
-	}()
-
-	conf, hostconf, _ = testbench.ConfigsFrom(runOpts, snifferArgs...)
-	hostconf.AutoRemove = true
-	hostconf.Sysctls = map[string]string{"net.ipv6.conf.all.disable_ipv6": "0"}
-
-	if err := testbench.CreateFrom(ctx, conf, hostconf, nil); err != nil {
-		t.Fatalf("unable to create container %s: %s", testbench.Name, err)
-	}
-	defer testbench.CleanUp(ctx)
-
-	// Add ctrlNet as eth1 and testNet as eth2.
-	if err := addNetworks(ctx, testbench, testbenchAddr, []*dockerutil.Network{ctrlNet, testNet}); err != nil {
-		t.Fatal(err)
-	}
-
-	if err := testbench.Start(ctx); err != nil {
-		t.Fatalf("unable to start container %s: %s", testbench.Name, err)
-	}
-
-	// Kill so that it will flush output.
-	defer func() {
-		time.Sleep(1 * time.Second)
-		testbench.Exec(ctx, dockerutil.ExecOpts{}, "killall", snifferArgs[0])
-	}()
-
-	if _, err := testbench.WaitForOutput(ctx, snifferRegex, 60*time.Second); err != nil {
-		t.Fatalf("sniffer on %s never listened: %s", dut.Name, err)
-	}
-
-	// Because the Linux kernel receives the SYN-ACK but didn't send the SYN it
-	// will issue an RST. To prevent this IPtables can be used to filter out all
-	// incoming packets. The raw socket that packetimpact tests use will still see
-	// everything.
-	for _, bin := range []string{"iptables", "ip6tables"} {
-		if logs, err := testbench.Exec(ctx, dockerutil.ExecOpts{}, bin, "-A", "INPUT", "-i", testNetDev, "-p", "tcp", "-j", "DROP"); err != nil {
-			t.Fatalf("unable to Exec %s on container %s: %s, logs from testbench:\n%s", bin, testbench.Name, err, logs)
-		}
-	}
-
-	// FIXME(b/156449515): Some piece of the system has a race. The old
-	// bash script version had a sleep, so we have one too. The race should
-	// be fixed and this sleep removed.
-	time.Sleep(time.Second)
-
-	// Start a packetimpact test on the test bench. The packetimpact test sends
-	// and receives packets and also sends POSIX socket commands to the
-	// posix_server to be executed on the DUT.
-	testArgs := []string{containerTestbenchBinary}
-	testArgs = append(testArgs, extraTestArgs...)
-	testArgs = append(testArgs,
-		"--posix_server_ip", addressInSubnet(dutAddr, *ctrlNet.Subnet).String(),
-		"--posix_server_port", ctrlPort,
-		"--remote_ipv4", addressInSubnet(dutAddr, *testNet.Subnet).String(),
-		"--local_ipv4", addressInSubnet(testbenchAddr, *testNet.Subnet).String(),
-		"--remote_ipv6", remoteIPv6.String(),
-		"--remote_mac", remoteMAC.String(),
-		"--remote_interface_id", fmt.Sprintf("%d", dutDeviceInfo.ID),
-		"--device", testNetDev,
-		fmt.Sprintf("--native=%t", *native),
-	)
-	testbenchLogs, err := testbench.Exec(ctx, dockerutil.ExecOpts{}, testArgs...)
-	if (err != nil) != *expectFailure {
-		var dutLogs string
-		if logs, err := dut.Logs(ctx); err != nil {
-			dutLogs = fmt.Sprintf("failed to fetch DUT logs: %s", err)
-		} else {
-			dutLogs = logs
-		}
-
-		t.Errorf(`test error: %v, expect failure: %t
-
-====== Begin of DUT Logs ======
-
-%s
-
-====== End of DUT Logs ======
-
-====== Begin of Testbench Logs ======
-
-%s
-
-====== End of Testbench Logs ======`,
-			err, *expectFailure, dutLogs, testbenchLogs)
-	}
-}
-
-func addNetworks(ctx context.Context, d *dockerutil.Container, addr net.IP, networks []*dockerutil.Network) error {
-	for _, dn := range networks {
-		ip := addressInSubnet(addr, *dn.Subnet)
-		// Connect to the network with the specified IP address.
-		if err := dn.Connect(ctx, d, ip.String(), ""); err != nil {
-			return fmt.Errorf("unable to connect container %s to network %s: %w", d.Name, dn.Name, err)
-		}
-	}
-	return nil
-}
-
-// addressInSubnet combines the subnet provided with the address and returns a
-// new address. The return address bits come from the subnet where the mask is 1
-// and from the ip address where the mask is 0.
-func addressInSubnet(addr net.IP, subnet net.IPNet) net.IP {
-	var octets []byte
-	for i := 0; i < 4; i++ {
-		octets = append(octets, (subnet.IP.To4()[i]&subnet.Mask[i])+(addr.To4()[i]&(^subnet.Mask[i])))
-	}
-	return net.IP(octets)
-}
-
-// createDockerNetwork makes a randomly-named network that will start with the
-// namePrefix. The network will be a random /24 subnet.
-func createDockerNetwork(ctx context.Context, n *dockerutil.Network) error {
-	randSource := rand.NewSource(time.Now().UnixNano())
-	r1 := rand.New(randSource)
-	// Class C, 192.0.0.0 to 223.255.255.255, transitionally has mask 24.
-	ip := net.IPv4(byte(r1.Intn(224-192)+192), byte(r1.Intn(256)), byte(r1.Intn(256)), 0)
-	n.Subnet = &net.IPNet{
-		IP:   ip,
-		Mask: ip.DefaultMask(),
-	}
-	return n.Create(ctx)
-}
-
-// deviceByIP finds a deviceInfo and device name from an IP address.
-func deviceByIP(ctx context.Context, d *dockerutil.Container, ip net.IP) (string, netdevs.DeviceInfo, error) {
-	out, err := d.Exec(ctx, dockerutil.ExecOpts{}, "ip", "addr", "show")
-	if err != nil {
-		return "", netdevs.DeviceInfo{}, fmt.Errorf("listing devices on %s container: %w\n%s", d.Name, err, out)
-	}
-	devs, err := netdevs.ParseDevices(out)
-	if err != nil {
-		return "", netdevs.DeviceInfo{}, fmt.Errorf("parsing devices from %s container: %w\n%s", d.Name, err, out)
-	}
-	testDevice, deviceInfo, err := netdevs.FindDeviceByIP(ip, devs)
-	if err != nil {
-		return "", netdevs.DeviceInfo{}, fmt.Errorf("can't find deviceInfo for container %s: %w", d.Name, err)
-	}
-	return testDevice, deviceInfo, nil
+	runner.TestWithDUT(context.Background(), t, runner.NewDockerDUT, runner.DutAddr)
 }

From d128abc269777db80a6debd21fddde68927639a1 Mon Sep 17 00:00:00 2001
From: Ian Lewis <ianlewis@google.com>
Date: Wed, 2 Sep 2020 19:37:06 -0700
Subject: [PATCH 143/211] Update version in cni tutorial

Update the cniVersion used in the CNI tutorial so that it works with
containerd 1.2. Containerd 1.2 includes a version of the cri plugin
(release/1.2) that, in turn, includes a version of the
cni library (0.6.0) that only supports up to 0.3.1.
https://github.com/containernetworking/cni/blob/v0.6.0/pkg/version/version.go#L38

PiperOrigin-RevId: 329837188
---
 g3doc/user_guide/tutorials/cni.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/g3doc/user_guide/tutorials/cni.md b/g3doc/user_guide/tutorials/cni.md
index ce2fd09a88..a3507c25b0 100644
--- a/g3doc/user_guide/tutorials/cni.md
+++ b/g3doc/user_guide/tutorials/cni.md
@@ -47,7 +47,7 @@ sudo mkdir -p /etc/cni/net.d
 
 sudo sh -c 'cat > /etc/cni/net.d/10-bridge.conf << EOF
 {
-  "cniVersion": "0.4.0",
+  "cniVersion": "0.3.1",
   "name": "mynet",
   "type": "bridge",
   "bridge": "cni0",
@@ -65,7 +65,7 @@ EOF'
 
 sudo sh -c 'cat > /etc/cni/net.d/99-loopback.conf << EOF
 {
-  "cniVersion": "0.4.0",
+  "cniVersion": "0.3.1",
   "name": "lo",
   "type": "loopback"
 }

From 7849589ef169e11d7270bf48e949dcd78f6f655c Mon Sep 17 00:00:00 2001
From: Tamir Duberstein <tamird@google.com>
Date: Thu, 3 Sep 2020 05:50:36 -0700
Subject: [PATCH 144/211] Avoid grpc_impl

PiperOrigin-RevId: 329902747
---
 test/packetimpact/dut/posix_server.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/packetimpact/dut/posix_server.cc b/test/packetimpact/dut/posix_server.cc
index 2f3becfba4..4de8540f6e 100644
--- a/test/packetimpact/dut/posix_server.cc
+++ b/test/packetimpact/dut/posix_server.cc
@@ -337,7 +337,7 @@ class PosixImpl final : public posix_server::Posix::Service {
     return ::grpc::Status::OK;
   }
 
-  ::grpc::Status Shutdown(grpc_impl::ServerContext *context,
+  ::grpc::Status Shutdown(grpc::ServerContext *context,
                           const ::posix_server::ShutdownRequest *request,
                           ::posix_server::ShutdownResponse *response) override {
     if (shutdown(request->fd(), request->how()) < 0) {

From 786310a6c3ccd4fecea99a7f5196e8096eb6c006 Mon Sep 17 00:00:00 2001
From: Nicolas Lacasse <nlacasse@google.com>
Date: Thu, 3 Sep 2020 10:52:01 -0700
Subject: [PATCH 145/211] Run getdents_benchmark with fewer files.

This test regularly times out when "shared" filesystem is enabled.

PiperOrigin-RevId: 329950622
---
 test/perf/linux/getdents_benchmark.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/perf/linux/getdents_benchmark.cc b/test/perf/linux/getdents_benchmark.cc
index d8e81fa8c0..9030eb3563 100644
--- a/test/perf/linux/getdents_benchmark.cc
+++ b/test/perf/linux/getdents_benchmark.cc
@@ -105,7 +105,7 @@ void BM_GetdentsSameFD(benchmark::State& state) {
   state.SetItemsProcessed(state.iterations());
 }
 
-BENCHMARK(BM_GetdentsSameFD)->Range(1, 1 << 16)->UseRealTime();
+BENCHMARK(BM_GetdentsSameFD)->Range(1, 1 << 12)->UseRealTime();
 
 // Creates a directory containing `files` files, and reads all the directory
 // entries from the directory using a new FD each time.

From f6d2444ed32ffef47ccc72a595d97721f3fa0561 Mon Sep 17 00:00:00 2001
From: Jamie Liu <jamieliu@google.com>
Date: Thu, 3 Sep 2020 15:21:10 -0700
Subject: [PATCH 146/211] Use atomic.Value for Stack.tcpProbeFunc.

b/166980357#comment56 shows:

- 837 goroutines blocked in:
gvisor/pkg/sync/sync.(*RWMutex).Lock
gvisor/pkg/tcpip/stack/stack.(*Stack).StartTransportEndpointCleanup
gvisor/pkg/tcpip/transport/tcp/tcp.(*endpoint).cleanupLocked
gvisor/pkg/tcpip/transport/tcp/tcp.(*endpoint).completeWorkerLocked
gvisor/pkg/tcpip/transport/tcp/tcp.(*endpoint).protocolMainLoop.func1
gvisor/pkg/tcpip/transport/tcp/tcp.(*endpoint).protocolMainLoop

- 695 goroutines blocked in:
gvisor/pkg/sync/sync.(*RWMutex).Lock
gvisor/pkg/tcpip/stack/stack.(*Stack).CompleteTransportEndpointCleanup
gvisor/pkg/tcpip/transport/tcp/tcp.(*endpoint).cleanupLocked
gvisor/pkg/tcpip/transport/tcp/tcp.(*endpoint).completeWorkerLocked
gvisor/pkg/tcpip/transport/tcp/tcp.(*endpoint).protocolMainLoop.func1
gvisor/pkg/tcpip/transport/tcp/tcp.(*endpoint).protocolMainLoop

- 3882 goroutines blocked in:
gvisor/pkg/sync/sync.(*RWMutex).Lock
gvisor/pkg/tcpip/stack/stack.(*Stack).GetTCPProbe
gvisor/pkg/tcpip/transport/tcp/tcp.newEndpoint
gvisor/pkg/tcpip/transport/tcp/tcp.(*protocol).NewEndpoint
gvisor/pkg/tcpip/stack/stack.(*Stack).NewEndpoint

All of these are contending on Stack.mu. Stack.StartTransportEndpointCleanup()
and Stack.CompleteTransportEndpointCleanup() insert/delete TransportEndpoints
in a map (Stack.cleanupEndpoints), and the former also does endpoint
unregistration while holding Stack.mu, so it's not immediately clear how
feasible it is to replace the map with a mutex-less implementation or how much
doing so would help. However, Stack.GetTCPProbe() just reads a function object
(Stack.tcpProbeFunc) that is almost always nil (as far as I can tell,
Stack.AddTCPProbe() is only called in tests), and it's called for every new TCP
endpoint. So converting it to an atomic.Value should significantly reduce
contention on Stack.mu, improving TCP endpoint creation latency and allowing
TCP endpoint cleanup to proceed.

PiperOrigin-RevId: 330004140
---
 pkg/tcpip/stack/stack.go | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index c86ee1c132..66ce10357a 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -429,7 +429,7 @@ type Stack struct {
 
 	// If not nil, then any new endpoints will have this probe function
 	// invoked everytime they receive a TCP segment.
-	tcpProbeFunc TCPProbeFunc
+	tcpProbeFunc atomic.Value // TCPProbeFunc
 
 	// clock is used to generate user-visible times.
 	clock tcpip.Clock
@@ -1795,18 +1795,17 @@ func (s *Stack) TransportProtocolInstance(num tcpip.TransportProtocolNumber) Tra
 // guarantee provided on which probe will be invoked. Ideally this should only
 // be called once per stack.
 func (s *Stack) AddTCPProbe(probe TCPProbeFunc) {
-	s.mu.Lock()
-	s.tcpProbeFunc = probe
-	s.mu.Unlock()
+	s.tcpProbeFunc.Store(probe)
 }
 
 // GetTCPProbe returns the TCPProbeFunc if installed with AddTCPProbe, nil
 // otherwise.
 func (s *Stack) GetTCPProbe() TCPProbeFunc {
-	s.mu.Lock()
-	p := s.tcpProbeFunc
-	s.mu.Unlock()
-	return p
+	p := s.tcpProbeFunc.Load()
+	if p == nil {
+		return nil
+	}
+	return p.(TCPProbeFunc)
 }
 
 // RemoveTCPProbe removes an installed TCP probe.
@@ -1815,9 +1814,8 @@ func (s *Stack) GetTCPProbe() TCPProbeFunc {
 // have a probe attached. Endpoints already created will continue to invoke
 // TCP probe.
 func (s *Stack) RemoveTCPProbe() {
-	s.mu.Lock()
-	s.tcpProbeFunc = nil
-	s.mu.Unlock()
+	// This must be TCPProbeFunc(nil) because atomic.Value.Store(nil) panics.
+	s.tcpProbeFunc.Store(TCPProbeFunc(nil))
 }
 
 // JoinGroup joins the given multicast group on the given NIC.

From 4d5627f76346e8afbab1506b8151c7ccb3f82f16 Mon Sep 17 00:00:00 2001
From: Bhasker Hariharan <bhaskerh@google.com>
Date: Thu, 3 Sep 2020 17:34:56 -0700
Subject: [PATCH 147/211] Use fine-grained mutex for stack.cleanupEndpoints.

stack.cleanupEndpoints is protected by the stack.mu but that can cause
contention as the stack mutex is already acquired in a lot of hot paths during
new endpoint creation /cleanup etc. Moving this to a fine grained mutex should
reduce contention on the stack.mu.

PiperOrigin-RevId: 330026151
---
 pkg/tcpip/stack/stack.go | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 66ce10357a..133d90815d 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -415,10 +415,13 @@ type Stack struct {
 
 	linkAddrCache *linkAddrCache
 
-	mu               sync.RWMutex
-	nics             map[tcpip.NICID]*NIC
-	forwarding       bool
-	cleanupEndpoints map[TransportEndpoint]struct{}
+	mu         sync.RWMutex
+	nics       map[tcpip.NICID]*NIC
+	forwarding bool
+
+	// cleanupEndpointsMu protects cleanupEndpoints.
+	cleanupEndpointsMu sync.Mutex
+	cleanupEndpoints   map[TransportEndpoint]struct{}
 
 	// route is the route table passed in by the user via SetRouteTable(),
 	// it is used by FindRoute() to build a route for a specific
@@ -1528,10 +1531,9 @@ func (s *Stack) UnregisterTransportEndpoint(nicID tcpip.NICID, netProtos []tcpip
 // StartTransportEndpointCleanup removes the endpoint with the given id from
 // the stack transport dispatcher. It also transitions it to the cleanup stage.
 func (s *Stack) StartTransportEndpointCleanup(nicID tcpip.NICID, netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) {
-	s.mu.Lock()
-	defer s.mu.Unlock()
-
+	s.cleanupEndpointsMu.Lock()
 	s.cleanupEndpoints[ep] = struct{}{}
+	s.cleanupEndpointsMu.Unlock()
 
 	s.demux.unregisterEndpoint(netProtos, protocol, id, ep, flags, bindToDevice)
 }
@@ -1539,9 +1541,9 @@ func (s *Stack) StartTransportEndpointCleanup(nicID tcpip.NICID, netProtos []tcp
 // CompleteTransportEndpointCleanup removes the endpoint from the cleanup
 // stage.
 func (s *Stack) CompleteTransportEndpointCleanup(ep TransportEndpoint) {
-	s.mu.Lock()
+	s.cleanupEndpointsMu.Lock()
 	delete(s.cleanupEndpoints, ep)
-	s.mu.Unlock()
+	s.cleanupEndpointsMu.Unlock()
 }
 
 // FindTransportEndpoint finds an endpoint that most closely matches the provided
@@ -1584,23 +1586,23 @@ func (s *Stack) RegisteredEndpoints() []TransportEndpoint {
 
 // CleanupEndpoints returns endpoints currently in the cleanup state.
 func (s *Stack) CleanupEndpoints() []TransportEndpoint {
-	s.mu.Lock()
+	s.cleanupEndpointsMu.Lock()
 	es := make([]TransportEndpoint, 0, len(s.cleanupEndpoints))
 	for e := range s.cleanupEndpoints {
 		es = append(es, e)
 	}
-	s.mu.Unlock()
+	s.cleanupEndpointsMu.Unlock()
 	return es
 }
 
 // RestoreCleanupEndpoints adds endpoints to cleanup tracking. This is useful
 // for restoring a stack after a save.
 func (s *Stack) RestoreCleanupEndpoints(es []TransportEndpoint) {
-	s.mu.Lock()
+	s.cleanupEndpointsMu.Lock()
 	for _, e := range es {
 		s.cleanupEndpoints[e] = struct{}{}
 	}
-	s.mu.Unlock()
+	s.cleanupEndpointsMu.Unlock()
 }
 
 // Close closes all currently registered transport endpoints.

From dfeb9d8b45f76aa01f09e9c0cd40347c9e58680d Mon Sep 17 00:00:00 2001
From: Ayush Ranjan <ayushranjan@google.com>
Date: Thu, 3 Sep 2020 21:42:49 -0700
Subject: [PATCH 148/211] Fix the release workflow.

PiperOrigin-RevId: 330049242
---
 tools/make_apt.sh | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tools/make_apt.sh b/tools/make_apt.sh
index b47977ed58..fdc5e91924 100755
--- a/tools/make_apt.sh
+++ b/tools/make_apt.sh
@@ -54,18 +54,21 @@ declare -r release="${root}/dists/${suite}"
 mkdir -p "${release}"
 
 # Create a temporary keyring, and ensure it is cleaned up.
+# Using separate homedir allows us to install apt repositories multiple times
+# using the same key. This is a limitation in GnuPG pre-2.1.
 declare -r keyring=$(mktemp /tmp/keyringXXXXXX.gpg)
+declare -r homedir=$(mktemp -d /tmp/homedirXXXXXX)
 cleanup() {
-  rm -f "${keyring}"
+  rm -rf "${keyring}" "${homedir}"
 }
 trap cleanup EXIT
 
 # We attempt the import twice because the first one will fail if the public key
 # is not found. This isn't actually a failure for us, because we don't require
-# the public (this may be stored separately). The second import will succeed
+# the public key (this may be stored separately). The second import will succeed
 # because, in reality, the first import succeeded and it's a no-op.
-gpg --no-default-keyring --keyring "${keyring}" --secret-keyring "${keyring}" --import "${private_key}" || \
-  gpg --no-default-keyring --keyring "${keyring}" --secret-keyring "${keyring}" --import "${private_key}"
+gpg --no-default-keyring --keyring "${keyring}" --homedir "${homedir}" --import "${private_key}" || \
+  gpg --no-default-keyring --keyring "${keyring}" --homedir "${homedir}" --import "${private_key}"
 
 # Copy the packages into the root.
 for pkg in "$@"; do

From fb6c6faea2cefb05440845fccce9dcab0779b90d Mon Sep 17 00:00:00 2001
From: Dean Deng <deandeng@google.com>
Date: Thu, 3 Sep 2020 23:29:13 -0700
Subject: [PATCH 149/211] Adjust input file offset when sendfile only completes
 a partial write.

Fixes #3779.

PiperOrigin-RevId: 330057268
---
 pkg/sentry/syscalls/linux/vfs2/BUILD     |  1 +
 pkg/sentry/syscalls/linux/vfs2/splice.go | 22 ++++++++++++++--------
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD
index 64696b4385..0030dee39e 100644
--- a/pkg/sentry/syscalls/linux/vfs2/BUILD
+++ b/pkg/sentry/syscalls/linux/vfs2/BUILD
@@ -44,6 +44,7 @@ go_library(
         "//pkg/context",
         "//pkg/fspath",
         "//pkg/gohacks",
+        "//pkg/log",
         "//pkg/sentry/arch",
         "//pkg/sentry/fs/lock",
         "//pkg/sentry/fsbridge",
diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go
index 68ce947789..5543cfac21 100644
--- a/pkg/sentry/syscalls/linux/vfs2/splice.go
+++ b/pkg/sentry/syscalls/linux/vfs2/splice.go
@@ -18,6 +18,7 @@ import (
 	"io"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/arch"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
@@ -390,16 +391,21 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
 					err = dw.waitForOut(t)
 				}
 				if err != nil {
-					// We didn't complete the write. Only
-					// report the bytes that were actually
-					// written, and rewind the offset.
+					// We didn't complete the write. Only report the bytes that were actually
+					// written, and rewind offsets as needed.
 					notWritten := int64(len(wbuf))
 					n -= notWritten
-					if offset != -1 {
-						// TODO(gvisor.dev/issue/3779): The inFile offset will be incorrect if we
-						// roll back, because it has already been advanced by the full amount.
-						// Merely seeking on inFile does not work, because there may be concurrent
-						// file operations.
+					if offset == -1 {
+						// We modified the offset of the input file itself during the read
+						// operation. Rewind it.
+						if _, seekErr := inFile.Seek(t, -notWritten, linux.SEEK_CUR); seekErr != nil {
+							// Log the error but don't return it, since the write has already
+							// completed successfully.
+							log.Warningf("failed to roll back input file offset: %v", seekErr)
+						}
+					} else {
+						// The sendfile call was provided an offset parameter that should be
+						// adjusted to reflect the number of bytes sent. Rewind it.
 						offset -= notWritten
 					}
 					break

From 3daaddb90c3d72c6244ef379c1a9a651aa971bef Mon Sep 17 00:00:00 2001
From: Fabricio Voznika <fvoznika@google.com>
Date: Fri, 4 Sep 2020 11:36:41 -0700
Subject: [PATCH 150/211] Simplify FD handling for container start/exec

VFS1 and VFS2 host FDs have different dupping behavior,
making it error prone to code for both. Change the contract
so that FDs are released as they are used, so the caller
can simply defer a block that closes all remaining files.
This also addresses handling of partial failures.

With this fix, more VFS2 tests can be enabled.

Updates #1487

PiperOrigin-RevId: 330112266
---
 pkg/fd/fd.go                            | 42 ++++++++---
 pkg/sentry/control/BUILD                |  1 -
 pkg/sentry/control/proc.go              | 28 +++----
 pkg/sentry/fdimport/BUILD               |  1 +
 pkg/sentry/fdimport/fdimport.go         | 22 ++++--
 runsc/boot/BUILD                        |  2 +
 runsc/boot/controller.go                | 12 ++-
 runsc/boot/fs.go                        |  7 +-
 runsc/boot/loader.go                    | 99 +++++++++----------------
 runsc/boot/loader_test.go               |  9 ++-
 runsc/container/multi_container_test.go |  9 +--
 11 files changed, 120 insertions(+), 112 deletions(-)

diff --git a/pkg/fd/fd.go b/pkg/fd/fd.go
index 83bcfe2203..cc6b0cdf11 100644
--- a/pkg/fd/fd.go
+++ b/pkg/fd/fd.go
@@ -49,7 +49,7 @@ func fixCount(n int, err error) (int, error) {
 
 // Read implements io.Reader.
 func (r *ReadWriter) Read(b []byte) (int, error) {
-	c, err := fixCount(syscall.Read(int(atomic.LoadInt64(&r.fd)), b))
+	c, err := fixCount(syscall.Read(r.FD(), b))
 	if c == 0 && len(b) > 0 && err == nil {
 		return 0, io.EOF
 	}
@@ -62,7 +62,7 @@ func (r *ReadWriter) Read(b []byte) (int, error) {
 func (r *ReadWriter) ReadAt(b []byte, off int64) (c int, err error) {
 	for len(b) > 0 {
 		var m int
-		m, err = fixCount(syscall.Pread(int(atomic.LoadInt64(&r.fd)), b, off))
+		m, err = fixCount(syscall.Pread(r.FD(), b, off))
 		if m == 0 && err == nil {
 			return c, io.EOF
 		}
@@ -82,7 +82,7 @@ func (r *ReadWriter) Write(b []byte) (int, error) {
 	var n, remaining int
 	for remaining = len(b); remaining > 0; {
 		woff := len(b) - remaining
-		n, err = syscall.Write(int(atomic.LoadInt64(&r.fd)), b[woff:])
+		n, err = syscall.Write(r.FD(), b[woff:])
 
 		if n > 0 {
 			// syscall.Write wrote some bytes. This is the common case.
@@ -110,7 +110,7 @@ func (r *ReadWriter) Write(b []byte) (int, error) {
 func (r *ReadWriter) WriteAt(b []byte, off int64) (c int, err error) {
 	for len(b) > 0 {
 		var m int
-		m, err = fixCount(syscall.Pwrite(int(atomic.LoadInt64(&r.fd)), b, off))
+		m, err = fixCount(syscall.Pwrite(r.FD(), b, off))
 		if err != nil {
 			break
 		}
@@ -121,6 +121,16 @@ func (r *ReadWriter) WriteAt(b []byte, off int64) (c int, err error) {
 	return
 }
 
+// FD returns the owned file descriptor. Ownership remains unchanged.
+func (r *ReadWriter) FD() int {
+	return int(atomic.LoadInt64(&r.fd))
+}
+
+// String implements Stringer.String().
+func (r *ReadWriter) String() string {
+	return fmt.Sprintf("FD: %d", r.FD())
+}
+
 // FD owns a host file descriptor.
 //
 // It is similar to os.File, with a few important distinctions:
@@ -167,6 +177,23 @@ func NewFromFile(file *os.File) (*FD, error) {
 	return New(fd), nil
 }
 
+// NewFromFiles creates new FDs for each file in the slice.
+func NewFromFiles(files []*os.File) ([]*FD, error) {
+	rv := make([]*FD, 0, len(files))
+	for _, f := range files {
+		new, err := NewFromFile(f)
+		if err != nil {
+			// Cleanup on error.
+			for _, fd := range rv {
+				fd.Close()
+			}
+			return nil, err
+		}
+		rv = append(rv, new)
+	}
+	return rv, nil
+}
+
 // Open is equivalent to open(2).
 func Open(path string, openmode int, perm uint32) (*FD, error) {
 	f, err := syscall.Open(path, openmode|syscall.O_LARGEFILE, perm)
@@ -204,11 +231,6 @@ func (f *FD) Release() int {
 	return int(atomic.SwapInt64(&f.fd, -1))
 }
 
-// FD returns the file descriptor owned by FD. FD retains ownership.
-func (f *FD) FD() int {
-	return int(atomic.LoadInt64(&f.fd))
-}
-
 // File converts the FD to an os.File.
 //
 // FD does not transfer ownership of the file descriptor (it will be
@@ -219,7 +241,7 @@ func (f *FD) FD() int {
 // This operation is somewhat expensive, so care should be taken to minimize
 // its use.
 func (f *FD) File() (*os.File, error) {
-	fd, err := syscall.Dup(int(atomic.LoadInt64(&f.fd)))
+	fd, err := syscall.Dup(f.FD())
 	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/sentry/control/BUILD b/pkg/sentry/control/BUILD
index 2c5d14be5a..deaf5fa23e 100644
--- a/pkg/sentry/control/BUILD
+++ b/pkg/sentry/control/BUILD
@@ -35,7 +35,6 @@ go_library(
         "//pkg/sync",
         "//pkg/tcpip/link/sniffer",
         "//pkg/urpc",
-        "@org_golang_x_sys//unix:go_default_library",
     ],
 )
 
diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go
index dfa9365638..668f478026 100644
--- a/pkg/sentry/control/proc.go
+++ b/pkg/sentry/control/proc.go
@@ -23,8 +23,8 @@ import (
 	"text/tabwriter"
 	"time"
 
-	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/fd"
 	"gvisor.dev/gvisor/pkg/sentry/fdimport"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/fs/host"
@@ -203,27 +203,17 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI
 	}
 	initArgs.Filename = resolved
 
-	fds := make([]int, len(args.FilePayload.Files))
-	for i, file := range args.FilePayload.Files {
-		if kernel.VFS2Enabled {
-			// Need to dup to remove ownership from os.File.
-			dup, err := unix.Dup(int(file.Fd()))
-			if err != nil {
-				return nil, 0, nil, nil, fmt.Errorf("duplicating payload files: %w", err)
-			}
-			fds[i] = dup
-		} else {
-			// VFS1 dups the file on import.
-			fds[i] = int(file.Fd())
-		}
+	fds, err := fd.NewFromFiles(args.Files)
+	if err != nil {
+		return nil, 0, nil, nil, fmt.Errorf("duplicating payload files: %w", err)
 	}
+	defer func() {
+		for _, fd := range fds {
+			_ = fd.Close()
+		}
+	}()
 	ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, args.StdioIsPty, fds)
 	if err != nil {
-		if kernel.VFS2Enabled {
-			for _, fd := range fds {
-				unix.Close(fd)
-			}
-		}
 		return nil, 0, nil, nil, err
 	}
 
diff --git a/pkg/sentry/fdimport/BUILD b/pkg/sentry/fdimport/BUILD
index 5e41ceb4ec..6b4f8b0ed6 100644
--- a/pkg/sentry/fdimport/BUILD
+++ b/pkg/sentry/fdimport/BUILD
@@ -10,6 +10,7 @@ go_library(
     visibility = ["//pkg/sentry:internal"],
     deps = [
         "//pkg/context",
+        "//pkg/fd",
         "//pkg/sentry/fs",
         "//pkg/sentry/fs/host",
         "//pkg/sentry/fsimpl/host",
diff --git a/pkg/sentry/fdimport/fdimport.go b/pkg/sentry/fdimport/fdimport.go
index 1b7cb94c02..3146614750 100644
--- a/pkg/sentry/fdimport/fdimport.go
+++ b/pkg/sentry/fdimport/fdimport.go
@@ -18,6 +18,7 @@ import (
 	"fmt"
 
 	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/fd"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/fs/host"
 	hostvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/host"
@@ -27,8 +28,9 @@ import (
 
 // Import imports a slice of FDs into the given FDTable. If console is true,
 // sets up TTY for the first 3 FDs in the slice representing stdin, stdout,
-// stderr. Upon success, Import takes ownership of all FDs.
-func Import(ctx context.Context, fdTable *kernel.FDTable, console bool, fds []int) (*host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
+// stderr. Used FDs are either closed or released. It's safe for the caller to
+// close any remaining files upon return.
+func Import(ctx context.Context, fdTable *kernel.FDTable, console bool, fds []*fd.FD) (*host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
 	if kernel.VFS2Enabled {
 		ttyFile, err := importVFS2(ctx, fdTable, console, fds)
 		return nil, ttyFile, err
@@ -37,7 +39,7 @@ func Import(ctx context.Context, fdTable *kernel.FDTable, console bool, fds []in
 	return ttyFile, nil, err
 }
 
-func importFS(ctx context.Context, fdTable *kernel.FDTable, console bool, fds []int) (*host.TTYFileOperations, error) {
+func importFS(ctx context.Context, fdTable *kernel.FDTable, console bool, fds []*fd.FD) (*host.TTYFileOperations, error) {
 	var ttyFile *fs.File
 	for appFD, hostFD := range fds {
 		var appFile *fs.File
@@ -46,11 +48,12 @@ func importFS(ctx context.Context, fdTable *kernel.FDTable, console bool, fds []
 			// Import the file as a host TTY file.
 			if ttyFile == nil {
 				var err error
-				appFile, err = host.ImportFile(ctx, hostFD, true /* isTTY */)
+				appFile, err = host.ImportFile(ctx, hostFD.FD(), true /* isTTY */)
 				if err != nil {
 					return nil, err
 				}
 				defer appFile.DecRef(ctx)
+				_ = hostFD.Close() // FD is dup'd in ImportFile.
 
 				// Remember this in the TTY file, as we will
 				// use it for the other stdio FDs.
@@ -65,11 +68,12 @@ func importFS(ctx context.Context, fdTable *kernel.FDTable, console bool, fds []
 		} else {
 			// Import the file as a regular host file.
 			var err error
-			appFile, err = host.ImportFile(ctx, hostFD, false /* isTTY */)
+			appFile, err = host.ImportFile(ctx, hostFD.FD(), false /* isTTY */)
 			if err != nil {
 				return nil, err
 			}
 			defer appFile.DecRef(ctx)
+			_ = hostFD.Close() // FD is dup'd in ImportFile.
 		}
 
 		// Add the file to the FD map.
@@ -84,7 +88,7 @@ func importFS(ctx context.Context, fdTable *kernel.FDTable, console bool, fds []
 	return ttyFile.FileOperations.(*host.TTYFileOperations), nil
 }
 
-func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, stdioFDs []int) (*hostvfs2.TTYFileDescription, error) {
+func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, stdioFDs []*fd.FD) (*hostvfs2.TTYFileDescription, error) {
 	k := kernel.KernelFromContext(ctx)
 	if k == nil {
 		return nil, fmt.Errorf("cannot find kernel from context")
@@ -98,11 +102,12 @@ func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, stdi
 			// Import the file as a host TTY file.
 			if ttyFile == nil {
 				var err error
-				appFile, err = hostvfs2.ImportFD(ctx, k.HostMount(), hostFD, true /* isTTY */)
+				appFile, err = hostvfs2.ImportFD(ctx, k.HostMount(), hostFD.FD(), true /* isTTY */)
 				if err != nil {
 					return nil, err
 				}
 				defer appFile.DecRef(ctx)
+				hostFD.Release() // FD is transferred to host FD.
 
 				// Remember this in the TTY file, as we will use it for the other stdio
 				// FDs.
@@ -115,11 +120,12 @@ func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, stdi
 			}
 		} else {
 			var err error
-			appFile, err = hostvfs2.ImportFD(ctx, k.HostMount(), hostFD, false /* isTTY */)
+			appFile, err = hostvfs2.ImportFD(ctx, k.HostMount(), hostFD.FD(), false /* isTTY */)
 			if err != nil {
 				return nil, err
 			}
 			defer appFile.DecRef(ctx)
+			hostFD.Release() // FD is transferred to host FD.
 		}
 
 		if err := fdTable.NewFDAtVFS2(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil {
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index 040f6a72d3..704c66742f 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -30,6 +30,7 @@ go_library(
         "//pkg/control/server",
         "//pkg/cpuid",
         "//pkg/eventchannel",
+        "//pkg/fd",
         "//pkg/fspath",
         "//pkg/log",
         "//pkg/memutil",
@@ -123,6 +124,7 @@ go_test(
     library = ":boot",
     deps = [
         "//pkg/control/server",
+        "//pkg/fd",
         "//pkg/fspath",
         "//pkg/log",
         "//pkg/p9",
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 68a2b45cfb..8946515196 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -22,6 +22,7 @@ import (
 
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"gvisor.dev/gvisor/pkg/control/server"
+	"gvisor.dev/gvisor/pkg/fd"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/control"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -257,13 +258,20 @@ func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error {
 	// All validation passed, logs the spec for debugging.
 	specutils.LogSpec(args.Spec)
 
-	err := cm.l.startContainer(args.Spec, args.Conf, args.CID, args.FilePayload.Files)
+	fds, err := fd.NewFromFiles(args.FilePayload.Files)
 	if err != nil {
+		return err
+	}
+	defer func() {
+		for _, fd := range fds {
+			_ = fd.Close()
+		}
+	}()
+	if err := cm.l.startContainer(args.Spec, args.Conf, args.CID, fds); err != nil {
 		log.Debugf("containerManager.Start failed %q: %+v: %v", args.CID, args, err)
 		return err
 	}
 	log.Debugf("Container %q started", args.CID)
-
 	return nil
 }
 
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index ea0461a3dd..e2c5f5fb1d 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -34,6 +34,7 @@ import (
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/fd"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/fs/gofer"
@@ -320,14 +321,14 @@ func adjustDirentCache(k *kernel.Kernel) error {
 }
 
 type fdDispenser struct {
-	fds []int
+	fds []*fd.FD
 }
 
 func (f *fdDispenser) remove() int {
 	if f.empty() {
 		panic("fdDispenser out of fds")
 	}
-	rv := f.fds[0]
+	rv := f.fds[0].Release()
 	f.fds = f.fds[1:]
 	return rv
 }
@@ -564,7 +565,7 @@ type containerMounter struct {
 	hints *podMountHints
 }
 
-func newContainerMounter(spec *specs.Spec, goferFDs []int, k *kernel.Kernel, hints *podMountHints) *containerMounter {
+func newContainerMounter(spec *specs.Spec, goferFDs []*fd.FD, k *kernel.Kernel, hints *podMountHints) *containerMounter {
 	return &containerMounter{
 		root:   spec.Root,
 		mounts: compileMounts(spec),
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 882cf270b3..246ae3c3eb 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -29,6 +29,7 @@ import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/cpuid"
+	"gvisor.dev/gvisor/pkg/fd"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/memutil"
 	"gvisor.dev/gvisor/pkg/rand"
@@ -89,10 +90,10 @@ type containerInfo struct {
 	procArgs kernel.CreateProcessArgs
 
 	// stdioFDs contains stdin, stdout, and stderr.
-	stdioFDs []int
+	stdioFDs []*fd.FD
 
 	// goferFDs are the FDs that attach the sandbox to the gofers.
-	goferFDs []int
+	goferFDs []*fd.FD
 }
 
 // Loader keeps state needed to start the kernel and run the container..
@@ -356,12 +357,17 @@ func New(args Args) (*Loader, error) {
 		k.SetHostMount(hostMount)
 	}
 
+	info := containerInfo{
+		conf:     args.Conf,
+		spec:     args.Spec,
+		procArgs: procArgs,
+	}
+
 	// Make host FDs stable between invocations. Host FDs must map to the exact
 	// same number when the sandbox is restored. Otherwise the wrong FD will be
 	// used.
-	var stdioFDs []int
 	newfd := startingStdioFD
-	for _, fd := range args.StdioFDs {
+	for _, stdioFD := range args.StdioFDs {
 		// Check that newfd is unused to avoid clobbering over it.
 		if _, err := unix.FcntlInt(uintptr(newfd), unix.F_GETFD, 0); !errors.Is(err, unix.EBADF) {
 			if err != nil {
@@ -370,14 +376,17 @@ func New(args Args) (*Loader, error) {
 			return nil, fmt.Errorf("unable to remap stdios, FD %d is already in use", newfd)
 		}
 
-		err := unix.Dup3(fd, newfd, unix.O_CLOEXEC)
+		err := unix.Dup3(stdioFD, newfd, unix.O_CLOEXEC)
 		if err != nil {
-			return nil, fmt.Errorf("dup3 of stdioFDs failed: %v", err)
+			return nil, fmt.Errorf("dup3 of stdios failed: %w", err)
 		}
-		stdioFDs = append(stdioFDs, newfd)
-		_ = unix.Close(fd)
+		info.stdioFDs = append(info.stdioFDs, fd.New(newfd))
+		_ = unix.Close(stdioFD)
 		newfd++
 	}
+	for _, goferFD := range args.GoferFDs {
+		info.goferFDs = append(info.goferFDs, fd.New(goferFD))
+	}
 
 	eid := execID{cid: args.ID}
 	l := &Loader{
@@ -386,13 +395,7 @@ func New(args Args) (*Loader, error) {
 		sandboxID:  args.ID,
 		processes:  map[execID]*execProcess{eid: {}},
 		mountHints: mountHints,
-		root: containerInfo{
-			conf:     args.Conf,
-			stdioFDs: stdioFDs,
-			goferFDs: args.GoferFDs,
-			spec:     args.Spec,
-			procArgs: procArgs,
-		},
+		root:       info,
 	}
 
 	// We don't care about child signals; some platforms can generate a
@@ -466,9 +469,14 @@ func (l *Loader) Destroy() {
 	}
 	l.watchdog.Stop()
 
-	for i, fd := range l.root.stdioFDs {
-		_ = unix.Close(fd)
-		l.root.stdioFDs[i] = -1
+	// In the success case, stdioFDs and goferFDs will only contain
+	// released/closed FDs whose ownership has been passed over to host FDs and
+	// gofer sessions. Close them here in case of failure.
+	for _, fd := range l.root.stdioFDs {
+		_ = fd.Close()
+	}
+	for _, fd := range l.root.goferFDs {
+		_ = fd.Close()
 	}
 }
 
@@ -598,17 +606,6 @@ func (l *Loader) run() error {
 		}
 	})
 
-	// l.stdioFDs are derived from dup() in boot.New() and they are now dup()ed again
-	// either in createFDTable() during initial start or in descriptor.initAfterLoad()
-	// during restore, we can release l.stdioFDs now. VFS2 takes ownership of the
-	// passed FDs, so only close for VFS1.
-	if !kernel.VFS2Enabled {
-		for i, fd := range l.root.stdioFDs {
-			_ = unix.Close(fd)
-			l.root.stdioFDs[i] = -1
-		}
-	}
-
 	log.Infof("Process should have started...")
 	l.watchdog.Start()
 	return l.k.Start()
@@ -628,9 +625,9 @@ func (l *Loader) createContainer(cid string) error {
 }
 
 // startContainer starts a child container. It returns the thread group ID of
-// the newly created process. Caller owns 'files' and may close them after
-// this method returns.
-func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid string, files []*os.File) error {
+// the newly created process. Used FDs are either closed or released. It's safe
+// for the caller to close any remaining files upon return.
+func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid string, files []*fd.FD) error {
 	// Create capabilities.
 	caps, err := specutils.Capabilities(conf.EnableRaw, spec.Process.Capabilities)
 	if err != nil {
@@ -681,37 +678,15 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid strin
 	}
 
 	info := &containerInfo{
-		conf: conf,
-		spec: spec,
+		conf:     conf,
+		spec:     spec,
+		stdioFDs: files[:3],
+		goferFDs: files[3:],
 	}
 	info.procArgs, err = createProcessArgs(cid, spec, creds, l.k, pidns)
 	if err != nil {
 		return fmt.Errorf("creating new process: %v", err)
 	}
-
-	// VFS1 dups stdioFDs, so we don't need to dup them here. VFS2 takes
-	// ownership of the passed FDs, and we need to dup them here.
-	for _, f := range files[:3] {
-		if !kernel.VFS2Enabled {
-			info.stdioFDs = append(info.stdioFDs, int(f.Fd()))
-		} else {
-			fd, err := unix.Dup(int(f.Fd()))
-			if err != nil {
-				return fmt.Errorf("failed to dup file: %v", err)
-			}
-			info.stdioFDs = append(info.stdioFDs, fd)
-		}
-	}
-
-	// Can't take ownership away from os.File. dup them to get a new FDs.
-	for _, f := range files[3:] {
-		fd, err := unix.Dup(int(f.Fd()))
-		if err != nil {
-			return fmt.Errorf("failed to dup file: %v", err)
-		}
-		info.goferFDs = append(info.goferFDs, fd)
-	}
-
 	tg, err := l.createContainerProcess(false, cid, info, ep)
 	if err != nil {
 		return err
@@ -795,13 +770,13 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn
 // startGoferMonitor runs a goroutine to monitor gofer's health. It polls on
 // the gofer FDs looking for disconnects, and destroys the container if a
 // disconnect occurs in any of the gofer FDs.
-func (l *Loader) startGoferMonitor(cid string, goferFDs []int) {
+func (l *Loader) startGoferMonitor(cid string, goferFDs []*fd.FD) {
 	go func() {
 		log.Debugf("Monitoring gofer health for container %q", cid)
 		var events []unix.PollFd
-		for _, fd := range goferFDs {
+		for _, goferFD := range goferFDs {
 			events = append(events, unix.PollFd{
-				Fd:     int32(fd),
+				Fd:     int32(goferFD.FD()),
 				Events: unix.POLLHUP | unix.POLLRDHUP,
 			})
 		}
@@ -1281,7 +1256,7 @@ func (l *Loader) ttyFromIDLocked(key execID) (*host.TTYFileOperations, *hostvfs2
 	return ep.tty, ep.ttyVFS2, nil
 }
 
-func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
+func createFDTable(ctx context.Context, console bool, stdioFDs []*fd.FD) (*kernel.FDTable, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
 	if len(stdioFDs) != 3 {
 		return nil, nil, nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs))
 	}
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index 2343ce76cc..dc9861389c 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -26,6 +26,7 @@ import (
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/control/server"
+	"gvisor.dev/gvisor/pkg/fd"
 	"gvisor.dev/gvisor/pkg/fspath"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/p9"
@@ -444,7 +445,7 @@ func TestCreateMountNamespace(t *testing.T) {
 			}
 			defer cleanup()
 
-			mntr := newContainerMounter(&tc.spec, []int{sandEnd}, nil, &podMountHints{})
+			mntr := newContainerMounter(&tc.spec, []*fd.FD{fd.New(sandEnd)}, nil, &podMountHints{})
 			mns, err := mntr.createMountNamespace(ctx, conf)
 			if err != nil {
 				t.Fatalf("failed to create mount namespace: %v", err)
@@ -702,7 +703,11 @@ func TestRestoreEnvironment(t *testing.T) {
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
 			conf := testConfig()
-			mntr := newContainerMounter(tc.spec, tc.ioFDs, nil, &podMountHints{})
+			var ioFDs []*fd.FD
+			for _, ioFD := range tc.ioFDs {
+				ioFDs = append(ioFDs, fd.New(ioFD))
+			}
+			mntr := newContainerMounter(tc.spec, ioFDs, nil, &podMountHints{})
 			actualRenv, err := mntr.createRestoreEnvironment(conf)
 			if !tc.errorExpected && err != nil {
 				t.Fatalf("could not create restore environment for test:%s", tc.name)
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index da1694280e..5b790c6c8d 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -169,7 +169,7 @@ func TestMultiContainerSanity(t *testing.T) {
 // TestMultiPIDNS checks that it is possible to run 2 dead-simple
 // containers in the same sandbox with different pidns.
 func TestMultiPIDNS(t *testing.T) {
-	for name, conf := range configs(t, all...) {
+	for name, conf := range configsWithVFS2(t, all...) {
 		t.Run(name, func(t *testing.T) {
 			rootDir, cleanup, err := testutil.SetupRootDir()
 			if err != nil {
@@ -214,7 +214,7 @@ func TestMultiPIDNS(t *testing.T) {
 
 // TestMultiPIDNSPath checks the pidns path.
 func TestMultiPIDNSPath(t *testing.T) {
-	for name, conf := range configs(t, all...) {
+	for name, conf := range configsWithVFS2(t, all...) {
 		t.Run(name, func(t *testing.T) {
 			rootDir, cleanup, err := testutil.SetupRootDir()
 			if err != nil {
@@ -580,7 +580,7 @@ func TestMultiContainerDestroy(t *testing.T) {
 		t.Fatal("error finding test_app:", err)
 	}
 
-	for name, conf := range configs(t, all...) {
+	for name, conf := range configsWithVFS2(t, all...) {
 		t.Run(name, func(t *testing.T) {
 			rootDir, cleanup, err := testutil.SetupRootDir()
 			if err != nil {
@@ -1252,8 +1252,7 @@ func TestMultiContainerSharedMountReadonly(t *testing.T) {
 
 // Test that shared pod mounts continue to work after container is restarted.
 func TestMultiContainerSharedMountRestart(t *testing.T) {
-	//TODO(gvisor.dev/issue/1487): This is failing with VFS2.
-	for name, conf := range configs(t, all...) {
+	for name, conf := range configsWithVFS2(t, all...) {
 		t.Run(name, func(t *testing.T) {
 			rootDir, cleanup, err := testutil.SetupRootDir()
 			if err != nil {

From 9804a9d401e7f0956540aaa87e8e2892e6e00562 Mon Sep 17 00:00:00 2001
From: Ayush Ranjan <ayushranjan@google.com>
Date: Mon, 7 Sep 2020 21:16:22 -0700
Subject: [PATCH 151/211] Fix make_apt script.

This change makes the following fixes:
- When creating a test repo.key, create a secret keyring as other workflows
  also use secret keyrings only.
- We should not be using both --keyring and --secret-keyring options. Just use
  --secret-keyring.
- Pass homedir to all gpg commands. dpkg-sig takes an arg -g which stands for
  gpgopts. So we need to pass the homedir there too.

PiperOrigin-RevId: 330443280
---
 Makefile          |  4 ++--
 tools/make_apt.sh | 12 +++++++-----
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/Makefile b/Makefile
index 43a243c906..d9e3206b44 100644
--- a/Makefile
+++ b/Makefile
@@ -294,8 +294,8 @@ $(RELEASE_KEY):
 	echo Name-Email: test@example.com >> $$C && \
 	echo Expire-Date: 0 >> $$C && \
 	echo %commit >> $$C && \
-	gpg --batch $(GPG_TEST_OPTIONS) --passphrase '' --no-default-keyring --keyring $$T --no-tty --gen-key $$C && \
-	gpg --batch $(GPG_TEST_OPTIONS) --export-secret-keys --no-default-keyring --keyring $$T --secret-keyring $$T > $@; \
+	gpg --batch $(GPG_TEST_OPTIONS) --passphrase '' --no-default-keyring --secret-keyring $$T --no-tty --gen-key $$C && \
+	gpg --batch $(GPG_TEST_OPTIONS) --export-secret-keys --no-default-keyring --secret-keyring $$T > $@; \
 	rc=$$?; rm -f $$T $$C; exit $$rc
 
 release: $(RELEASE_KEY) ## Builds a release.
diff --git a/tools/make_apt.sh b/tools/make_apt.sh
index fdc5e91924..13c5edd76a 100755
--- a/tools/make_apt.sh
+++ b/tools/make_apt.sh
@@ -58,6 +58,7 @@ mkdir -p "${release}"
 # using the same key. This is a limitation in GnuPG pre-2.1.
 declare -r keyring=$(mktemp /tmp/keyringXXXXXX.gpg)
 declare -r homedir=$(mktemp -d /tmp/homedirXXXXXX)
+declare -r gpg_opts=("--no-default-keyring" "--secret-keyring" "${keyring}" "--homedir" "${homedir}")
 cleanup() {
   rm -rf "${keyring}" "${homedir}"
 }
@@ -67,8 +68,8 @@ trap cleanup EXIT
 # is not found. This isn't actually a failure for us, because we don't require
 # the public key (this may be stored separately). The second import will succeed
 # because, in reality, the first import succeeded and it's a no-op.
-gpg --no-default-keyring --keyring "${keyring}" --homedir "${homedir}" --import "${private_key}" || \
-  gpg --no-default-keyring --keyring "${keyring}" --homedir "${homedir}" --import "${private_key}"
+gpg "${gpg_opts[@]}" --import "${private_key}" || \
+  gpg "${gpg_opts[@]}" --import "${private_key}"
 
 # Copy the packages into the root.
 for pkg in "$@"; do
@@ -103,7 +104,8 @@ for pkg in "$@"; do
   cp -a "${pkg}" "${target}"
   chmod 0644 "${target}"
   if [[ "${ext}" == "deb" ]]; then
-    dpkg-sig -g "--no-default-keyring --keyring ${keyring}" --sign builder "${target}"
+    # We use [*] here to expand the gpg_opts array into a single shell-word.
+    dpkg-sig -g "${gpg_opts[*]}" --sign builder "${target}"
   fi
 done
 
@@ -138,5 +140,5 @@ rm "${release}"/apt.conf
 # Sign the release.
 declare -r digest_opts=("--digest-algo" "SHA512" "--cert-digest-algo" "SHA512")
 (cd "${release}" && rm -f Release.gpg InRelease)
-(cd "${release}" && gpg --no-default-keyring --keyring "${keyring}" --clearsign "${digest_opts[@]}" -o InRelease Release)
-(cd "${release}" && gpg --no-default-keyring --keyring "${keyring}" -abs "${digest_opts[@]}" -o Release.gpg Release)
+(cd "${release}" && gpg "${gpg_opts[@]}" --clearsign "${digest_opts[@]}" -o InRelease Release)
+(cd "${release}" && gpg "${gpg_opts[@]}" -abs "${digest_opts[@]}" -o Release.gpg Release)

From 531340c7ba4da9bc4773dd7db77b62522c45aa20 Mon Sep 17 00:00:00 2001
From: Ayush Ranjan <ayushranjan@google.com>
Date: Tue, 8 Sep 2020 11:49:51 -0700
Subject: [PATCH 152/211] [vfs] Capitalize x in the {Get/Set/Remove/List}xattr
 functions.

PiperOrigin-RevId: 330554450
---
 pkg/sentry/fs/inode.go                       |  2 +-
 pkg/sentry/fs/inode_overlay.go               |  2 +-
 pkg/sentry/fsimpl/ext/filesystem.go          | 16 +++----
 pkg/sentry/fsimpl/gofer/filesystem.go        | 24 +++++-----
 pkg/sentry/fsimpl/gofer/gofer.go             | 32 ++++++-------
 pkg/sentry/fsimpl/kernfs/filesystem.go       | 16 +++----
 pkg/sentry/fsimpl/overlay/copy_up.go         | 12 ++---
 pkg/sentry/fsimpl/overlay/filesystem.go      | 36 +++++++-------
 pkg/sentry/fsimpl/overlay/overlay.go         | 16 +++----
 pkg/sentry/fsimpl/tmpfs/filesystem.go        | 24 +++++-----
 pkg/sentry/fsimpl/tmpfs/tmpfs.go             | 40 ++++++++--------
 pkg/sentry/fsimpl/verity/filesystem.go       | 26 +++++-----
 pkg/sentry/syscalls/linux/vfs2/vfs2.go       | 16 +++----
 pkg/sentry/syscalls/linux/vfs2/xattr.go      | 32 ++++++-------
 pkg/sentry/vfs/anonfs.go                     | 16 +++----
 pkg/sentry/vfs/file_description.go           | 50 ++++++++++----------
 pkg/sentry/vfs/file_description_impl_util.go | 18 +++----
 pkg/sentry/vfs/filesystem.go                 | 24 +++++-----
 pkg/sentry/vfs/memxattr/xattr.go             | 16 +++----
 pkg/sentry/vfs/options.go                    | 16 +++----
 pkg/sentry/vfs/vfs.go                        | 24 +++++-----
 test/syscalls/linux/xattr.cc                 | 50 ++++++++++----------
 22 files changed, 254 insertions(+), 254 deletions(-)

diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go
index b79cd9877e..0049104532 100644
--- a/pkg/sentry/fs/inode.go
+++ b/pkg/sentry/fs/inode.go
@@ -270,7 +270,7 @@ func (i *Inode) GetXattr(ctx context.Context, name string, size uint64) (string,
 // SetXattr calls i.InodeOperations.SetXattr with i as the Inode.
 func (i *Inode) SetXattr(ctx context.Context, d *Dirent, name, value string, flags uint32) error {
 	if i.overlay != nil {
-		return overlaySetxattr(ctx, i.overlay, d, name, value, flags)
+		return overlaySetXattr(ctx, i.overlay, d, name, value, flags)
 	}
 	return i.InodeOperations.SetXattr(ctx, i, name, value, flags)
 }
diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go
index 0a2d64e3a6..b16ab08ba9 100644
--- a/pkg/sentry/fs/inode_overlay.go
+++ b/pkg/sentry/fs/inode_overlay.go
@@ -552,7 +552,7 @@ func overlayGetXattr(ctx context.Context, o *overlayEntry, name string, size uin
 	return s, err
 }
 
-func overlaySetxattr(ctx context.Context, o *overlayEntry, d *Dirent, name, value string, flags uint32) error {
+func overlaySetXattr(ctx context.Context, o *overlayEntry, d *Dirent, name, value string, flags uint32) error {
 	// Don't allow changes to overlay xattrs through a setxattr syscall.
 	if isXattrOverlay(name) {
 		return syserror.EPERM
diff --git a/pkg/sentry/fsimpl/ext/filesystem.go b/pkg/sentry/fsimpl/ext/filesystem.go
index 8565d1a664..075de0e228 100644
--- a/pkg/sentry/fsimpl/ext/filesystem.go
+++ b/pkg/sentry/fsimpl/ext/filesystem.go
@@ -504,8 +504,8 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath
 	return nil, syserror.ECONNREFUSED
 }
 
-// ListxattrAt implements vfs.FilesystemImpl.ListxattrAt.
-func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
+// ListXattrAt implements vfs.FilesystemImpl.ListXattrAt.
+func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
 	_, _, err := fs.walk(ctx, rp, false)
 	if err != nil {
 		return nil, err
@@ -513,8 +513,8 @@ func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, si
 	return nil, syserror.ENOTSUP
 }
 
-// GetxattrAt implements vfs.FilesystemImpl.GetxattrAt.
-func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetxattrOptions) (string, error) {
+// GetXattrAt implements vfs.FilesystemImpl.GetXattrAt.
+func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) {
 	_, _, err := fs.walk(ctx, rp, false)
 	if err != nil {
 		return "", err
@@ -522,8 +522,8 @@ func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
 	return "", syserror.ENOTSUP
 }
 
-// SetxattrAt implements vfs.FilesystemImpl.SetxattrAt.
-func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error {
+// SetXattrAt implements vfs.FilesystemImpl.SetXattrAt.
+func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error {
 	_, _, err := fs.walk(ctx, rp, false)
 	if err != nil {
 		return err
@@ -531,8 +531,8 @@ func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
 	return syserror.ENOTSUP
 }
 
-// RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt.
-func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
+// RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt.
+func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
 	_, _, err := fs.walk(ctx, rp, false)
 	if err != nil {
 		return err
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index 5d0f487db6..b01121f9e9 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -1519,8 +1519,8 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath
 	return nil, syserror.ECONNREFUSED
 }
 
-// ListxattrAt implements vfs.FilesystemImpl.ListxattrAt.
-func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
+// ListXattrAt implements vfs.FilesystemImpl.ListXattrAt.
+func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
 	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
@@ -1528,11 +1528,11 @@ func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, si
 	if err != nil {
 		return nil, err
 	}
-	return d.listxattr(ctx, rp.Credentials(), size)
+	return d.listXattr(ctx, rp.Credentials(), size)
 }
 
-// GetxattrAt implements vfs.FilesystemImpl.GetxattrAt.
-func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetxattrOptions) (string, error) {
+// GetXattrAt implements vfs.FilesystemImpl.GetXattrAt.
+func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
 	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
@@ -1540,11 +1540,11 @@ func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
 	if err != nil {
 		return "", err
 	}
-	return d.getxattr(ctx, rp.Credentials(), &opts)
+	return d.getXattr(ctx, rp.Credentials(), &opts)
 }
 
-// SetxattrAt implements vfs.FilesystemImpl.SetxattrAt.
-func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error {
+// SetXattrAt implements vfs.FilesystemImpl.SetXattrAt.
+func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
 	d, err := fs.resolveLocked(ctx, rp, &ds)
@@ -1552,7 +1552,7 @@ func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
 		fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 		return err
 	}
-	if err := d.setxattr(ctx, rp.Credentials(), &opts); err != nil {
+	if err := d.setXattr(ctx, rp.Credentials(), &opts); err != nil {
 		fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 		return err
 	}
@@ -1562,8 +1562,8 @@ func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
 	return nil
 }
 
-// RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt.
-func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
+// RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt.
+func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
 	d, err := fs.resolveLocked(ctx, rp, &ds)
@@ -1571,7 +1571,7 @@ func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath,
 		fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 		return err
 	}
-	if err := d.removexattr(ctx, rp.Credentials(), name); err != nil {
+	if err := d.removeXattr(ctx, rp.Credentials(), name); err != nil {
 		fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 		return err
 	}
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index 78b07f1b3d..fa4e19113c 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -1372,7 +1372,7 @@ func (d *dentry) setDeleted() {
 	atomic.StoreUint32(&d.deleted, 1)
 }
 
-func (d *dentry) listxattr(ctx context.Context, creds *auth.Credentials, size uint64) ([]string, error) {
+func (d *dentry) listXattr(ctx context.Context, creds *auth.Credentials, size uint64) ([]string, error) {
 	if d.file.isNil() || !d.userXattrSupported() {
 		return nil, nil
 	}
@@ -1390,7 +1390,7 @@ func (d *dentry) listxattr(ctx context.Context, creds *auth.Credentials, size ui
 	return xattrs, nil
 }
 
-func (d *dentry) getxattr(ctx context.Context, creds *auth.Credentials, opts *vfs.GetxattrOptions) (string, error) {
+func (d *dentry) getXattr(ctx context.Context, creds *auth.Credentials, opts *vfs.GetXattrOptions) (string, error) {
 	if d.file.isNil() {
 		return "", syserror.ENODATA
 	}
@@ -1400,7 +1400,7 @@ func (d *dentry) getxattr(ctx context.Context, creds *auth.Credentials, opts *vf
 	return d.file.getXattr(ctx, opts.Name, opts.Size)
 }
 
-func (d *dentry) setxattr(ctx context.Context, creds *auth.Credentials, opts *vfs.SetxattrOptions) error {
+func (d *dentry) setXattr(ctx context.Context, creds *auth.Credentials, opts *vfs.SetXattrOptions) error {
 	if d.file.isNil() {
 		return syserror.EPERM
 	}
@@ -1410,7 +1410,7 @@ func (d *dentry) setxattr(ctx context.Context, creds *auth.Credentials, opts *vf
 	return d.file.setXattr(ctx, opts.Name, opts.Value, opts.Flags)
 }
 
-func (d *dentry) removexattr(ctx context.Context, creds *auth.Credentials, name string) error {
+func (d *dentry) removeXattr(ctx context.Context, creds *auth.Credentials, name string) error {
 	if d.file.isNil() {
 		return syserror.EPERM
 	}
@@ -1668,30 +1668,30 @@ func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions)
 	return nil
 }
 
-// Listxattr implements vfs.FileDescriptionImpl.Listxattr.
-func (fd *fileDescription) Listxattr(ctx context.Context, size uint64) ([]string, error) {
-	return fd.dentry().listxattr(ctx, auth.CredentialsFromContext(ctx), size)
+// ListXattr implements vfs.FileDescriptionImpl.ListXattr.
+func (fd *fileDescription) ListXattr(ctx context.Context, size uint64) ([]string, error) {
+	return fd.dentry().listXattr(ctx, auth.CredentialsFromContext(ctx), size)
 }
 
-// Getxattr implements vfs.FileDescriptionImpl.Getxattr.
-func (fd *fileDescription) Getxattr(ctx context.Context, opts vfs.GetxattrOptions) (string, error) {
-	return fd.dentry().getxattr(ctx, auth.CredentialsFromContext(ctx), &opts)
+// GetXattr implements vfs.FileDescriptionImpl.GetXattr.
+func (fd *fileDescription) GetXattr(ctx context.Context, opts vfs.GetXattrOptions) (string, error) {
+	return fd.dentry().getXattr(ctx, auth.CredentialsFromContext(ctx), &opts)
 }
 
-// Setxattr implements vfs.FileDescriptionImpl.Setxattr.
-func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOptions) error {
+// SetXattr implements vfs.FileDescriptionImpl.SetXattr.
+func (fd *fileDescription) SetXattr(ctx context.Context, opts vfs.SetXattrOptions) error {
 	d := fd.dentry()
-	if err := d.setxattr(ctx, auth.CredentialsFromContext(ctx), &opts); err != nil {
+	if err := d.setXattr(ctx, auth.CredentialsFromContext(ctx), &opts); err != nil {
 		return err
 	}
 	d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent)
 	return nil
 }
 
-// Removexattr implements vfs.FileDescriptionImpl.Removexattr.
-func (fd *fileDescription) Removexattr(ctx context.Context, name string) error {
+// RemoveXattr implements vfs.FileDescriptionImpl.RemoveXattr.
+func (fd *fileDescription) RemoveXattr(ctx context.Context, name string) error {
 	d := fd.dentry()
-	if err := d.removexattr(ctx, auth.CredentialsFromContext(ctx), name); err != nil {
+	if err := d.removeXattr(ctx, auth.CredentialsFromContext(ctx), name); err != nil {
 		return err
 	}
 	d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent)
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index 0e30116899..c428053e8b 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -810,8 +810,8 @@ func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath
 	return nil, syserror.ECONNREFUSED
 }
 
-// ListxattrAt implements vfs.FilesystemImpl.ListxattrAt.
-func (fs *Filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
+// ListXattrAt implements vfs.FilesystemImpl.ListXattrAt.
+func (fs *Filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
 	fs.mu.RLock()
 	_, _, err := fs.walkExistingLocked(ctx, rp)
 	fs.mu.RUnlock()
@@ -823,8 +823,8 @@ func (fs *Filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, si
 	return nil, syserror.ENOTSUP
 }
 
-// GetxattrAt implements vfs.FilesystemImpl.GetxattrAt.
-func (fs *Filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetxattrOptions) (string, error) {
+// GetXattrAt implements vfs.FilesystemImpl.GetXattrAt.
+func (fs *Filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) {
 	fs.mu.RLock()
 	_, _, err := fs.walkExistingLocked(ctx, rp)
 	fs.mu.RUnlock()
@@ -836,8 +836,8 @@ func (fs *Filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
 	return "", syserror.ENOTSUP
 }
 
-// SetxattrAt implements vfs.FilesystemImpl.SetxattrAt.
-func (fs *Filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error {
+// SetXattrAt implements vfs.FilesystemImpl.SetXattrAt.
+func (fs *Filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error {
 	fs.mu.RLock()
 	_, _, err := fs.walkExistingLocked(ctx, rp)
 	fs.mu.RUnlock()
@@ -849,8 +849,8 @@ func (fs *Filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
 	return syserror.ENOTSUP
 }
 
-// RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt.
-func (fs *Filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
+// RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt.
+func (fs *Filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
 	fs.mu.RLock()
 	_, _, err := fs.walkExistingLocked(ctx, rp)
 	fs.mu.RUnlock()
diff --git a/pkg/sentry/fsimpl/overlay/copy_up.go b/pkg/sentry/fsimpl/overlay/copy_up.go
index ba7b8495a7..c589b4746b 100644
--- a/pkg/sentry/fsimpl/overlay/copy_up.go
+++ b/pkg/sentry/fsimpl/overlay/copy_up.go
@@ -278,13 +278,13 @@ func (d *dentry) copyXattrsLocked(ctx context.Context) error {
 	lowerPop := &vfs.PathOperation{Root: d.lowerVDs[0], Start: d.lowerVDs[0]}
 	upperPop := &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD}
 
-	lowerXattrs, err := vfsObj.ListxattrAt(ctx, d.fs.creds, lowerPop, 0)
+	lowerXattrs, err := vfsObj.ListXattrAt(ctx, d.fs.creds, lowerPop, 0)
 	if err != nil {
 		if err == syserror.EOPNOTSUPP {
 			// There are no guarantees as to the contents of lowerXattrs.
 			return nil
 		}
-		ctx.Warningf("failed to copy up xattrs because ListxattrAt failed: %v", err)
+		ctx.Warningf("failed to copy up xattrs because ListXattrAt failed: %v", err)
 		return err
 	}
 
@@ -294,14 +294,14 @@ func (d *dentry) copyXattrsLocked(ctx context.Context) error {
 			continue
 		}
 
-		value, err := vfsObj.GetxattrAt(ctx, d.fs.creds, lowerPop, &vfs.GetxattrOptions{Name: name, Size: 0})
+		value, err := vfsObj.GetXattrAt(ctx, d.fs.creds, lowerPop, &vfs.GetXattrOptions{Name: name, Size: 0})
 		if err != nil {
-			ctx.Warningf("failed to copy up xattrs because GetxattrAt failed: %v", err)
+			ctx.Warningf("failed to copy up xattrs because GetXattrAt failed: %v", err)
 			return err
 		}
 
-		if err := vfsObj.SetxattrAt(ctx, d.fs.creds, upperPop, &vfs.SetxattrOptions{Name: name, Value: value}); err != nil {
-			ctx.Warningf("failed to copy up xattrs because SetxattrAt failed: %v", err)
+		if err := vfsObj.SetXattrAt(ctx, d.fs.creds, upperPop, &vfs.SetXattrOptions{Name: name, Value: value}); err != nil {
+			ctx.Warningf("failed to copy up xattrs because SetXattrAt failed: %v", err)
 			return err
 		}
 	}
diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go
index 46528c99c4..87afeeaf30 100644
--- a/pkg/sentry/fsimpl/overlay/filesystem.go
+++ b/pkg/sentry/fsimpl/overlay/filesystem.go
@@ -273,10 +273,10 @@ func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name str
 
 		// Directories are merged with directories from lower layers if they
 		// are not explicitly opaque.
-		opaqueVal, err := vfsObj.GetxattrAt(ctx, fs.creds, &vfs.PathOperation{
+		opaqueVal, err := vfsObj.GetXattrAt(ctx, fs.creds, &vfs.PathOperation{
 			Root:  childVD,
 			Start: childVD,
-		}, &vfs.GetxattrOptions{
+		}, &vfs.GetXattrOptions{
 			Name: _OVL_XATTR_OPAQUE,
 			Size: 1,
 		})
@@ -671,7 +671,7 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
 			// There may be directories on lower layers (previously hidden by
 			// the whiteout) that the new directory should not be merged with.
 			// Mark it opaque to prevent merging.
-			if err := vfsObj.SetxattrAt(ctx, fs.creds, &pop, &vfs.SetxattrOptions{
+			if err := vfsObj.SetXattrAt(ctx, fs.creds, &pop, &vfs.SetXattrOptions{
 				Name:  _OVL_XATTR_OPAQUE,
 				Value: "y",
 			}); err != nil {
@@ -1359,8 +1359,8 @@ func isOverlayXattr(name string) bool {
 	return strings.HasPrefix(name, _OVL_XATTR_PREFIX)
 }
 
-// ListxattrAt implements vfs.FilesystemImpl.ListxattrAt.
-func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
+// ListXattrAt implements vfs.FilesystemImpl.ListXattrAt.
+func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
 	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
@@ -1375,7 +1375,7 @@ func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, si
 func (fs *filesystem) listXattr(ctx context.Context, d *dentry, size uint64) ([]string, error) {
 	vfsObj := d.fs.vfsfs.VirtualFilesystem()
 	top := d.topLayer()
-	names, err := vfsObj.ListxattrAt(ctx, fs.creds, &vfs.PathOperation{Root: top, Start: top}, size)
+	names, err := vfsObj.ListXattrAt(ctx, fs.creds, &vfs.PathOperation{Root: top, Start: top}, size)
 	if err != nil {
 		return nil, err
 	}
@@ -1391,8 +1391,8 @@ func (fs *filesystem) listXattr(ctx context.Context, d *dentry, size uint64) ([]
 	return names[:n], err
 }
 
-// GetxattrAt implements vfs.FilesystemImpl.GetxattrAt.
-func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetxattrOptions) (string, error) {
+// GetXattrAt implements vfs.FilesystemImpl.GetXattrAt.
+func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
 	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
@@ -1404,7 +1404,7 @@ func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
 	return fs.getXattr(ctx, d, rp.Credentials(), &opts)
 }
 
-func (fs *filesystem) getXattr(ctx context.Context, d *dentry, creds *auth.Credentials, opts *vfs.GetxattrOptions) (string, error) {
+func (fs *filesystem) getXattr(ctx context.Context, d *dentry, creds *auth.Credentials, opts *vfs.GetXattrOptions) (string, error) {
 	if err := d.checkXattrPermissions(creds, opts.Name, vfs.MayRead); err != nil {
 		return "", err
 	}
@@ -1418,11 +1418,11 @@ func (fs *filesystem) getXattr(ctx context.Context, d *dentry, creds *auth.Crede
 	// Analogous to fs/overlayfs/super.c:ovl_other_xattr_get().
 	vfsObj := d.fs.vfsfs.VirtualFilesystem()
 	top := d.topLayer()
-	return vfsObj.GetxattrAt(ctx, fs.creds, &vfs.PathOperation{Root: top, Start: top}, opts)
+	return vfsObj.GetXattrAt(ctx, fs.creds, &vfs.PathOperation{Root: top, Start: top}, opts)
 }
 
-// SetxattrAt implements vfs.FilesystemImpl.SetxattrAt.
-func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error {
+// SetXattrAt implements vfs.FilesystemImpl.SetXattrAt.
+func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
 	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
@@ -1435,7 +1435,7 @@ func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
 }
 
 // Precondition: fs.renameMu must be locked.
-func (fs *filesystem) setXattrLocked(ctx context.Context, d *dentry, mnt *vfs.Mount, creds *auth.Credentials, opts *vfs.SetxattrOptions) error {
+func (fs *filesystem) setXattrLocked(ctx context.Context, d *dentry, mnt *vfs.Mount, creds *auth.Credentials, opts *vfs.SetXattrOptions) error {
 	if err := d.checkXattrPermissions(creds, opts.Name, vfs.MayWrite); err != nil {
 		return err
 	}
@@ -1455,11 +1455,11 @@ func (fs *filesystem) setXattrLocked(ctx context.Context, d *dentry, mnt *vfs.Mo
 		return err
 	}
 	vfsObj := d.fs.vfsfs.VirtualFilesystem()
-	return vfsObj.SetxattrAt(ctx, fs.creds, &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD}, opts)
+	return vfsObj.SetXattrAt(ctx, fs.creds, &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD}, opts)
 }
 
-// RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt.
-func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
+// RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt.
+func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
 	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
@@ -1477,7 +1477,7 @@ func (fs *filesystem) removeXattrLocked(ctx context.Context, d *dentry, mnt *vfs
 		return err
 	}
 
-	// Like SetxattrAt, return EOPNOTSUPP when removing an overlay attribute.
+	// Like SetXattrAt, return EOPNOTSUPP when removing an overlay attribute.
 	// Linux passes the remove request to xattr_handler->set.
 	// See fs/xattr.c:vfs_removexattr().
 	if isOverlayXattr(name) {
@@ -1492,7 +1492,7 @@ func (fs *filesystem) removeXattrLocked(ctx context.Context, d *dentry, mnt *vfs
 		return err
 	}
 	vfsObj := d.fs.vfsfs.VirtualFilesystem()
-	return vfsObj.RemovexattrAt(ctx, fs.creds, &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD}, name)
+	return vfsObj.RemoveXattrAt(ctx, fs.creds, &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD}, name)
 }
 
 // PrependPath implements vfs.FilesystemImpl.PrependPath.
diff --git a/pkg/sentry/fsimpl/overlay/overlay.go b/pkg/sentry/fsimpl/overlay/overlay.go
index e706f9d4ea..9a8f7010e9 100644
--- a/pkg/sentry/fsimpl/overlay/overlay.go
+++ b/pkg/sentry/fsimpl/overlay/overlay.go
@@ -632,26 +632,26 @@ func (fd *fileDescription) dentry() *dentry {
 	return fd.vfsfd.Dentry().Impl().(*dentry)
 }
 
-// Listxattr implements vfs.FileDescriptionImpl.Listxattr.
-func (fd *fileDescription) Listxattr(ctx context.Context, size uint64) ([]string, error) {
+// ListXattr implements vfs.FileDescriptionImpl.ListXattr.
+func (fd *fileDescription) ListXattr(ctx context.Context, size uint64) ([]string, error) {
 	return fd.filesystem().listXattr(ctx, fd.dentry(), size)
 }
 
-// Getxattr implements vfs.FileDescriptionImpl.Getxattr.
-func (fd *fileDescription) Getxattr(ctx context.Context, opts vfs.GetxattrOptions) (string, error) {
+// GetXattr implements vfs.FileDescriptionImpl.GetXattr.
+func (fd *fileDescription) GetXattr(ctx context.Context, opts vfs.GetXattrOptions) (string, error) {
 	return fd.filesystem().getXattr(ctx, fd.dentry(), auth.CredentialsFromContext(ctx), &opts)
 }
 
-// Setxattr implements vfs.FileDescriptionImpl.Setxattr.
-func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOptions) error {
+// SetXattr implements vfs.FileDescriptionImpl.SetXattr.
+func (fd *fileDescription) SetXattr(ctx context.Context, opts vfs.SetXattrOptions) error {
 	fs := fd.filesystem()
 	fs.renameMu.RLock()
 	defer fs.renameMu.RUnlock()
 	return fs.setXattrLocked(ctx, fd.dentry(), fd.vfsfd.Mount(), auth.CredentialsFromContext(ctx), &opts)
 }
 
-// Removexattr implements vfs.FileDescriptionImpl.Removexattr.
-func (fd *fileDescription) Removexattr(ctx context.Context, name string) error {
+// RemoveXattr implements vfs.FileDescriptionImpl.RemoveXattr.
+func (fd *fileDescription) RemoveXattr(ctx context.Context, name string) error {
 	fs := fd.filesystem()
 	fs.renameMu.RLock()
 	defer fs.renameMu.RUnlock()
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index e0de04e05c..61d925a734 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -792,37 +792,37 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath
 	}
 }
 
-// ListxattrAt implements vfs.FilesystemImpl.ListxattrAt.
-func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
+// ListXattrAt implements vfs.FilesystemImpl.ListXattrAt.
+func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
 	fs.mu.RLock()
 	defer fs.mu.RUnlock()
 	d, err := resolveLocked(ctx, rp)
 	if err != nil {
 		return nil, err
 	}
-	return d.inode.listxattr(size)
+	return d.inode.listXattr(size)
 }
 
-// GetxattrAt implements vfs.FilesystemImpl.GetxattrAt.
-func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetxattrOptions) (string, error) {
+// GetXattrAt implements vfs.FilesystemImpl.GetXattrAt.
+func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) {
 	fs.mu.RLock()
 	defer fs.mu.RUnlock()
 	d, err := resolveLocked(ctx, rp)
 	if err != nil {
 		return "", err
 	}
-	return d.inode.getxattr(rp.Credentials(), &opts)
+	return d.inode.getXattr(rp.Credentials(), &opts)
 }
 
-// SetxattrAt implements vfs.FilesystemImpl.SetxattrAt.
-func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error {
+// SetXattrAt implements vfs.FilesystemImpl.SetXattrAt.
+func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error {
 	fs.mu.RLock()
 	d, err := resolveLocked(ctx, rp)
 	if err != nil {
 		fs.mu.RUnlock()
 		return err
 	}
-	if err := d.inode.setxattr(rp.Credentials(), &opts); err != nil {
+	if err := d.inode.setXattr(rp.Credentials(), &opts); err != nil {
 		fs.mu.RUnlock()
 		return err
 	}
@@ -832,15 +832,15 @@ func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
 	return nil
 }
 
-// RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt.
-func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
+// RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt.
+func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
 	fs.mu.RLock()
 	d, err := resolveLocked(ctx, rp)
 	if err != nil {
 		fs.mu.RUnlock()
 		return err
 	}
-	if err := d.inode.removexattr(rp.Credentials(), name); err != nil {
+	if err := d.inode.removeXattr(rp.Credentials(), name); err != nil {
 		fs.mu.RUnlock()
 		return err
 	}
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index d6074f20f5..4871e55d39 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -626,29 +626,29 @@ func (i *inode) touchCMtimeLocked() {
 	atomic.StoreInt64(&i.ctime, now)
 }
 
-func (i *inode) listxattr(size uint64) ([]string, error) {
-	return i.xattrs.Listxattr(size)
+func (i *inode) listXattr(size uint64) ([]string, error) {
+	return i.xattrs.ListXattr(size)
 }
 
-func (i *inode) getxattr(creds *auth.Credentials, opts *vfs.GetxattrOptions) (string, error) {
+func (i *inode) getXattr(creds *auth.Credentials, opts *vfs.GetXattrOptions) (string, error) {
 	if err := i.checkXattrPermissions(creds, opts.Name, vfs.MayRead); err != nil {
 		return "", err
 	}
-	return i.xattrs.Getxattr(opts)
+	return i.xattrs.GetXattr(opts)
 }
 
-func (i *inode) setxattr(creds *auth.Credentials, opts *vfs.SetxattrOptions) error {
+func (i *inode) setXattr(creds *auth.Credentials, opts *vfs.SetXattrOptions) error {
 	if err := i.checkXattrPermissions(creds, opts.Name, vfs.MayWrite); err != nil {
 		return err
 	}
-	return i.xattrs.Setxattr(opts)
+	return i.xattrs.SetXattr(opts)
 }
 
-func (i *inode) removexattr(creds *auth.Credentials, name string) error {
+func (i *inode) removeXattr(creds *auth.Credentials, name string) error {
 	if err := i.checkXattrPermissions(creds, name, vfs.MayWrite); err != nil {
 		return err
 	}
-	return i.xattrs.Removexattr(name)
+	return i.xattrs.RemoveXattr(name)
 }
 
 func (i *inode) checkXattrPermissions(creds *auth.Credentials, name string, ats vfs.AccessTypes) error {
@@ -712,20 +712,20 @@ func (fd *fileDescription) StatFS(ctx context.Context) (linux.Statfs, error) {
 	return globalStatfs, nil
 }
 
-// Listxattr implements vfs.FileDescriptionImpl.Listxattr.
-func (fd *fileDescription) Listxattr(ctx context.Context, size uint64) ([]string, error) {
-	return fd.inode().listxattr(size)
+// ListXattr implements vfs.FileDescriptionImpl.ListXattr.
+func (fd *fileDescription) ListXattr(ctx context.Context, size uint64) ([]string, error) {
+	return fd.inode().listXattr(size)
 }
 
-// Getxattr implements vfs.FileDescriptionImpl.Getxattr.
-func (fd *fileDescription) Getxattr(ctx context.Context, opts vfs.GetxattrOptions) (string, error) {
-	return fd.inode().getxattr(auth.CredentialsFromContext(ctx), &opts)
+// GetXattr implements vfs.FileDescriptionImpl.GetXattr.
+func (fd *fileDescription) GetXattr(ctx context.Context, opts vfs.GetXattrOptions) (string, error) {
+	return fd.inode().getXattr(auth.CredentialsFromContext(ctx), &opts)
 }
 
-// Setxattr implements vfs.FileDescriptionImpl.Setxattr.
-func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOptions) error {
+// SetXattr implements vfs.FileDescriptionImpl.SetXattr.
+func (fd *fileDescription) SetXattr(ctx context.Context, opts vfs.SetXattrOptions) error {
 	d := fd.dentry()
-	if err := d.inode.setxattr(auth.CredentialsFromContext(ctx), &opts); err != nil {
+	if err := d.inode.setXattr(auth.CredentialsFromContext(ctx), &opts); err != nil {
 		return err
 	}
 
@@ -734,10 +734,10 @@ func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOption
 	return nil
 }
 
-// Removexattr implements vfs.FileDescriptionImpl.Removexattr.
-func (fd *fileDescription) Removexattr(ctx context.Context, name string) error {
+// RemoveXattr implements vfs.FileDescriptionImpl.RemoveXattr.
+func (fd *fileDescription) RemoveXattr(ctx context.Context, name string) error {
 	d := fd.dentry()
-	if err := d.inode.removexattr(auth.CredentialsFromContext(ctx), name); err != nil {
+	if err := d.inode.removeXattr(auth.CredentialsFromContext(ctx), name); err != nil {
 		return err
 	}
 
diff --git a/pkg/sentry/fsimpl/verity/filesystem.go b/pkg/sentry/fsimpl/verity/filesystem.go
index 0e17dbddc4..e944fd5d2e 100644
--- a/pkg/sentry/fsimpl/verity/filesystem.go
+++ b/pkg/sentry/fsimpl/verity/filesystem.go
@@ -179,10 +179,10 @@ func (fs *filesystem) verifyChild(ctx context.Context, parent *dentry, child *de
 	// corresponding Merkle tree file.
 	// This is the offset of the root hash for child in its parent's Merkle
 	// tree file.
-	off, err := vfsObj.GetxattrAt(ctx, fs.creds, &vfs.PathOperation{
+	off, err := vfsObj.GetXattrAt(ctx, fs.creds, &vfs.PathOperation{
 		Root:  child.lowerMerkleVD,
 		Start: child.lowerMerkleVD,
-	}, &vfs.GetxattrOptions{
+	}, &vfs.GetXattrOptions{
 		Name: merkleOffsetInParentXattr,
 		// Offset is a 32 bit integer.
 		Size: sizeOfInt32,
@@ -233,7 +233,7 @@ func (fs *filesystem) verifyChild(ctx context.Context, parent *dentry, child *de
 	// dataSize is the size of raw data for the Merkle tree. For a file,
 	// dataSize is the size of the whole file. For a directory, dataSize is
 	// the size of all its children's root hashes.
-	dataSize, err := parentMerkleFD.Getxattr(ctx, &vfs.GetxattrOptions{
+	dataSize, err := parentMerkleFD.GetXattr(ctx, &vfs.GetXattrOptions{
 		Name: merkleSizeXattr,
 		Size: sizeOfInt32,
 	})
@@ -660,8 +660,8 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath
 	return nil, syserror.ECONNREFUSED
 }
 
-// ListxattrAt implements vfs.FilesystemImpl.ListxattrAt.
-func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
+// ListXattrAt implements vfs.FilesystemImpl.ListXattrAt.
+func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
 	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
@@ -670,14 +670,14 @@ func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, si
 		return nil, err
 	}
 	lowerVD := d.lowerVD
-	return fs.vfsfs.VirtualFilesystem().ListxattrAt(ctx, d.fs.creds, &vfs.PathOperation{
+	return fs.vfsfs.VirtualFilesystem().ListXattrAt(ctx, d.fs.creds, &vfs.PathOperation{
 		Root:  lowerVD,
 		Start: lowerVD,
 	}, size)
 }
 
-// GetxattrAt implements vfs.FilesystemImpl.GetxattrAt.
-func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetxattrOptions) (string, error) {
+// GetXattrAt implements vfs.FilesystemImpl.GetXattrAt.
+func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
 	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
@@ -686,20 +686,20 @@ func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
 		return "", err
 	}
 	lowerVD := d.lowerVD
-	return fs.vfsfs.VirtualFilesystem().GetxattrAt(ctx, d.fs.creds, &vfs.PathOperation{
+	return fs.vfsfs.VirtualFilesystem().GetXattrAt(ctx, d.fs.creds, &vfs.PathOperation{
 		Root:  lowerVD,
 		Start: lowerVD,
 	}, &opts)
 }
 
-// SetxattrAt implements vfs.FilesystemImpl.SetxattrAt.
-func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error {
+// SetXattrAt implements vfs.FilesystemImpl.SetXattrAt.
+func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error {
 	// Verity file system is read-only.
 	return syserror.EROFS
 }
 
-// RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt.
-func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
+// RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt.
+func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
 	// Verity file system is read-only.
 	return syserror.EROFS
 }
diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go
index c576d9475d..0df3bd449c 100644
--- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go
+++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go
@@ -93,16 +93,16 @@ func Override() {
 	s.Table[165] = syscalls.Supported("mount", Mount)
 	s.Table[166] = syscalls.Supported("umount2", Umount2)
 	s.Table[187] = syscalls.Supported("readahead", Readahead)
-	s.Table[188] = syscalls.Supported("setxattr", Setxattr)
+	s.Table[188] = syscalls.Supported("setxattr", SetXattr)
 	s.Table[189] = syscalls.Supported("lsetxattr", Lsetxattr)
 	s.Table[190] = syscalls.Supported("fsetxattr", Fsetxattr)
-	s.Table[191] = syscalls.Supported("getxattr", Getxattr)
+	s.Table[191] = syscalls.Supported("getxattr", GetXattr)
 	s.Table[192] = syscalls.Supported("lgetxattr", Lgetxattr)
 	s.Table[193] = syscalls.Supported("fgetxattr", Fgetxattr)
-	s.Table[194] = syscalls.Supported("listxattr", Listxattr)
+	s.Table[194] = syscalls.Supported("listxattr", ListXattr)
 	s.Table[195] = syscalls.Supported("llistxattr", Llistxattr)
 	s.Table[196] = syscalls.Supported("flistxattr", Flistxattr)
-	s.Table[197] = syscalls.Supported("removexattr", Removexattr)
+	s.Table[197] = syscalls.Supported("removexattr", RemoveXattr)
 	s.Table[198] = syscalls.Supported("lremovexattr", Lremovexattr)
 	s.Table[199] = syscalls.Supported("fremovexattr", Fremovexattr)
 	s.Table[209] = syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"})
@@ -163,16 +163,16 @@ func Override() {
 
 	// Override ARM64.
 	s = linux.ARM64
-	s.Table[5] = syscalls.Supported("setxattr", Setxattr)
+	s.Table[5] = syscalls.Supported("setxattr", SetXattr)
 	s.Table[6] = syscalls.Supported("lsetxattr", Lsetxattr)
 	s.Table[7] = syscalls.Supported("fsetxattr", Fsetxattr)
-	s.Table[8] = syscalls.Supported("getxattr", Getxattr)
+	s.Table[8] = syscalls.Supported("getxattr", GetXattr)
 	s.Table[9] = syscalls.Supported("lgetxattr", Lgetxattr)
 	s.Table[10] = syscalls.Supported("fgetxattr", Fgetxattr)
-	s.Table[11] = syscalls.Supported("listxattr", Listxattr)
+	s.Table[11] = syscalls.Supported("listxattr", ListXattr)
 	s.Table[12] = syscalls.Supported("llistxattr", Llistxattr)
 	s.Table[13] = syscalls.Supported("flistxattr", Flistxattr)
-	s.Table[14] = syscalls.Supported("removexattr", Removexattr)
+	s.Table[14] = syscalls.Supported("removexattr", RemoveXattr)
 	s.Table[15] = syscalls.Supported("lremovexattr", Lremovexattr)
 	s.Table[16] = syscalls.Supported("fremovexattr", Fremovexattr)
 	s.Table[17] = syscalls.Supported("getcwd", Getcwd)
diff --git a/pkg/sentry/syscalls/linux/vfs2/xattr.go b/pkg/sentry/syscalls/linux/vfs2/xattr.go
index ef99246ed0..e05723ef96 100644
--- a/pkg/sentry/syscalls/linux/vfs2/xattr.go
+++ b/pkg/sentry/syscalls/linux/vfs2/xattr.go
@@ -26,8 +26,8 @@ import (
 	"gvisor.dev/gvisor/pkg/usermem"
 )
 
-// Listxattr implements Linux syscall listxattr(2).
-func Listxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+// ListXattr implements Linux syscall listxattr(2).
+func ListXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
 	return listxattr(t, args, followFinalSymlink)
 }
 
@@ -51,7 +51,7 @@ func listxattr(t *kernel.Task, args arch.SyscallArguments, shouldFollowFinalSyml
 	}
 	defer tpop.Release(t)
 
-	names, err := t.Kernel().VFS().ListxattrAt(t, t.Credentials(), &tpop.pop, uint64(size))
+	names, err := t.Kernel().VFS().ListXattrAt(t, t.Credentials(), &tpop.pop, uint64(size))
 	if err != nil {
 		return 0, nil, err
 	}
@@ -74,7 +74,7 @@ func Flistxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
 	}
 	defer file.DecRef(t)
 
-	names, err := file.Listxattr(t, uint64(size))
+	names, err := file.ListXattr(t, uint64(size))
 	if err != nil {
 		return 0, nil, err
 	}
@@ -85,8 +85,8 @@ func Flistxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
 	return uintptr(n), nil, nil
 }
 
-// Getxattr implements Linux syscall getxattr(2).
-func Getxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+// GetXattr implements Linux syscall getxattr(2).
+func GetXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
 	return getxattr(t, args, followFinalSymlink)
 }
 
@@ -116,7 +116,7 @@ func getxattr(t *kernel.Task, args arch.SyscallArguments, shouldFollowFinalSymli
 		return 0, nil, err
 	}
 
-	value, err := t.Kernel().VFS().GetxattrAt(t, t.Credentials(), &tpop.pop, &vfs.GetxattrOptions{
+	value, err := t.Kernel().VFS().GetXattrAt(t, t.Credentials(), &tpop.pop, &vfs.GetXattrOptions{
 		Name: name,
 		Size: uint64(size),
 	})
@@ -148,7 +148,7 @@ func Fgetxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
 		return 0, nil, err
 	}
 
-	value, err := file.Getxattr(t, &vfs.GetxattrOptions{Name: name, Size: uint64(size)})
+	value, err := file.GetXattr(t, &vfs.GetXattrOptions{Name: name, Size: uint64(size)})
 	if err != nil {
 		return 0, nil, err
 	}
@@ -159,8 +159,8 @@ func Fgetxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
 	return uintptr(n), nil, nil
 }
 
-// Setxattr implements Linux syscall setxattr(2).
-func Setxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+// SetXattr implements Linux syscall setxattr(2).
+func SetXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
 	return 0, nil, setxattr(t, args, followFinalSymlink)
 }
 
@@ -199,7 +199,7 @@ func setxattr(t *kernel.Task, args arch.SyscallArguments, shouldFollowFinalSymli
 		return err
 	}
 
-	return t.Kernel().VFS().SetxattrAt(t, t.Credentials(), &tpop.pop, &vfs.SetxattrOptions{
+	return t.Kernel().VFS().SetXattrAt(t, t.Credentials(), &tpop.pop, &vfs.SetXattrOptions{
 		Name:  name,
 		Value: value,
 		Flags: uint32(flags),
@@ -233,15 +233,15 @@ func Fsetxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
 		return 0, nil, err
 	}
 
-	return 0, nil, file.Setxattr(t, &vfs.SetxattrOptions{
+	return 0, nil, file.SetXattr(t, &vfs.SetXattrOptions{
 		Name:  name,
 		Value: value,
 		Flags: uint32(flags),
 	})
 }
 
-// Removexattr implements Linux syscall removexattr(2).
-func Removexattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+// RemoveXattr implements Linux syscall removexattr(2).
+func RemoveXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
 	return 0, nil, removexattr(t, args, followFinalSymlink)
 }
 
@@ -269,7 +269,7 @@ func removexattr(t *kernel.Task, args arch.SyscallArguments, shouldFollowFinalSy
 		return err
 	}
 
-	return t.Kernel().VFS().RemovexattrAt(t, t.Credentials(), &tpop.pop, name)
+	return t.Kernel().VFS().RemoveXattrAt(t, t.Credentials(), &tpop.pop, name)
 }
 
 // Fremovexattr implements Linux syscall fremovexattr(2).
@@ -288,7 +288,7 @@ func Fremovexattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.
 		return 0, nil, err
 	}
 
-	return 0, nil, file.Removexattr(t, name)
+	return 0, nil, file.RemoveXattr(t, name)
 }
 
 func copyInXattrName(t *kernel.Task, nameAddr usermem.Addr) (string, error) {
diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go
index 5a0e3e6b5b..9c4db3047d 100644
--- a/pkg/sentry/vfs/anonfs.go
+++ b/pkg/sentry/vfs/anonfs.go
@@ -245,32 +245,32 @@ func (fs *anonFilesystem) BoundEndpointAt(ctx context.Context, rp *ResolvingPath
 	return nil, syserror.ECONNREFUSED
 }
 
-// ListxattrAt implements FilesystemImpl.ListxattrAt.
-func (fs *anonFilesystem) ListxattrAt(ctx context.Context, rp *ResolvingPath, size uint64) ([]string, error) {
+// ListXattrAt implements FilesystemImpl.ListXattrAt.
+func (fs *anonFilesystem) ListXattrAt(ctx context.Context, rp *ResolvingPath, size uint64) ([]string, error) {
 	if !rp.Done() {
 		return nil, syserror.ENOTDIR
 	}
 	return nil, nil
 }
 
-// GetxattrAt implements FilesystemImpl.GetxattrAt.
-func (fs *anonFilesystem) GetxattrAt(ctx context.Context, rp *ResolvingPath, opts GetxattrOptions) (string, error) {
+// GetXattrAt implements FilesystemImpl.GetXattrAt.
+func (fs *anonFilesystem) GetXattrAt(ctx context.Context, rp *ResolvingPath, opts GetXattrOptions) (string, error) {
 	if !rp.Done() {
 		return "", syserror.ENOTDIR
 	}
 	return "", syserror.ENOTSUP
 }
 
-// SetxattrAt implements FilesystemImpl.SetxattrAt.
-func (fs *anonFilesystem) SetxattrAt(ctx context.Context, rp *ResolvingPath, opts SetxattrOptions) error {
+// SetXattrAt implements FilesystemImpl.SetXattrAt.
+func (fs *anonFilesystem) SetXattrAt(ctx context.Context, rp *ResolvingPath, opts SetXattrOptions) error {
 	if !rp.Done() {
 		return syserror.ENOTDIR
 	}
 	return syserror.EPERM
 }
 
-// RemovexattrAt implements FilesystemImpl.RemovexattrAt.
-func (fs *anonFilesystem) RemovexattrAt(ctx context.Context, rp *ResolvingPath, name string) error {
+// RemoveXattrAt implements FilesystemImpl.RemoveXattrAt.
+func (fs *anonFilesystem) RemoveXattrAt(ctx context.Context, rp *ResolvingPath, name string) error {
 	if !rp.Done() {
 		return syserror.ENOTDIR
 	}
diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go
index 22a54fa487..2b29a3c3f1 100644
--- a/pkg/sentry/vfs/file_description.go
+++ b/pkg/sentry/vfs/file_description.go
@@ -101,7 +101,7 @@ type FileDescriptionOptions struct {
 
 	// If UseDentryMetadata is true, calls to FileDescription methods that
 	// interact with file and filesystem metadata (Stat, SetStat, StatFS,
-	// Listxattr, Getxattr, Setxattr, Removexattr) are implemented by calling
+	// ListXattr, GetXattr, SetXattr, RemoveXattr) are implemented by calling
 	// the corresponding FilesystemImpl methods instead of the corresponding
 	// FileDescriptionImpl methods.
 	//
@@ -420,19 +420,19 @@ type FileDescriptionImpl interface {
 	// Ioctl implements the ioctl(2) syscall.
 	Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error)
 
-	// Listxattr returns all extended attribute names for the file.
-	Listxattr(ctx context.Context, size uint64) ([]string, error)
+	// ListXattr returns all extended attribute names for the file.
+	ListXattr(ctx context.Context, size uint64) ([]string, error)
 
-	// Getxattr returns the value associated with the given extended attribute
+	// GetXattr returns the value associated with the given extended attribute
 	// for the file.
-	Getxattr(ctx context.Context, opts GetxattrOptions) (string, error)
+	GetXattr(ctx context.Context, opts GetXattrOptions) (string, error)
 
-	// Setxattr changes the value associated with the given extended attribute
+	// SetXattr changes the value associated with the given extended attribute
 	// for the file.
-	Setxattr(ctx context.Context, opts SetxattrOptions) error
+	SetXattr(ctx context.Context, opts SetXattrOptions) error
 
-	// Removexattr removes the given extended attribute from the file.
-	Removexattr(ctx context.Context, name string) error
+	// RemoveXattr removes the given extended attribute from the file.
+	RemoveXattr(ctx context.Context, name string) error
 
 	// LockBSD tries to acquire a BSD-style advisory file lock.
 	LockBSD(ctx context.Context, uid lock.UniqueID, t lock.LockType, block lock.Blocker) error
@@ -635,25 +635,25 @@ func (fd *FileDescription) Ioctl(ctx context.Context, uio usermem.IO, args arch.
 	return fd.impl.Ioctl(ctx, uio, args)
 }
 
-// Listxattr returns all extended attribute names for the file represented by
+// ListXattr returns all extended attribute names for the file represented by
 // fd.
 //
 // If the size of the list (including a NUL terminating byte after every entry)
 // would exceed size, ERANGE may be returned. Note that implementations
 // are free to ignore size entirely and return without error). In all cases,
 // if size is 0, the list should be returned without error, regardless of size.
-func (fd *FileDescription) Listxattr(ctx context.Context, size uint64) ([]string, error) {
+func (fd *FileDescription) ListXattr(ctx context.Context, size uint64) ([]string, error) {
 	if fd.opts.UseDentryMetadata {
 		vfsObj := fd.vd.mount.vfs
 		rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{
 			Root:  fd.vd,
 			Start: fd.vd,
 		})
-		names, err := fd.vd.mount.fs.impl.ListxattrAt(ctx, rp, size)
+		names, err := fd.vd.mount.fs.impl.ListXattrAt(ctx, rp, size)
 		vfsObj.putResolvingPath(ctx, rp)
 		return names, err
 	}
-	names, err := fd.impl.Listxattr(ctx, size)
+	names, err := fd.impl.ListXattr(ctx, size)
 	if err == syserror.ENOTSUP {
 		// Linux doesn't actually return ENOTSUP in this case; instead,
 		// fs/xattr.c:vfs_listxattr() falls back to allowing the security
@@ -664,57 +664,57 @@ func (fd *FileDescription) Listxattr(ctx context.Context, size uint64) ([]string
 	return names, err
 }
 
-// Getxattr returns the value associated with the given extended attribute for
+// GetXattr returns the value associated with the given extended attribute for
 // the file represented by fd.
 //
 // If the size of the return value exceeds opts.Size, ERANGE may be returned
 // (note that implementations are free to ignore opts.Size entirely and return
 // without error). In all cases, if opts.Size is 0, the value should be
 // returned without error, regardless of size.
-func (fd *FileDescription) Getxattr(ctx context.Context, opts *GetxattrOptions) (string, error) {
+func (fd *FileDescription) GetXattr(ctx context.Context, opts *GetXattrOptions) (string, error) {
 	if fd.opts.UseDentryMetadata {
 		vfsObj := fd.vd.mount.vfs
 		rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{
 			Root:  fd.vd,
 			Start: fd.vd,
 		})
-		val, err := fd.vd.mount.fs.impl.GetxattrAt(ctx, rp, *opts)
+		val, err := fd.vd.mount.fs.impl.GetXattrAt(ctx, rp, *opts)
 		vfsObj.putResolvingPath(ctx, rp)
 		return val, err
 	}
-	return fd.impl.Getxattr(ctx, *opts)
+	return fd.impl.GetXattr(ctx, *opts)
 }
 
-// Setxattr changes the value associated with the given extended attribute for
+// SetXattr changes the value associated with the given extended attribute for
 // the file represented by fd.
-func (fd *FileDescription) Setxattr(ctx context.Context, opts *SetxattrOptions) error {
+func (fd *FileDescription) SetXattr(ctx context.Context, opts *SetXattrOptions) error {
 	if fd.opts.UseDentryMetadata {
 		vfsObj := fd.vd.mount.vfs
 		rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{
 			Root:  fd.vd,
 			Start: fd.vd,
 		})
-		err := fd.vd.mount.fs.impl.SetxattrAt(ctx, rp, *opts)
+		err := fd.vd.mount.fs.impl.SetXattrAt(ctx, rp, *opts)
 		vfsObj.putResolvingPath(ctx, rp)
 		return err
 	}
-	return fd.impl.Setxattr(ctx, *opts)
+	return fd.impl.SetXattr(ctx, *opts)
 }
 
-// Removexattr removes the given extended attribute from the file represented
+// RemoveXattr removes the given extended attribute from the file represented
 // by fd.
-func (fd *FileDescription) Removexattr(ctx context.Context, name string) error {
+func (fd *FileDescription) RemoveXattr(ctx context.Context, name string) error {
 	if fd.opts.UseDentryMetadata {
 		vfsObj := fd.vd.mount.vfs
 		rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{
 			Root:  fd.vd,
 			Start: fd.vd,
 		})
-		err := fd.vd.mount.fs.impl.RemovexattrAt(ctx, rp, name)
+		err := fd.vd.mount.fs.impl.RemoveXattrAt(ctx, rp, name)
 		vfsObj.putResolvingPath(ctx, rp)
 		return err
 	}
-	return fd.impl.Removexattr(ctx, name)
+	return fd.impl.RemoveXattr(ctx, name)
 }
 
 // SyncFS instructs the filesystem containing fd to execute the semantics of
diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go
index 6b8b4ad490..68b80a951a 100644
--- a/pkg/sentry/vfs/file_description_impl_util.go
+++ b/pkg/sentry/vfs/file_description_impl_util.go
@@ -134,28 +134,28 @@ func (FileDescriptionDefaultImpl) Ioctl(ctx context.Context, uio usermem.IO, arg
 	return 0, syserror.ENOTTY
 }
 
-// Listxattr implements FileDescriptionImpl.Listxattr analogously to
+// ListXattr implements FileDescriptionImpl.ListXattr analogously to
 // inode_operations::listxattr == NULL in Linux.
-func (FileDescriptionDefaultImpl) Listxattr(ctx context.Context, size uint64) ([]string, error) {
-	// This isn't exactly accurate; see FileDescription.Listxattr.
+func (FileDescriptionDefaultImpl) ListXattr(ctx context.Context, size uint64) ([]string, error) {
+	// This isn't exactly accurate; see FileDescription.ListXattr.
 	return nil, syserror.ENOTSUP
 }
 
-// Getxattr implements FileDescriptionImpl.Getxattr analogously to
+// GetXattr implements FileDescriptionImpl.GetXattr analogously to
 // inode::i_opflags & IOP_XATTR == 0 in Linux.
-func (FileDescriptionDefaultImpl) Getxattr(ctx context.Context, opts GetxattrOptions) (string, error) {
+func (FileDescriptionDefaultImpl) GetXattr(ctx context.Context, opts GetXattrOptions) (string, error) {
 	return "", syserror.ENOTSUP
 }
 
-// Setxattr implements FileDescriptionImpl.Setxattr analogously to
+// SetXattr implements FileDescriptionImpl.SetXattr analogously to
 // inode::i_opflags & IOP_XATTR == 0 in Linux.
-func (FileDescriptionDefaultImpl) Setxattr(ctx context.Context, opts SetxattrOptions) error {
+func (FileDescriptionDefaultImpl) SetXattr(ctx context.Context, opts SetXattrOptions) error {
 	return syserror.ENOTSUP
 }
 
-// Removexattr implements FileDescriptionImpl.Removexattr analogously to
+// RemoveXattr implements FileDescriptionImpl.RemoveXattr analogously to
 // inode::i_opflags & IOP_XATTR == 0 in Linux.
-func (FileDescriptionDefaultImpl) Removexattr(ctx context.Context, name string) error {
+func (FileDescriptionDefaultImpl) RemoveXattr(ctx context.Context, name string) error {
 	return syserror.ENOTSUP
 }
 
diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go
index 46851f638e..7dae4e7e8f 100644
--- a/pkg/sentry/vfs/filesystem.go
+++ b/pkg/sentry/vfs/filesystem.go
@@ -416,26 +416,26 @@ type FilesystemImpl interface {
 	// ResolvingPath.Resolve*(), then !rp.Done().
 	UnlinkAt(ctx context.Context, rp *ResolvingPath) error
 
-	// ListxattrAt returns all extended attribute names for the file at rp.
+	// ListXattrAt returns all extended attribute names for the file at rp.
 	//
 	// Errors:
 	//
 	// - If extended attributes are not supported by the filesystem,
-	// ListxattrAt returns ENOTSUP.
+	// ListXattrAt returns ENOTSUP.
 	//
 	// - If the size of the list (including a NUL terminating byte after every
 	// entry) would exceed size, ERANGE may be returned. Note that
 	// implementations are free to ignore size entirely and return without
 	// error). In all cases, if size is 0, the list should be returned without
 	// error, regardless of size.
-	ListxattrAt(ctx context.Context, rp *ResolvingPath, size uint64) ([]string, error)
+	ListXattrAt(ctx context.Context, rp *ResolvingPath, size uint64) ([]string, error)
 
-	// GetxattrAt returns the value associated with the given extended
+	// GetXattrAt returns the value associated with the given extended
 	// attribute for the file at rp.
 	//
 	// Errors:
 	//
-	// - If extended attributes are not supported by the filesystem, GetxattrAt
+	// - If extended attributes are not supported by the filesystem, GetXattrAt
 	// returns ENOTSUP.
 	//
 	// - If an extended attribute named opts.Name does not exist, ENODATA is
@@ -445,30 +445,30 @@ type FilesystemImpl interface {
 	// returned (note that implementations are free to ignore opts.Size entirely
 	// and return without error). In all cases, if opts.Size is 0, the value
 	// should be returned without error, regardless of size.
-	GetxattrAt(ctx context.Context, rp *ResolvingPath, opts GetxattrOptions) (string, error)
+	GetXattrAt(ctx context.Context, rp *ResolvingPath, opts GetXattrOptions) (string, error)
 
-	// SetxattrAt changes the value associated with the given extended
+	// SetXattrAt changes the value associated with the given extended
 	// attribute for the file at rp.
 	//
 	// Errors:
 	//
-	// - If extended attributes are not supported by the filesystem, SetxattrAt
+	// - If extended attributes are not supported by the filesystem, SetXattrAt
 	// returns ENOTSUP.
 	//
 	// - If XATTR_CREATE is set in opts.Flag and opts.Name already exists,
 	// EEXIST is returned. If XATTR_REPLACE is set and opts.Name does not exist,
 	// ENODATA is returned.
-	SetxattrAt(ctx context.Context, rp *ResolvingPath, opts SetxattrOptions) error
+	SetXattrAt(ctx context.Context, rp *ResolvingPath, opts SetXattrOptions) error
 
-	// RemovexattrAt removes the given extended attribute from the file at rp.
+	// RemoveXattrAt removes the given extended attribute from the file at rp.
 	//
 	// Errors:
 	//
 	// - If extended attributes are not supported by the filesystem,
-	// RemovexattrAt returns ENOTSUP.
+	// RemoveXattrAt returns ENOTSUP.
 	//
 	// - If name does not exist, ENODATA is returned.
-	RemovexattrAt(ctx context.Context, rp *ResolvingPath, name string) error
+	RemoveXattrAt(ctx context.Context, rp *ResolvingPath, name string) error
 
 	// BoundEndpointAt returns the Unix socket endpoint bound at the path rp.
 	//
diff --git a/pkg/sentry/vfs/memxattr/xattr.go b/pkg/sentry/vfs/memxattr/xattr.go
index cc1e7d764d..638b5d8304 100644
--- a/pkg/sentry/vfs/memxattr/xattr.go
+++ b/pkg/sentry/vfs/memxattr/xattr.go
@@ -33,8 +33,8 @@ type SimpleExtendedAttributes struct {
 	xattrs map[string]string
 }
 
-// Getxattr returns the value at 'name'.
-func (x *SimpleExtendedAttributes) Getxattr(opts *vfs.GetxattrOptions) (string, error) {
+// GetXattr returns the value at 'name'.
+func (x *SimpleExtendedAttributes) GetXattr(opts *vfs.GetXattrOptions) (string, error) {
 	x.mu.RLock()
 	value, ok := x.xattrs[opts.Name]
 	x.mu.RUnlock()
@@ -49,8 +49,8 @@ func (x *SimpleExtendedAttributes) Getxattr(opts *vfs.GetxattrOptions) (string,
 	return value, nil
 }
 
-// Setxattr sets 'value' at 'name'.
-func (x *SimpleExtendedAttributes) Setxattr(opts *vfs.SetxattrOptions) error {
+// SetXattr sets 'value' at 'name'.
+func (x *SimpleExtendedAttributes) SetXattr(opts *vfs.SetXattrOptions) error {
 	x.mu.Lock()
 	defer x.mu.Unlock()
 	if x.xattrs == nil {
@@ -72,8 +72,8 @@ func (x *SimpleExtendedAttributes) Setxattr(opts *vfs.SetxattrOptions) error {
 	return nil
 }
 
-// Listxattr returns all names in xattrs.
-func (x *SimpleExtendedAttributes) Listxattr(size uint64) ([]string, error) {
+// ListXattr returns all names in xattrs.
+func (x *SimpleExtendedAttributes) ListXattr(size uint64) ([]string, error) {
 	// Keep track of the size of the buffer needed in listxattr(2) for the list.
 	listSize := 0
 	x.mu.RLock()
@@ -90,8 +90,8 @@ func (x *SimpleExtendedAttributes) Listxattr(size uint64) ([]string, error) {
 	return names, nil
 }
 
-// Removexattr removes the xattr at 'name'.
-func (x *SimpleExtendedAttributes) Removexattr(name string) error {
+// RemoveXattr removes the xattr at 'name'.
+func (x *SimpleExtendedAttributes) RemoveXattr(name string) error {
 	x.mu.Lock()
 	defer x.mu.Unlock()
 	if _, ok := x.xattrs[name]; !ok {
diff --git a/pkg/sentry/vfs/options.go b/pkg/sentry/vfs/options.go
index dfc8573fde..b33d36cb11 100644
--- a/pkg/sentry/vfs/options.go
+++ b/pkg/sentry/vfs/options.go
@@ -190,10 +190,10 @@ type BoundEndpointOptions struct {
 	Addr string
 }
 
-// GetxattrOptions contains options to VirtualFilesystem.GetxattrAt(),
-// FilesystemImpl.GetxattrAt(), FileDescription.Getxattr(), and
-// FileDescriptionImpl.Getxattr().
-type GetxattrOptions struct {
+// GetXattrOptions contains options to VirtualFilesystem.GetXattrAt(),
+// FilesystemImpl.GetXattrAt(), FileDescription.GetXattr(), and
+// FileDescriptionImpl.GetXattr().
+type GetXattrOptions struct {
 	// Name is the name of the extended attribute to retrieve.
 	Name string
 
@@ -204,10 +204,10 @@ type GetxattrOptions struct {
 	Size uint64
 }
 
-// SetxattrOptions contains options to VirtualFilesystem.SetxattrAt(),
-// FilesystemImpl.SetxattrAt(), FileDescription.Setxattr(), and
-// FileDescriptionImpl.Setxattr().
-type SetxattrOptions struct {
+// SetXattrOptions contains options to VirtualFilesystem.SetXattrAt(),
+// FilesystemImpl.SetXattrAt(), FileDescription.SetXattr(), and
+// FileDescriptionImpl.SetXattr().
+type SetXattrOptions struct {
 	// Name is the name of the extended attribute being mutated.
 	Name string
 
diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go
index ec27562d6c..6825d81a56 100644
--- a/pkg/sentry/vfs/vfs.go
+++ b/pkg/sentry/vfs/vfs.go
@@ -687,12 +687,12 @@ func (vfs *VirtualFilesystem) BoundEndpointAt(ctx context.Context, creds *auth.C
 	}
 }
 
-// ListxattrAt returns all extended attribute names for the file at the given
+// ListXattrAt returns all extended attribute names for the file at the given
 // path.
-func (vfs *VirtualFilesystem) ListxattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, size uint64) ([]string, error) {
+func (vfs *VirtualFilesystem) ListXattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, size uint64) ([]string, error) {
 	rp := vfs.getResolvingPath(creds, pop)
 	for {
-		names, err := rp.mount.fs.impl.ListxattrAt(ctx, rp, size)
+		names, err := rp.mount.fs.impl.ListXattrAt(ctx, rp, size)
 		if err == nil {
 			vfs.putResolvingPath(ctx, rp)
 			return names, nil
@@ -712,12 +712,12 @@ func (vfs *VirtualFilesystem) ListxattrAt(ctx context.Context, creds *auth.Crede
 	}
 }
 
-// GetxattrAt returns the value associated with the given extended attribute
+// GetXattrAt returns the value associated with the given extended attribute
 // for the file at the given path.
-func (vfs *VirtualFilesystem) GetxattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *GetxattrOptions) (string, error) {
+func (vfs *VirtualFilesystem) GetXattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *GetXattrOptions) (string, error) {
 	rp := vfs.getResolvingPath(creds, pop)
 	for {
-		val, err := rp.mount.fs.impl.GetxattrAt(ctx, rp, *opts)
+		val, err := rp.mount.fs.impl.GetXattrAt(ctx, rp, *opts)
 		if err == nil {
 			vfs.putResolvingPath(ctx, rp)
 			return val, nil
@@ -729,12 +729,12 @@ func (vfs *VirtualFilesystem) GetxattrAt(ctx context.Context, creds *auth.Creden
 	}
 }
 
-// SetxattrAt changes the value associated with the given extended attribute
+// SetXattrAt changes the value associated with the given extended attribute
 // for the file at the given path.
-func (vfs *VirtualFilesystem) SetxattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *SetxattrOptions) error {
+func (vfs *VirtualFilesystem) SetXattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *SetXattrOptions) error {
 	rp := vfs.getResolvingPath(creds, pop)
 	for {
-		err := rp.mount.fs.impl.SetxattrAt(ctx, rp, *opts)
+		err := rp.mount.fs.impl.SetXattrAt(ctx, rp, *opts)
 		if err == nil {
 			vfs.putResolvingPath(ctx, rp)
 			return nil
@@ -746,11 +746,11 @@ func (vfs *VirtualFilesystem) SetxattrAt(ctx context.Context, creds *auth.Creden
 	}
 }
 
-// RemovexattrAt removes the given extended attribute from the file at rp.
-func (vfs *VirtualFilesystem) RemovexattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, name string) error {
+// RemoveXattrAt removes the given extended attribute from the file at rp.
+func (vfs *VirtualFilesystem) RemoveXattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, name string) error {
 	rp := vfs.getResolvingPath(creds, pop)
 	for {
-		err := rp.mount.fs.impl.RemovexattrAt(ctx, rp, name)
+		err := rp.mount.fs.impl.RemoveXattrAt(ctx, rp, name)
 		if err == nil {
 			vfs.putResolvingPath(ctx, rp)
 			return nil
diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc
index 5510a87a00..1a1010bb56 100644
--- a/test/syscalls/linux/xattr.cc
+++ b/test/syscalls/linux/xattr.cc
@@ -232,7 +232,7 @@ TEST_F(XattrTest, XattrOnInvalidFileTypes) {
   EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(EPERM));
 }
 
-TEST_F(XattrTest, SetxattrSizeSmallerThanValue) {
+TEST_F(XattrTest, SetXattrSizeSmallerThanValue) {
   const char* path = test_file_name_.c_str();
   const char name[] = "user.test";
   std::vector<char> val = {'a', 'a'};
@@ -247,7 +247,7 @@ TEST_F(XattrTest, SetxattrSizeSmallerThanValue) {
   EXPECT_EQ(buf, expected_buf);
 }
 
-TEST_F(XattrTest, SetxattrZeroSize) {
+TEST_F(XattrTest, SetXattrZeroSize) {
   const char* path = test_file_name_.c_str();
   const char name[] = "user.test";
   char val = 'a';
@@ -259,7 +259,7 @@ TEST_F(XattrTest, SetxattrZeroSize) {
   EXPECT_EQ(buf, '-');
 }
 
-TEST_F(XattrTest, SetxattrSizeTooLarge) {
+TEST_F(XattrTest, SetXattrSizeTooLarge) {
   const char* path = test_file_name_.c_str();
   const char name[] = "user.test";
 
@@ -274,7 +274,7 @@ TEST_F(XattrTest, SetxattrSizeTooLarge) {
   EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA));
 }
 
-TEST_F(XattrTest, SetxattrNullValueAndNonzeroSize) {
+TEST_F(XattrTest, SetXattrNullValueAndNonzeroSize) {
   const char* path = test_file_name_.c_str();
   const char name[] = "user.test";
   EXPECT_THAT(setxattr(path, name, nullptr, 1, /*flags=*/0),
@@ -283,7 +283,7 @@ TEST_F(XattrTest, SetxattrNullValueAndNonzeroSize) {
   EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA));
 }
 
-TEST_F(XattrTest, SetxattrNullValueAndZeroSize) {
+TEST_F(XattrTest, SetXattrNullValueAndZeroSize) {
   const char* path = test_file_name_.c_str();
   const char name[] = "user.test";
   EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds());
@@ -291,7 +291,7 @@ TEST_F(XattrTest, SetxattrNullValueAndZeroSize) {
   EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallSucceedsWithValue(0));
 }
 
-TEST_F(XattrTest, SetxattrValueTooLargeButOKSize) {
+TEST_F(XattrTest, SetXattrValueTooLargeButOKSize) {
   const char* path = test_file_name_.c_str();
   const char name[] = "user.test";
   std::vector<char> val(XATTR_SIZE_MAX + 1);
@@ -307,7 +307,7 @@ TEST_F(XattrTest, SetxattrValueTooLargeButOKSize) {
   EXPECT_EQ(buf, expected_buf);
 }
 
-TEST_F(XattrTest, SetxattrReplaceWithSmaller) {
+TEST_F(XattrTest, SetXattrReplaceWithSmaller) {
   const char* path = test_file_name_.c_str();
   const char name[] = "user.test";
   std::vector<char> val = {'a', 'a'};
@@ -322,7 +322,7 @@ TEST_F(XattrTest, SetxattrReplaceWithSmaller) {
   EXPECT_EQ(buf, expected_buf);
 }
 
-TEST_F(XattrTest, SetxattrReplaceWithLarger) {
+TEST_F(XattrTest, SetXattrReplaceWithLarger) {
   const char* path = test_file_name_.c_str();
   const char name[] = "user.test";
   std::vector<char> val = {'a', 'a'};
@@ -336,7 +336,7 @@ TEST_F(XattrTest, SetxattrReplaceWithLarger) {
   EXPECT_EQ(buf, val);
 }
 
-TEST_F(XattrTest, SetxattrCreateFlag) {
+TEST_F(XattrTest, SetXattrCreateFlag) {
   const char* path = test_file_name_.c_str();
   const char name[] = "user.test";
   EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_CREATE),
@@ -347,7 +347,7 @@ TEST_F(XattrTest, SetxattrCreateFlag) {
   EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallSucceedsWithValue(0));
 }
 
-TEST_F(XattrTest, SetxattrReplaceFlag) {
+TEST_F(XattrTest, SetXattrReplaceFlag) {
   const char* path = test_file_name_.c_str();
   const char name[] = "user.test";
   EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_REPLACE),
@@ -359,14 +359,14 @@ TEST_F(XattrTest, SetxattrReplaceFlag) {
   EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallSucceedsWithValue(0));
 }
 
-TEST_F(XattrTest, SetxattrInvalidFlags) {
+TEST_F(XattrTest, SetXattrInvalidFlags) {
   const char* path = test_file_name_.c_str();
   int invalid_flags = 0xff;
   EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, invalid_flags),
               SyscallFailsWithErrno(EINVAL));
 }
 
-TEST_F(XattrTest, Getxattr) {
+TEST_F(XattrTest, GetXattr) {
   const char* path = test_file_name_.c_str();
   const char name[] = "user.test";
   int val = 1234;
@@ -378,7 +378,7 @@ TEST_F(XattrTest, Getxattr) {
   EXPECT_EQ(buf, val);
 }
 
-TEST_F(XattrTest, GetxattrSizeSmallerThanValue) {
+TEST_F(XattrTest, GetXattrSizeSmallerThanValue) {
   const char* path = test_file_name_.c_str();
   const char name[] = "user.test";
   std::vector<char> val = {'a', 'a'};
@@ -390,7 +390,7 @@ TEST_F(XattrTest, GetxattrSizeSmallerThanValue) {
   EXPECT_EQ(buf, '-');
 }
 
-TEST_F(XattrTest, GetxattrSizeLargerThanValue) {
+TEST_F(XattrTest, GetXattrSizeLargerThanValue) {
   const char* path = test_file_name_.c_str();
   const char name[] = "user.test";
   char val = 'a';
@@ -405,7 +405,7 @@ TEST_F(XattrTest, GetxattrSizeLargerThanValue) {
   EXPECT_EQ(buf, expected_buf);
 }
 
-TEST_F(XattrTest, GetxattrZeroSize) {
+TEST_F(XattrTest, GetXattrZeroSize) {
   const char* path = test_file_name_.c_str();
   const char name[] = "user.test";
   char val = 'a';
@@ -418,7 +418,7 @@ TEST_F(XattrTest, GetxattrZeroSize) {
   EXPECT_EQ(buf, '-');
 }
 
-TEST_F(XattrTest, GetxattrSizeTooLarge) {
+TEST_F(XattrTest, GetXattrSizeTooLarge) {
   const char* path = test_file_name_.c_str();
   const char name[] = "user.test";
   char val = 'a';
@@ -434,7 +434,7 @@ TEST_F(XattrTest, GetxattrSizeTooLarge) {
   EXPECT_EQ(buf, expected_buf);
 }
 
-TEST_F(XattrTest, GetxattrNullValue) {
+TEST_F(XattrTest, GetXattrNullValue) {
   const char* path = test_file_name_.c_str();
   const char name[] = "user.test";
   char val = 'a';
@@ -445,7 +445,7 @@ TEST_F(XattrTest, GetxattrNullValue) {
               SyscallFailsWithErrno(EFAULT));
 }
 
-TEST_F(XattrTest, GetxattrNullValueAndZeroSize) {
+TEST_F(XattrTest, GetXattrNullValueAndZeroSize) {
   const char* path = test_file_name_.c_str();
   const char name[] = "user.test";
   char val = 'a';
@@ -461,13 +461,13 @@ TEST_F(XattrTest, GetxattrNullValueAndZeroSize) {
   EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallSucceedsWithValue(size));
 }
 
-TEST_F(XattrTest, GetxattrNonexistentName) {
+TEST_F(XattrTest, GetXattrNonexistentName) {
   const char* path = test_file_name_.c_str();
   const char name[] = "user.test";
   EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA));
 }
 
-TEST_F(XattrTest, Listxattr) {
+TEST_F(XattrTest, ListXattr) {
   const char* path = test_file_name_.c_str();
   const std::string name = "user.test";
   const std::string name2 = "user.test2";
@@ -493,7 +493,7 @@ TEST_F(XattrTest, Listxattr) {
   EXPECT_EQ(got, expected);
 }
 
-TEST_F(XattrTest, ListxattrNoXattrs) {
+TEST_F(XattrTest, ListXattrNoXattrs) {
   const char* path = test_file_name_.c_str();
 
   std::vector<char> list, expected;
@@ -501,13 +501,13 @@ TEST_F(XattrTest, ListxattrNoXattrs) {
               SyscallSucceedsWithValue(0));
   EXPECT_EQ(list, expected);
 
-  // Listxattr should succeed if there are no attributes, even if the buffer
+  // ListXattr should succeed if there are no attributes, even if the buffer
   // passed in is a nullptr.
   EXPECT_THAT(listxattr(path, nullptr, sizeof(list)),
               SyscallSucceedsWithValue(0));
 }
 
-TEST_F(XattrTest, ListxattrNullBuffer) {
+TEST_F(XattrTest, ListXattrNullBuffer) {
   const char* path = test_file_name_.c_str();
   const char name[] = "user.test";
   EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds());
@@ -516,7 +516,7 @@ TEST_F(XattrTest, ListxattrNullBuffer) {
               SyscallFailsWithErrno(EFAULT));
 }
 
-TEST_F(XattrTest, ListxattrSizeTooSmall) {
+TEST_F(XattrTest, ListXattrSizeTooSmall) {
   const char* path = test_file_name_.c_str();
   const char name[] = "user.test";
   EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds());
@@ -526,7 +526,7 @@ TEST_F(XattrTest, ListxattrSizeTooSmall) {
               SyscallFailsWithErrno(ERANGE));
 }
 
-TEST_F(XattrTest, ListxattrZeroSize) {
+TEST_F(XattrTest, ListXattrZeroSize) {
   const char* path = test_file_name_.c_str();
   const char name[] = "user.test";
   EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds());

From 89581f6495f201344227f3571eda8f9305e77d06 Mon Sep 17 00:00:00 2001
From: Ghanan Gowripalan <ghanan@google.com>
Date: Tue, 8 Sep 2020 12:15:58 -0700
Subject: [PATCH 153/211] Improve type safety for transport protocol options

The existing implementation for TransportProtocol.{Set}Option takes
arguments of an empty interface type which all types (implicitly)
implement; any type may be passed to the functions.

This change introduces marker interfaces for transport protocol options
that may be set or queried which transport protocol option types
implement to ensure that invalid types are caught at compile time.
Different interfaces are used to allow the compiler to enforce read-only
or set-only socket options.

RELNOTES: n/a
PiperOrigin-RevId: 330559811
---
 pkg/sentry/socket/netstack/stack.go           |  22 ++-
 pkg/tcpip/stack/registration.go               |   4 +-
 pkg/tcpip/stack/stack.go                      |   4 +-
 pkg/tcpip/stack/transport_test.go             |  59 ++----
 pkg/tcpip/tcpip.go                            | 127 ++++++++++++-
 pkg/tcpip/transport/icmp/protocol.go          |   4 +-
 pkg/tcpip/transport/tcp/connect.go            |   2 +-
 pkg/tcpip/transport/tcp/dual_stack_test.go    |   5 +-
 pkg/tcpip/transport/tcp/endpoint.go           |  20 +-
 pkg/tcpip/transport/tcp/endpoint_state.go     |   4 +-
 pkg/tcpip/transport/tcp/protocol.go           | 178 +++++++-----------
 pkg/tcpip/transport/tcp/tcp_sack_test.go      |  15 +-
 pkg/tcpip/transport/tcp/tcp_test.go           | 164 +++++++++-------
 pkg/tcpip/transport/tcp/tcp_timestamp_test.go |  10 +-
 .../transport/tcp/testing/context/context.go  |  17 +-
 pkg/tcpip/transport/udp/protocol.go           |   4 +-
 runsc/boot/loader.go                          |  28 ++-
 test/benchmarks/tcp/tcp_proxy.go              |  19 +-
 18 files changed, 399 insertions(+), 287 deletions(-)

diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go
index f0fe18684b..36144e1eb4 100644
--- a/pkg/sentry/socket/netstack/stack.go
+++ b/pkg/sentry/socket/netstack/stack.go
@@ -155,7 +155,7 @@ func (s *Stack) AddInterfaceAddr(idx int32, addr inet.InterfaceAddr) error {
 
 // TCPReceiveBufferSize implements inet.Stack.TCPReceiveBufferSize.
 func (s *Stack) TCPReceiveBufferSize() (inet.TCPBufferSize, error) {
-	var rs tcp.ReceiveBufferSizeOption
+	var rs tcpip.TCPReceiveBufferSizeRangeOption
 	err := s.Stack.TransportProtocolOption(tcp.ProtocolNumber, &rs)
 	return inet.TCPBufferSize{
 		Min:     rs.Min,
@@ -166,17 +166,17 @@ func (s *Stack) TCPReceiveBufferSize() (inet.TCPBufferSize, error) {
 
 // SetTCPReceiveBufferSize implements inet.Stack.SetTCPReceiveBufferSize.
 func (s *Stack) SetTCPReceiveBufferSize(size inet.TCPBufferSize) error {
-	rs := tcp.ReceiveBufferSizeOption{
+	rs := tcpip.TCPReceiveBufferSizeRangeOption{
 		Min:     size.Min,
 		Default: size.Default,
 		Max:     size.Max,
 	}
-	return syserr.TranslateNetstackError(s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, rs)).ToError()
+	return syserr.TranslateNetstackError(s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, &rs)).ToError()
 }
 
 // TCPSendBufferSize implements inet.Stack.TCPSendBufferSize.
 func (s *Stack) TCPSendBufferSize() (inet.TCPBufferSize, error) {
-	var ss tcp.SendBufferSizeOption
+	var ss tcpip.TCPSendBufferSizeRangeOption
 	err := s.Stack.TransportProtocolOption(tcp.ProtocolNumber, &ss)
 	return inet.TCPBufferSize{
 		Min:     ss.Min,
@@ -187,29 +187,30 @@ func (s *Stack) TCPSendBufferSize() (inet.TCPBufferSize, error) {
 
 // SetTCPSendBufferSize implements inet.Stack.SetTCPSendBufferSize.
 func (s *Stack) SetTCPSendBufferSize(size inet.TCPBufferSize) error {
-	ss := tcp.SendBufferSizeOption{
+	ss := tcpip.TCPSendBufferSizeRangeOption{
 		Min:     size.Min,
 		Default: size.Default,
 		Max:     size.Max,
 	}
-	return syserr.TranslateNetstackError(s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, ss)).ToError()
+	return syserr.TranslateNetstackError(s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, &ss)).ToError()
 }
 
 // TCPSACKEnabled implements inet.Stack.TCPSACKEnabled.
 func (s *Stack) TCPSACKEnabled() (bool, error) {
-	var sack tcp.SACKEnabled
+	var sack tcpip.TCPSACKEnabled
 	err := s.Stack.TransportProtocolOption(tcp.ProtocolNumber, &sack)
 	return bool(sack), syserr.TranslateNetstackError(err).ToError()
 }
 
 // SetTCPSACKEnabled implements inet.Stack.SetTCPSACKEnabled.
 func (s *Stack) SetTCPSACKEnabled(enabled bool) error {
-	return syserr.TranslateNetstackError(s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(enabled))).ToError()
+	opt := tcpip.TCPSACKEnabled(enabled)
+	return syserr.TranslateNetstackError(s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, &opt)).ToError()
 }
 
 // TCPRecovery implements inet.Stack.TCPRecovery.
 func (s *Stack) TCPRecovery() (inet.TCPLossRecovery, error) {
-	var recovery tcp.Recovery
+	var recovery tcpip.TCPRecovery
 	if err := s.Stack.TransportProtocolOption(tcp.ProtocolNumber, &recovery); err != nil {
 		return 0, syserr.TranslateNetstackError(err).ToError()
 	}
@@ -218,7 +219,8 @@ func (s *Stack) TCPRecovery() (inet.TCPLossRecovery, error) {
 
 // SetTCPRecovery implements inet.Stack.SetTCPRecovery.
 func (s *Stack) SetTCPRecovery(recovery inet.TCPLossRecovery) error {
-	return syserr.TranslateNetstackError(s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.Recovery(recovery))).ToError()
+	opt := tcpip.TCPRecovery(recovery)
+	return syserr.TranslateNetstackError(s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, &opt)).ToError()
 }
 
 // Statistics implements inet.Stack.Statistics.
diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go
index 2d88fa1f78..4fa86a3ac6 100644
--- a/pkg/tcpip/stack/registration.go
+++ b/pkg/tcpip/stack/registration.go
@@ -159,12 +159,12 @@ type TransportProtocol interface {
 	// SetOption allows enabling/disabling protocol specific features.
 	// SetOption returns an error if the option is not supported or the
 	// provided option value is invalid.
-	SetOption(option interface{}) *tcpip.Error
+	SetOption(option tcpip.SettableTransportProtocolOption) *tcpip.Error
 
 	// Option allows retrieving protocol specific option values.
 	// Option returns an error if the option is not supported or the
 	// provided option value is invalid.
-	Option(option interface{}) *tcpip.Error
+	Option(option tcpip.GettableTransportProtocolOption) *tcpip.Error
 
 	// Close requests that any worker goroutines owned by the protocol
 	// stop.
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 133d90815d..def8b0b435 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -817,7 +817,7 @@ func (s *Stack) NetworkProtocolOption(network tcpip.NetworkProtocolNumber, optio
 // options. This method returns an error if the protocol is not supported or
 // option is not supported by the protocol implementation or the provided value
 // is incorrect.
-func (s *Stack) SetTransportProtocolOption(transport tcpip.TransportProtocolNumber, option interface{}) *tcpip.Error {
+func (s *Stack) SetTransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.SettableTransportProtocolOption) *tcpip.Error {
 	transProtoState, ok := s.transportProtocols[transport]
 	if !ok {
 		return tcpip.ErrUnknownProtocol
@@ -832,7 +832,7 @@ func (s *Stack) SetTransportProtocolOption(transport tcpip.TransportProtocolNumb
 // if err := s.TransportProtocolOption(tcpip.TCPProtocolNumber, &v); err != nil {
 //   ...
 // }
-func (s *Stack) TransportProtocolOption(transport tcpip.TransportProtocolNumber, option interface{}) *tcpip.Error {
+func (s *Stack) TransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.GettableTransportProtocolOption) *tcpip.Error {
 	transProtoState, ok := s.transportProtocols[transport]
 	if !ok {
 		return tcpip.ErrUnknownProtocol
diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go
index 9292bfccba..ef3457e32c 100644
--- a/pkg/tcpip/stack/transport_test.go
+++ b/pkg/tcpip/stack/transport_test.go
@@ -291,22 +291,20 @@ func (*fakeTransportProtocol) HandleUnknownDestinationPacket(*stack.Route, stack
 	return true
 }
 
-func (f *fakeTransportProtocol) SetOption(option interface{}) *tcpip.Error {
+func (f *fakeTransportProtocol) SetOption(option tcpip.SettableTransportProtocolOption) *tcpip.Error {
 	switch v := option.(type) {
-	case fakeTransportGoodOption:
-		f.opts.good = bool(v)
+	case *tcpip.TCPModerateReceiveBufferOption:
+		f.opts.good = bool(*v)
 		return nil
-	case fakeTransportInvalidValueOption:
-		return tcpip.ErrInvalidOptionValue
 	default:
 		return tcpip.ErrUnknownProtocolOption
 	}
 }
 
-func (f *fakeTransportProtocol) Option(option interface{}) *tcpip.Error {
+func (f *fakeTransportProtocol) Option(option tcpip.GettableTransportProtocolOption) *tcpip.Error {
 	switch v := option.(type) {
-	case *fakeTransportGoodOption:
-		*v = fakeTransportGoodOption(f.opts.good)
+	case *tcpip.TCPModerateReceiveBufferOption:
+		*v = tcpip.TCPModerateReceiveBufferOption(f.opts.good)
 		return nil
 	default:
 		return tcpip.ErrUnknownProtocolOption
@@ -533,41 +531,16 @@ func TestTransportOptions(t *testing.T) {
 		TransportProtocols: []stack.TransportProtocol{fakeTransFactory()},
 	})
 
-	// Try an unsupported transport protocol.
-	if err := s.SetTransportProtocolOption(tcpip.TransportProtocolNumber(99999), fakeTransportGoodOption(false)); err != tcpip.ErrUnknownProtocol {
-		t.Fatalf("SetTransportProtocolOption(fakeTrans2, blah, false) = %v, want = tcpip.ErrUnknownProtocol", err)
-	}
-
-	testCases := []struct {
-		option   interface{}
-		wantErr  *tcpip.Error
-		verifier func(t *testing.T, p stack.TransportProtocol)
-	}{
-		{fakeTransportGoodOption(true), nil, func(t *testing.T, p stack.TransportProtocol) {
-			t.Helper()
-			fakeTrans := p.(*fakeTransportProtocol)
-			if fakeTrans.opts.good != true {
-				t.Fatalf("fakeTrans.opts.good = false, want = true")
-			}
-			var v fakeTransportGoodOption
-			if err := s.TransportProtocolOption(fakeTransNumber, &v); err != nil {
-				t.Fatalf("s.TransportProtocolOption(fakeTransNumber, &v) = %v, want = nil, where v is option %T", v, err)
-			}
-			if v != true {
-				t.Fatalf("s.TransportProtocolOption(fakeTransNumber, &v) returned v = %v, want = true", v)
-			}
-
-		}},
-		{fakeTransportBadOption(true), tcpip.ErrUnknownProtocolOption, nil},
-		{fakeTransportInvalidValueOption(1), tcpip.ErrInvalidOptionValue, nil},
-	}
-	for _, tc := range testCases {
-		if got := s.SetTransportProtocolOption(fakeTransNumber, tc.option); got != tc.wantErr {
-			t.Errorf("s.SetTransportProtocolOption(fakeTrans, %v) = %v, want = %v", tc.option, got, tc.wantErr)
-		}
-		if tc.verifier != nil {
-			tc.verifier(t, s.TransportProtocolInstance(fakeTransNumber))
-		}
+	v := tcpip.TCPModerateReceiveBufferOption(true)
+	if err := s.SetTransportProtocolOption(fakeTransNumber, &v); err != nil {
+		t.Errorf("s.SetTransportProtocolOption(fakeTrans, &%T(%t)): %s", v, v, err)
+	}
+	v = false
+	if err := s.TransportProtocolOption(fakeTransNumber, &v); err != nil {
+		t.Fatalf("s.TransportProtocolOption(fakeTransNumber, &%T): %s", v, err)
+	}
+	if !v {
+		t.Fatalf("got tcpip.TCPModerateReceiveBufferOption = false, want = true")
 	}
 }
 
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index 8ba6155217..5e34e27ba9 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -864,12 +864,93 @@ func (*DefaultTTLOption) isGettableNetworkProtocolOption() {}
 
 func (*DefaultTTLOption) isSettableNetworkProtocolOption() {}
 
-// AvailableCongestionControlOption is used to query the supported congestion
-// control algorithms.
-type AvailableCongestionControlOption string
+// GettableTransportProtocolOption is a marker interface for transport protocol
+// options that may be queried.
+type GettableTransportProtocolOption interface {
+	isGettableTransportProtocolOption()
+}
+
+// SettableTransportProtocolOption is a marker interface for transport protocol
+// options that may be set.
+type SettableTransportProtocolOption interface {
+	isSettableTransportProtocolOption()
+}
+
+// TCPSACKEnabled enables/disables the SACK option for TCP.
+//
+// See: https://tools.ietf.org/html/rfc2018.
+type TCPSACKEnabled bool
+
+func (*TCPSACKEnabled) isGettableTransportProtocolOption() {}
+
+func (*TCPSACKEnabled) isSettableTransportProtocolOption() {}
+
+// TCPRecovery is the loss detection algorithm used by TCP.
+type TCPRecovery int32
+
+func (*TCPRecovery) isGettableTransportProtocolOption() {}
+
+func (*TCPRecovery) isSettableTransportProtocolOption() {}
+
+const (
+	// TCPRACKLossDetection indicates RACK is used for loss detection and
+	// recovery.
+	TCPRACKLossDetection TCPRecovery = 1 << iota
+
+	// TCPRACKStaticReoWnd indicates the reordering window should not be
+	// adjusted when DSACK is received.
+	TCPRACKStaticReoWnd
+
+	// TCPRACKNoDupTh indicates RACK should not consider the classic three
+	// duplicate acknowledgements rule to mark the segments as lost. This
+	// is used when reordering is not detected.
+	TCPRACKNoDupTh
+)
+
+// TCPDelayEnabled enables/disables Nagle's algorithm in TCP.
+type TCPDelayEnabled bool
+
+func (*TCPDelayEnabled) isGettableTransportProtocolOption() {}
+
+func (*TCPDelayEnabled) isSettableTransportProtocolOption() {}
+
+// TCPSendBufferSizeRangeOption is the send buffer size range for TCP.
+type TCPSendBufferSizeRangeOption struct {
+	Min     int
+	Default int
+	Max     int
+}
+
+func (*TCPSendBufferSizeRangeOption) isGettableTransportProtocolOption() {}
+
+func (*TCPSendBufferSizeRangeOption) isSettableTransportProtocolOption() {}
+
+// TCPReceiveBufferSizeRangeOption is the receive buffer size range for TCP.
+type TCPReceiveBufferSizeRangeOption struct {
+	Min     int
+	Default int
+	Max     int
+}
+
+func (*TCPReceiveBufferSizeRangeOption) isGettableTransportProtocolOption() {}
+
+func (*TCPReceiveBufferSizeRangeOption) isSettableTransportProtocolOption() {}
+
+// TCPAvailableCongestionControlOption is the supported congestion control
+// algorithms for TCP.
+type TCPAvailableCongestionControlOption string
+
+func (*TCPAvailableCongestionControlOption) isGettableTransportProtocolOption() {}
+
+func (*TCPAvailableCongestionControlOption) isSettableTransportProtocolOption() {}
+
+// TCPModerateReceiveBufferOption enables/disables receive buffer moderation
+// for TCP.
+type TCPModerateReceiveBufferOption bool
 
-// ModerateReceiveBufferOption is used by buffer moderation.
-type ModerateReceiveBufferOption bool
+func (*TCPModerateReceiveBufferOption) isGettableTransportProtocolOption() {}
+
+func (*TCPModerateReceiveBufferOption) isSettableTransportProtocolOption() {}
 
 // GettableSocketOption is a marker interface for socket options that may be
 // queried.
@@ -935,6 +1016,10 @@ func (*CongestionControlOption) isGettableSocketOption() {}
 
 func (*CongestionControlOption) isSettableSocketOption() {}
 
+func (*CongestionControlOption) isGettableTransportProtocolOption() {}
+
+func (*CongestionControlOption) isSettableTransportProtocolOption() {}
+
 // TCPLingerTimeoutOption is used by SetSockOpt/GetSockOpt to set/get the
 // maximum duration for which a socket lingers in the TCP_FIN_WAIT_2 state
 // before being marked closed.
@@ -944,6 +1029,10 @@ func (*TCPLingerTimeoutOption) isGettableSocketOption() {}
 
 func (*TCPLingerTimeoutOption) isSettableSocketOption() {}
 
+func (*TCPLingerTimeoutOption) isGettableTransportProtocolOption() {}
+
+func (*TCPLingerTimeoutOption) isSettableTransportProtocolOption() {}
+
 // TCPTimeWaitTimeoutOption is used by SetSockOpt/GetSockOpt to set/get the
 // maximum duration for which a socket lingers in the TIME_WAIT state
 // before being marked closed.
@@ -953,6 +1042,10 @@ func (*TCPTimeWaitTimeoutOption) isGettableSocketOption() {}
 
 func (*TCPTimeWaitTimeoutOption) isSettableSocketOption() {}
 
+func (*TCPTimeWaitTimeoutOption) isGettableTransportProtocolOption() {}
+
+func (*TCPTimeWaitTimeoutOption) isSettableTransportProtocolOption() {}
+
 // TCPDeferAcceptOption is used by SetSockOpt/GetSockOpt to allow a
 // accept to return a completed connection only when there is data to be
 // read. This usually means the listening socket will drop the final ACK
@@ -971,6 +1064,10 @@ func (*TCPMinRTOOption) isGettableSocketOption() {}
 
 func (*TCPMinRTOOption) isSettableSocketOption() {}
 
+func (*TCPMinRTOOption) isGettableTransportProtocolOption() {}
+
+func (*TCPMinRTOOption) isSettableTransportProtocolOption() {}
+
 // TCPMaxRTOOption is use by SetSockOpt/GetSockOpt to allow overriding
 // default MaxRTO used by the Stack.
 type TCPMaxRTOOption time.Duration
@@ -979,6 +1076,10 @@ func (*TCPMaxRTOOption) isGettableSocketOption() {}
 
 func (*TCPMaxRTOOption) isSettableSocketOption() {}
 
+func (*TCPMaxRTOOption) isGettableTransportProtocolOption() {}
+
+func (*TCPMaxRTOOption) isSettableTransportProtocolOption() {}
+
 // TCPMaxRetriesOption is used by SetSockOpt/GetSockOpt to set/get the
 // maximum number of retransmits after which we time out the connection.
 type TCPMaxRetriesOption uint64
@@ -987,6 +1088,10 @@ func (*TCPMaxRetriesOption) isGettableSocketOption() {}
 
 func (*TCPMaxRetriesOption) isSettableSocketOption() {}
 
+func (*TCPMaxRetriesOption) isGettableTransportProtocolOption() {}
+
+func (*TCPMaxRetriesOption) isSettableTransportProtocolOption() {}
+
 // TCPSynRcvdCountThresholdOption is used by SetSockOpt/GetSockOpt to specify
 // the number of endpoints that can be in SYN-RCVD state before the stack
 // switches to using SYN cookies.
@@ -996,6 +1101,10 @@ func (*TCPSynRcvdCountThresholdOption) isGettableSocketOption() {}
 
 func (*TCPSynRcvdCountThresholdOption) isSettableSocketOption() {}
 
+func (*TCPSynRcvdCountThresholdOption) isGettableTransportProtocolOption() {}
+
+func (*TCPSynRcvdCountThresholdOption) isSettableTransportProtocolOption() {}
+
 // TCPSynRetriesOption is used by SetSockOpt/GetSockOpt to specify stack-wide
 // default for number of times SYN is retransmitted before aborting a connect.
 type TCPSynRetriesOption uint8
@@ -1004,6 +1113,10 @@ func (*TCPSynRetriesOption) isGettableSocketOption() {}
 
 func (*TCPSynRetriesOption) isSettableSocketOption() {}
 
+func (*TCPSynRetriesOption) isGettableTransportProtocolOption() {}
+
+func (*TCPSynRetriesOption) isSettableTransportProtocolOption() {}
+
 // MulticastInterfaceOption is used by SetSockOpt/GetSockOpt to specify a
 // default interface for multicast.
 type MulticastInterfaceOption struct {
@@ -1062,6 +1175,10 @@ func (*TCPTimeWaitReuseOption) isGettableSocketOption() {}
 
 func (*TCPTimeWaitReuseOption) isSettableSocketOption() {}
 
+func (*TCPTimeWaitReuseOption) isGettableTransportProtocolOption() {}
+
+func (*TCPTimeWaitReuseOption) isSettableTransportProtocolOption() {}
+
 const (
 	// TCPTimeWaitReuseDisabled indicates reuse of port bound by endponts in TIME-WAIT cannot
 	// be reused for new connections.
diff --git a/pkg/tcpip/transport/icmp/protocol.go b/pkg/tcpip/transport/icmp/protocol.go
index 74ef6541e8..bb11e4e83e 100644
--- a/pkg/tcpip/transport/icmp/protocol.go
+++ b/pkg/tcpip/transport/icmp/protocol.go
@@ -109,12 +109,12 @@ func (*protocol) HandleUnknownDestinationPacket(*stack.Route, stack.TransportEnd
 }
 
 // SetOption implements stack.TransportProtocol.SetOption.
-func (*protocol) SetOption(option interface{}) *tcpip.Error {
+func (*protocol) SetOption(tcpip.SettableTransportProtocolOption) *tcpip.Error {
 	return tcpip.ErrUnknownProtocolOption
 }
 
 // Option implements stack.TransportProtocol.Option.
-func (*protocol) Option(option interface{}) *tcpip.Error {
+func (*protocol) Option(tcpip.GettableTransportProtocolOption) *tcpip.Error {
 	return tcpip.ErrUnknownProtocolOption
 }
 
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go
index 72df5c2a16..09d53d158a 100644
--- a/pkg/tcpip/transport/tcp/connect.go
+++ b/pkg/tcpip/transport/tcp/connect.go
@@ -522,7 +522,7 @@ func (h *handshake) execute() *tcpip.Error {
 	s.AddWaker(&h.ep.newSegmentWaker, wakerForNewSegment)
 	defer s.Done()
 
-	var sackEnabled SACKEnabled
+	var sackEnabled tcpip.TCPSACKEnabled
 	if err := h.ep.stack.TransportProtocolOption(ProtocolNumber, &sackEnabled); err != nil {
 		// If stack returned an error when checking for SACKEnabled
 		// status then just default to switching off SACK negotiation.
diff --git a/pkg/tcpip/transport/tcp/dual_stack_test.go b/pkg/tcpip/transport/tcp/dual_stack_test.go
index 80e9dd4652..94207c141f 100644
--- a/pkg/tcpip/transport/tcp/dual_stack_test.go
+++ b/pkg/tcpip/transport/tcp/dual_stack_test.go
@@ -560,8 +560,9 @@ func TestV4AcceptOnV4(t *testing.T) {
 func testV4ListenClose(t *testing.T, c *context.Context) {
 	// Set the SynRcvd threshold to zero to force a syn cookie based accept
 	// to happen.
-	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPSynRcvdCountThresholdOption(0)); err != nil {
-		t.Fatalf("setting TCPSynRcvdCountThresholdOption failed: %s", err)
+	var opt tcpip.TCPSynRcvdCountThresholdOption
+	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+		t.Fatalf("setting TCPSynRcvdCountThresholdOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
 	}
 
 	const n = uint16(32)
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 4cf966b65f..8cb769d585 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -849,12 +849,12 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
 		maxSynRetries: DefaultSynRetries,
 	}
 
-	var ss SendBufferSizeOption
+	var ss tcpip.TCPSendBufferSizeRangeOption
 	if err := s.TransportProtocolOption(ProtocolNumber, &ss); err == nil {
 		e.sndBufSize = ss.Default
 	}
 
-	var rs ReceiveBufferSizeOption
+	var rs tcpip.TCPReceiveBufferSizeRangeOption
 	if err := s.TransportProtocolOption(ProtocolNumber, &rs); err == nil {
 		e.rcvBufSize = rs.Default
 	}
@@ -864,12 +864,12 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
 		e.cc = cs
 	}
 
-	var mrb tcpip.ModerateReceiveBufferOption
+	var mrb tcpip.TCPModerateReceiveBufferOption
 	if err := s.TransportProtocolOption(ProtocolNumber, &mrb); err == nil {
 		e.rcvAutoParams.disabled = !bool(mrb)
 	}
 
-	var de DelayEnabled
+	var de tcpip.TCPDelayEnabled
 	if err := s.TransportProtocolOption(ProtocolNumber, &de); err == nil && de {
 		e.SetSockOptBool(tcpip.DelayOption, true)
 	}
@@ -1609,7 +1609,7 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
 	case tcpip.ReceiveBufferSizeOption:
 		// Make sure the receive buffer size is within the min and max
 		// allowed.
-		var rs ReceiveBufferSizeOption
+		var rs tcpip.TCPReceiveBufferSizeRangeOption
 		if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err == nil {
 			if v < rs.Min {
 				v = rs.Min
@@ -1659,7 +1659,7 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
 	case tcpip.SendBufferSizeOption:
 		// Make sure the send buffer size is within the min and max
 		// allowed.
-		var ss SendBufferSizeOption
+		var ss tcpip.TCPSendBufferSizeRangeOption
 		if err := e.stack.TransportProtocolOption(ProtocolNumber, &ss); err == nil {
 			if v < ss.Min {
 				v = ss.Min
@@ -1699,7 +1699,7 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
 				return tcpip.ErrInvalidOptionValue
 			}
 		}
-		var rs ReceiveBufferSizeOption
+		var rs tcpip.TCPReceiveBufferSizeRangeOption
 		if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err == nil {
 			if v < rs.Min/2 {
 				v = rs.Min / 2
@@ -1748,7 +1748,7 @@ func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error {
 		// Query the available cc algorithms in the stack and
 		// validate that the specified algorithm is actually
 		// supported in the stack.
-		var avail tcpip.AvailableCongestionControlOption
+		var avail tcpip.TCPAvailableCongestionControlOption
 		if err := e.stack.TransportProtocolOption(ProtocolNumber, &avail); err != nil {
 			return err
 		}
@@ -2707,7 +2707,7 @@ func (e *endpoint) receiveBufferSize() int {
 }
 
 func (e *endpoint) maxReceiveBufferSize() int {
-	var rs ReceiveBufferSizeOption
+	var rs tcpip.TCPReceiveBufferSizeRangeOption
 	if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err != nil {
 		// As a fallback return the hardcoded max buffer size.
 		return MaxBufferSize
@@ -2787,7 +2787,7 @@ func timeStampOffset() uint32 {
 // if the SYN options indicate that the SACK option was negotiated and the TCP
 // stack is configured to enable TCP SACK option.
 func (e *endpoint) maybeEnableSACKPermitted(synOpts *header.TCPSynOptions) {
-	var v SACKEnabled
+	var v tcpip.TCPSACKEnabled
 	if err := e.stack.TransportProtocolOption(ProtocolNumber, &v); err != nil {
 		// Stack doesn't support SACK. So just return.
 		return
diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go
index 723e47ddc5..41d0050f3e 100644
--- a/pkg/tcpip/transport/tcp/endpoint_state.go
+++ b/pkg/tcpip/transport/tcp/endpoint_state.go
@@ -182,14 +182,14 @@ func (e *endpoint) Resume(s *stack.Stack) {
 	epState := e.origEndpointState
 	switch epState {
 	case StateInitial, StateBound, StateListen, StateConnecting, StateEstablished:
-		var ss SendBufferSizeOption
+		var ss tcpip.TCPSendBufferSizeRangeOption
 		if err := e.stack.TransportProtocolOption(ProtocolNumber, &ss); err == nil {
 			if e.sndBufSize < ss.Min || e.sndBufSize > ss.Max {
 				panic(fmt.Sprintf("endpoint.sndBufSize %d is outside the min and max allowed [%d, %d]", e.sndBufSize, ss.Min, ss.Max))
 			}
 		}
 
-		var rs ReceiveBufferSizeOption
+		var rs tcpip.TCPReceiveBufferSizeRangeOption
 		if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err == nil {
 			if e.rcvBufSize < rs.Min || e.rcvBufSize > rs.Max {
 				panic(fmt.Sprintf("endpoint.rcvBufSize %d is outside the min and max allowed [%d, %d]", e.rcvBufSize, rs.Min, rs.Max))
diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go
index c5afa26801..63ec12be89 100644
--- a/pkg/tcpip/transport/tcp/protocol.go
+++ b/pkg/tcpip/transport/tcp/protocol.go
@@ -79,50 +79,6 @@ const (
 	ccCubic = "cubic"
 )
 
-// SACKEnabled is used by stack.(*Stack).TransportProtocolOption to
-// enable/disable SACK support in TCP. See: https://tools.ietf.org/html/rfc2018.
-type SACKEnabled bool
-
-// Recovery is used by stack.(*Stack).TransportProtocolOption to
-// set loss detection algorithm in TCP.
-type Recovery int32
-
-const (
-	// RACKLossDetection indicates RACK is used for loss detection and
-	// recovery.
-	RACKLossDetection Recovery = 1 << iota
-
-	// RACKStaticReoWnd indicates the reordering window should not be
-	// adjusted when DSACK is received.
-	RACKStaticReoWnd
-
-	// RACKNoDupTh indicates RACK should not consider the classic three
-	// duplicate acknowledgements rule to mark the segments as lost. This
-	// is used when reordering is not detected.
-	RACKNoDupTh
-)
-
-// DelayEnabled is used by stack.(Stack*).TransportProtocolOption to
-// enable/disable Nagle's algorithm in TCP.
-type DelayEnabled bool
-
-// SendBufferSizeOption is used by stack.(Stack*).TransportProtocolOption
-// to get/set the default, min and max TCP send buffer sizes.
-type SendBufferSizeOption struct {
-	Min     int
-	Default int
-	Max     int
-}
-
-// ReceiveBufferSizeOption is used by
-// stack.(Stack*).TransportProtocolOption to get/set the default, min and max
-// TCP receive buffer sizes.
-type ReceiveBufferSizeOption struct {
-	Min     int
-	Default int
-	Max     int
-}
-
 // syncRcvdCounter tracks the number of endpoints in the SYN-RCVD state. The
 // value is protected by a mutex so that we can increment only when it's
 // guaranteed not to go above a threshold.
@@ -183,10 +139,10 @@ func (s *synRcvdCounter) Threshold() uint64 {
 type protocol struct {
 	mu                         sync.RWMutex
 	sackEnabled                bool
-	recovery                   Recovery
+	recovery                   tcpip.TCPRecovery
 	delayEnabled               bool
-	sendBufferSize             SendBufferSizeOption
-	recvBufferSize             ReceiveBufferSizeOption
+	sendBufferSize             tcpip.TCPSendBufferSizeRangeOption
+	recvBufferSize             tcpip.TCPReceiveBufferSizeRangeOption
 	congestionControl          string
 	availableCongestionControl []string
 	moderateReceiveBuffer      bool
@@ -296,49 +252,49 @@ func replyWithReset(s *segment, tos, ttl uint8) {
 }
 
 // SetOption implements stack.TransportProtocol.SetOption.
-func (p *protocol) SetOption(option interface{}) *tcpip.Error {
+func (p *protocol) SetOption(option tcpip.SettableTransportProtocolOption) *tcpip.Error {
 	switch v := option.(type) {
-	case SACKEnabled:
+	case *tcpip.TCPSACKEnabled:
 		p.mu.Lock()
-		p.sackEnabled = bool(v)
+		p.sackEnabled = bool(*v)
 		p.mu.Unlock()
 		return nil
 
-	case Recovery:
+	case *tcpip.TCPRecovery:
 		p.mu.Lock()
-		p.recovery = Recovery(v)
+		p.recovery = *v
 		p.mu.Unlock()
 		return nil
 
-	case DelayEnabled:
+	case *tcpip.TCPDelayEnabled:
 		p.mu.Lock()
-		p.delayEnabled = bool(v)
+		p.delayEnabled = bool(*v)
 		p.mu.Unlock()
 		return nil
 
-	case SendBufferSizeOption:
+	case *tcpip.TCPSendBufferSizeRangeOption:
 		if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max {
 			return tcpip.ErrInvalidOptionValue
 		}
 		p.mu.Lock()
-		p.sendBufferSize = v
+		p.sendBufferSize = *v
 		p.mu.Unlock()
 		return nil
 
-	case ReceiveBufferSizeOption:
+	case *tcpip.TCPReceiveBufferSizeRangeOption:
 		if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max {
 			return tcpip.ErrInvalidOptionValue
 		}
 		p.mu.Lock()
-		p.recvBufferSize = v
+		p.recvBufferSize = *v
 		p.mu.Unlock()
 		return nil
 
-	case tcpip.CongestionControlOption:
+	case *tcpip.CongestionControlOption:
 		for _, c := range p.availableCongestionControl {
-			if string(v) == c {
+			if string(*v) == c {
 				p.mu.Lock()
-				p.congestionControl = string(v)
+				p.congestionControl = string(*v)
 				p.mu.Unlock()
 				return nil
 			}
@@ -347,75 +303,79 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error {
 		// is specified.
 		return tcpip.ErrNoSuchFile
 
-	case tcpip.ModerateReceiveBufferOption:
+	case *tcpip.TCPModerateReceiveBufferOption:
 		p.mu.Lock()
-		p.moderateReceiveBuffer = bool(v)
+		p.moderateReceiveBuffer = bool(*v)
 		p.mu.Unlock()
 		return nil
 
-	case tcpip.TCPLingerTimeoutOption:
-		if v < 0 {
-			v = 0
-		}
+	case *tcpip.TCPLingerTimeoutOption:
 		p.mu.Lock()
-		p.lingerTimeout = time.Duration(v)
+		if *v < 0 {
+			p.lingerTimeout = 0
+		} else {
+			p.lingerTimeout = time.Duration(*v)
+		}
 		p.mu.Unlock()
 		return nil
 
-	case tcpip.TCPTimeWaitTimeoutOption:
-		if v < 0 {
-			v = 0
-		}
+	case *tcpip.TCPTimeWaitTimeoutOption:
 		p.mu.Lock()
-		p.timeWaitTimeout = time.Duration(v)
+		if *v < 0 {
+			p.timeWaitTimeout = 0
+		} else {
+			p.timeWaitTimeout = time.Duration(*v)
+		}
 		p.mu.Unlock()
 		return nil
 
-	case tcpip.TCPTimeWaitReuseOption:
-		if v < tcpip.TCPTimeWaitReuseDisabled || v > tcpip.TCPTimeWaitReuseLoopbackOnly {
+	case *tcpip.TCPTimeWaitReuseOption:
+		if *v < tcpip.TCPTimeWaitReuseDisabled || *v > tcpip.TCPTimeWaitReuseLoopbackOnly {
 			return tcpip.ErrInvalidOptionValue
 		}
 		p.mu.Lock()
-		p.timeWaitReuse = v
+		p.timeWaitReuse = *v
 		p.mu.Unlock()
 		return nil
 
-	case tcpip.TCPMinRTOOption:
-		if v < 0 {
-			v = tcpip.TCPMinRTOOption(MinRTO)
-		}
+	case *tcpip.TCPMinRTOOption:
 		p.mu.Lock()
-		p.minRTO = time.Duration(v)
+		if *v < 0 {
+			p.minRTO = MinRTO
+		} else {
+			p.minRTO = time.Duration(*v)
+		}
 		p.mu.Unlock()
 		return nil
 
-	case tcpip.TCPMaxRTOOption:
-		if v < 0 {
-			v = tcpip.TCPMaxRTOOption(MaxRTO)
-		}
+	case *tcpip.TCPMaxRTOOption:
 		p.mu.Lock()
-		p.maxRTO = time.Duration(v)
+		if *v < 0 {
+			p.maxRTO = MaxRTO
+		} else {
+			p.maxRTO = time.Duration(*v)
+		}
 		p.mu.Unlock()
 		return nil
 
-	case tcpip.TCPMaxRetriesOption:
+	case *tcpip.TCPMaxRetriesOption:
 		p.mu.Lock()
-		p.maxRetries = uint32(v)
+		p.maxRetries = uint32(*v)
 		p.mu.Unlock()
 		return nil
 
-	case tcpip.TCPSynRcvdCountThresholdOption:
+	case *tcpip.TCPSynRcvdCountThresholdOption:
 		p.mu.Lock()
-		p.synRcvdCount.SetThreshold(uint64(v))
+		p.synRcvdCount.SetThreshold(uint64(*v))
 		p.mu.Unlock()
 		return nil
 
-	case tcpip.TCPSynRetriesOption:
-		if v < 1 || v > 255 {
+	case *tcpip.TCPSynRetriesOption:
+		if *v < 1 || *v > 255 {
 			return tcpip.ErrInvalidOptionValue
 		}
 		p.mu.Lock()
-		p.synRetries = uint8(v)
+		p.synRetries = uint8(*v)
 		p.mu.Unlock()
 		return nil
 
@@ -425,33 +385,33 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error {
 }
 
 // Option implements stack.TransportProtocol.Option.
-func (p *protocol) Option(option interface{}) *tcpip.Error {
+func (p *protocol) Option(option tcpip.GettableTransportProtocolOption) *tcpip.Error {
 	switch v := option.(type) {
-	case *SACKEnabled:
+	case *tcpip.TCPSACKEnabled:
 		p.mu.RLock()
-		*v = SACKEnabled(p.sackEnabled)
+		*v = tcpip.TCPSACKEnabled(p.sackEnabled)
 		p.mu.RUnlock()
 		return nil
 
-	case *Recovery:
+	case *tcpip.TCPRecovery:
 		p.mu.RLock()
-		*v = Recovery(p.recovery)
+		*v = tcpip.TCPRecovery(p.recovery)
 		p.mu.RUnlock()
 		return nil
 
-	case *DelayEnabled:
+	case *tcpip.TCPDelayEnabled:
 		p.mu.RLock()
-		*v = DelayEnabled(p.delayEnabled)
+		*v = tcpip.TCPDelayEnabled(p.delayEnabled)
 		p.mu.RUnlock()
 		return nil
 
-	case *SendBufferSizeOption:
+	case *tcpip.TCPSendBufferSizeRangeOption:
 		p.mu.RLock()
 		*v = p.sendBufferSize
 		p.mu.RUnlock()
 		return nil
 
-	case *ReceiveBufferSizeOption:
+	case *tcpip.TCPReceiveBufferSizeRangeOption:
 		p.mu.RLock()
 		*v = p.recvBufferSize
 		p.mu.RUnlock()
@@ -463,15 +423,15 @@ func (p *protocol) Option(option interface{}) *tcpip.Error {
 		p.mu.RUnlock()
 		return nil
 
-	case *tcpip.AvailableCongestionControlOption:
+	case *tcpip.TCPAvailableCongestionControlOption:
 		p.mu.RLock()
-		*v = tcpip.AvailableCongestionControlOption(strings.Join(p.availableCongestionControl, " "))
+		*v = tcpip.TCPAvailableCongestionControlOption(strings.Join(p.availableCongestionControl, " "))
 		p.mu.RUnlock()
 		return nil
 
-	case *tcpip.ModerateReceiveBufferOption:
+	case *tcpip.TCPModerateReceiveBufferOption:
 		p.mu.RLock()
-		*v = tcpip.ModerateReceiveBufferOption(p.moderateReceiveBuffer)
+		*v = tcpip.TCPModerateReceiveBufferOption(p.moderateReceiveBuffer)
 		p.mu.RUnlock()
 		return nil
 
@@ -567,12 +527,12 @@ func (*protocol) Parse(pkt *stack.PacketBuffer) bool {
 // NewProtocol returns a TCP transport protocol.
 func NewProtocol() stack.TransportProtocol {
 	p := protocol{
-		sendBufferSize: SendBufferSizeOption{
+		sendBufferSize: tcpip.TCPSendBufferSizeRangeOption{
 			Min:     MinBufferSize,
 			Default: DefaultSendBufferSize,
 			Max:     MaxBufferSize,
 		},
-		recvBufferSize: ReceiveBufferSizeOption{
+		recvBufferSize: tcpip.TCPReceiveBufferSizeRangeOption{
 			Min:     MinBufferSize,
 			Default: DefaultReceiveBufferSize,
 			Max:     MaxBufferSize,
@@ -587,7 +547,7 @@ func NewProtocol() stack.TransportProtocol {
 		minRTO:                     MinRTO,
 		maxRTO:                     MaxRTO,
 		maxRetries:                 MaxRetries,
-		recovery:                   RACKLossDetection,
+		recovery:                   tcpip.TCPRACKLossDetection,
 	}
 	p.dispatcher.init(runtime.GOMAXPROCS(0))
 	return &p
diff --git a/pkg/tcpip/transport/tcp/tcp_sack_test.go b/pkg/tcpip/transport/tcp/tcp_sack_test.go
index 99521f0c10..ef7f5719f4 100644
--- a/pkg/tcpip/transport/tcp/tcp_sack_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_sack_test.go
@@ -46,8 +46,9 @@ func createConnectedWithSACKAndTS(c *context.Context) *context.RawEndpoint {
 
 func setStackSACKPermitted(t *testing.T, c *context.Context, enable bool) {
 	t.Helper()
-	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(enable)); err != nil {
-		t.Fatalf("c.s.SetTransportProtocolOption(tcp.ProtocolNumber, SACKEnabled(%t) = %s", enable, err)
+	opt := tcpip.TCPSACKEnabled(enable)
+	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+		t.Fatalf("c.s.SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err)
 	}
 }
 
@@ -162,8 +163,9 @@ func TestSackPermittedAccept(t *testing.T) {
 						// Set the SynRcvd threshold to
 						// zero to force a syn cookie
 						// based accept to happen.
-						if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPSynRcvdCountThresholdOption(0)); err != nil {
-							t.Fatalf("setting TCPSynRcvdCountThresholdOption to 0 failed: %s", err)
+						var opt tcpip.TCPSynRcvdCountThresholdOption
+						if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+							t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
 						}
 					}
 					setStackSACKPermitted(t, c, sackEnabled)
@@ -236,8 +238,9 @@ func TestSackDisabledAccept(t *testing.T) {
 						// Set the SynRcvd threshold to
 						// zero to force a syn cookie
 						// based accept to happen.
-						if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPSynRcvdCountThresholdOption(0)); err != nil {
-							t.Fatalf("setting TCPSynRcvdCountThresholdOption to 0 failed: %s", err)
+						var opt tcpip.TCPSynRcvdCountThresholdOption
+						if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+							t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
 						}
 					}
 
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index 3d09d6def6..0d13e1efd4 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -309,8 +309,8 @@ func TestTCPResetSentForACKWhenNotUsingSynCookies(t *testing.T) {
 	// Lower stackwide TIME_WAIT timeout so that the reservations
 	// are released instantly on Close.
 	tcpTW := tcpip.TCPTimeWaitTimeoutOption(1 * time.Millisecond)
-	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpTW); err != nil {
-		t.Fatalf("e.stack.SetTransportProtocolOption(%d, %#v) = %s", tcp.ProtocolNumber, tcpTW, err)
+	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &tcpTW); err != nil {
+		t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, tcpTW, tcpTW, err)
 	}
 
 	c.EP.Close()
@@ -432,8 +432,9 @@ func TestConnectResetAfterClose(t *testing.T) {
 	// Set TCPLinger to 3 seconds so that sockets are marked closed
 	// after 3 second in FIN_WAIT2 state.
 	tcpLingerTimeout := 3 * time.Second
-	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPLingerTimeoutOption(tcpLingerTimeout)); err != nil {
-		t.Fatalf("c.stack.SetTransportProtocolOption(tcp, tcpip.TCPLingerTimeoutOption(%s) failed: %s", tcpLingerTimeout, err)
+	opt := tcpip.TCPLingerTimeoutOption(tcpLingerTimeout)
+	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+		t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
 	}
 
 	c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
@@ -506,8 +507,9 @@ func TestCurrentConnectedIncrement(t *testing.T) {
 	// Set TCPTimeWaitTimeout to 1 seconds so that sockets are marked closed
 	// after 1 second in TIME_WAIT state.
 	tcpTimeWaitTimeout := 1 * time.Second
-	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPTimeWaitTimeoutOption(tcpTimeWaitTimeout)); err != nil {
-		t.Fatalf("c.stack.SetTransportProtocolOption(tcp, tcpip.TCPTimeWaitTimeout(%d) failed: %s", tcpTimeWaitTimeout, err)
+	opt := tcpip.TCPTimeWaitTimeoutOption(tcpTimeWaitTimeout)
+	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+		t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
 	}
 
 	c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
@@ -933,8 +935,8 @@ func TestUserSuppliedMSSOnListenAccept(t *testing.T) {
 
 					// Set the SynRcvd threshold to force a syn cookie based accept to happen.
 					opt := tcpip.TCPSynRcvdCountThresholdOption(nonSynCookieAccepts)
-					if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, opt); err != nil {
-						t.Fatalf("SetTransportProtocolOption(%d, %#v): %s", tcp.ProtocolNumber, opt, err)
+					if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+						t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
 					}
 
 					if err := c.EP.SetSockOptInt(tcpip.MaxSegOption, int(test.setMSS)); err != nil {
@@ -2867,8 +2869,9 @@ func TestSynCookiePassiveSendMSSLessThanMTU(t *testing.T) {
 
 	// Set the SynRcvd threshold to zero to force a syn cookie based accept
 	// to happen.
-	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPSynRcvdCountThresholdOption(0)); err != nil {
-		t.Fatalf("setting TCPSynRcvdCountThresholdOption to 0 failed: %s", err)
+	opt := tcpip.TCPSynRcvdCountThresholdOption(0)
+	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+		t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
 	}
 
 	// Create EP and start listening.
@@ -3146,8 +3149,9 @@ func TestMaxRetransmitsTimeout(t *testing.T) {
 	defer c.Cleanup()
 
 	const numRetries = 2
-	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPMaxRetriesOption(numRetries)); err != nil {
-		t.Fatalf("could not set protocol option MaxRetries.\n")
+	opt := tcpip.TCPMaxRetriesOption(numRetries)
+	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+		t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
 	}
 
 	c.CreateConnected(789 /* iss */, 30000 /* rcvWnd */, -1 /* epRcvBuf */)
@@ -3206,8 +3210,9 @@ func TestMaxRTO(t *testing.T) {
 	defer c.Cleanup()
 
 	rto := 1 * time.Second
-	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPMaxRTOOption(rto)); err != nil {
-		t.Fatalf("c.stack.SetTransportProtocolOption(tcp, tcpip.TCPMaxRTO(%d) failed: %s", rto, err)
+	opt := tcpip.TCPMaxRTOOption(rto)
+	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+		t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
 	}
 
 	c.CreateConnected(789 /* iss */, 30000 /* rcvWnd */, -1 /* epRcvBuf */)
@@ -3964,8 +3969,9 @@ func TestReadAfterClosedState(t *testing.T) {
 	// Set TCPTimeWaitTimeout to 1 seconds so that sockets are marked closed
 	// after 1 second in TIME_WAIT state.
 	tcpTimeWaitTimeout := 1 * time.Second
-	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPTimeWaitTimeoutOption(tcpTimeWaitTimeout)); err != nil {
-		t.Fatalf("c.stack.SetTransportProtocolOption(tcp, tcpip.TCPTimeWaitTimeout(%d) failed: %s", tcpTimeWaitTimeout, err)
+	opt := tcpip.TCPTimeWaitTimeoutOption(tcpTimeWaitTimeout)
+	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+		t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
 	}
 
 	c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
@@ -4204,11 +4210,15 @@ func TestDefaultBufferSizes(t *testing.T) {
 	checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize)
 
 	// Change the default send buffer size.
-	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{
-		Min:     1,
-		Default: tcp.DefaultSendBufferSize * 2,
-		Max:     tcp.DefaultSendBufferSize * 20}); err != nil {
-		t.Fatalf("SetTransportProtocolOption failed: %s", err)
+	{
+		opt := tcpip.TCPSendBufferSizeRangeOption{
+			Min:     1,
+			Default: tcp.DefaultSendBufferSize * 2,
+			Max:     tcp.DefaultSendBufferSize * 20,
+		}
+		if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+			t.Fatalf("SetTransportProtocolOption(%d, &%#v): %s", tcp.ProtocolNumber, opt, err)
+		}
 	}
 
 	ep.Close()
@@ -4221,11 +4231,15 @@ func TestDefaultBufferSizes(t *testing.T) {
 	checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize)
 
 	// Change the default receive buffer size.
-	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{
-		Min:     1,
-		Default: tcp.DefaultReceiveBufferSize * 3,
-		Max:     tcp.DefaultReceiveBufferSize * 30}); err != nil {
-		t.Fatalf("SetTransportProtocolOption failed: %v", err)
+	{
+		opt := tcpip.TCPReceiveBufferSizeRangeOption{
+			Min:     1,
+			Default: tcp.DefaultReceiveBufferSize * 3,
+			Max:     tcp.DefaultReceiveBufferSize * 30,
+		}
+		if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+			t.Fatalf("SetTransportProtocolOption(%d, &%#v): %s", tcp.ProtocolNumber, opt, err)
+		}
 	}
 
 	ep.Close()
@@ -4252,12 +4266,18 @@ func TestMinMaxBufferSizes(t *testing.T) {
 	defer ep.Close()
 
 	// Change the min/max values for send/receive
-	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{Min: 200, Default: tcp.DefaultReceiveBufferSize * 2, Max: tcp.DefaultReceiveBufferSize * 20}); err != nil {
-		t.Fatalf("SetTransportProtocolOption failed: %s", err)
+	{
+		opt := tcpip.TCPReceiveBufferSizeRangeOption{Min: 200, Default: tcp.DefaultReceiveBufferSize * 2, Max: tcp.DefaultReceiveBufferSize * 20}
+		if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+			t.Fatalf("SetTransportProtocolOption(%d, &%#v): %s", tcp.ProtocolNumber, opt, err)
+		}
 	}
 
-	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{Min: 300, Default: tcp.DefaultSendBufferSize * 3, Max: tcp.DefaultSendBufferSize * 30}); err != nil {
-		t.Fatalf("SetTransportProtocolOption failed: %s", err)
+	{
+		opt := tcpip.TCPSendBufferSizeRangeOption{Min: 300, Default: tcp.DefaultSendBufferSize * 3, Max: tcp.DefaultSendBufferSize * 30}
+		if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+			t.Fatalf("SetTransportProtocolOption(%d, &%#v): %s", tcp.ProtocolNumber, opt, err)
+		}
 	}
 
 	// Set values below the min.
@@ -4718,8 +4738,8 @@ func TestStackSetCongestionControl(t *testing.T) {
 				t.Fatalf("s.TransportProtocolOption(%v, %v) = %s", tcp.ProtocolNumber, &oldCC, err)
 			}
 
-			if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tc.cc); err != tc.err {
-				t.Fatalf("s.SetTransportProtocolOption(%v, %v) = %v, want %v", tcp.ProtocolNumber, tc.cc, err, tc.err)
+			if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &tc.cc); err != tc.err {
+				t.Fatalf("s.SetTransportProtocolOption(%d, &%T(%s)) = %s, want = %s", tcp.ProtocolNumber, tc.cc, tc.cc, err, tc.err)
 			}
 
 			var cc tcpip.CongestionControlOption
@@ -4751,12 +4771,12 @@ func TestStackAvailableCongestionControl(t *testing.T) {
 	s := c.Stack()
 
 	// Query permitted congestion control algorithms.
-	var aCC tcpip.AvailableCongestionControlOption
+	var aCC tcpip.TCPAvailableCongestionControlOption
 	if err := s.TransportProtocolOption(tcp.ProtocolNumber, &aCC); err != nil {
 		t.Fatalf("s.TransportProtocolOption(%v, %v) = %v", tcp.ProtocolNumber, &aCC, err)
 	}
-	if got, want := aCC, tcpip.AvailableCongestionControlOption("reno cubic"); got != want {
-		t.Fatalf("got tcpip.AvailableCongestionControlOption: %v, want: %v", got, want)
+	if got, want := aCC, tcpip.TCPAvailableCongestionControlOption("reno cubic"); got != want {
+		t.Fatalf("got tcpip.TCPAvailableCongestionControlOption: %v, want: %v", got, want)
 	}
 }
 
@@ -4767,18 +4787,18 @@ func TestStackSetAvailableCongestionControl(t *testing.T) {
 	s := c.Stack()
 
 	// Setting AvailableCongestionControlOption should fail.
-	aCC := tcpip.AvailableCongestionControlOption("xyz")
+	aCC := tcpip.TCPAvailableCongestionControlOption("xyz")
 	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &aCC); err == nil {
-		t.Fatalf("s.TransportProtocolOption(%v, %v) = nil, want non-nil", tcp.ProtocolNumber, &aCC)
+		t.Fatalf("s.SetTransportProtocolOption(%d, &%T(%s)) = nil, want non-nil", tcp.ProtocolNumber, aCC, aCC)
 	}
 
 	// Verify that we still get the expected list of congestion control options.
-	var cc tcpip.AvailableCongestionControlOption
+	var cc tcpip.TCPAvailableCongestionControlOption
 	if err := s.TransportProtocolOption(tcp.ProtocolNumber, &cc); err != nil {
-		t.Fatalf("s.TransportProtocolOption(%v, %v) = %v", tcp.ProtocolNumber, &cc, err)
+		t.Fatalf("s.TransportProtocolOptio(%d, &%T(%s)): %s", tcp.ProtocolNumber, cc, cc, err)
 	}
-	if got, want := cc, tcpip.AvailableCongestionControlOption("reno cubic"); got != want {
-		t.Fatalf("got tcpip.AvailableCongestionControlOption: %v, want: %v", got, want)
+	if got, want := cc, tcpip.TCPAvailableCongestionControlOption("reno cubic"); got != want {
+		t.Fatalf("got tcpip.TCPAvailableCongestionControlOption = %s, want = %s", got, want)
 	}
 }
 
@@ -4842,8 +4862,8 @@ func TestEndpointSetCongestionControl(t *testing.T) {
 func enableCUBIC(t *testing.T, c *context.Context) {
 	t.Helper()
 	opt := tcpip.CongestionControlOption("cubic")
-	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, opt); err != nil {
-		t.Fatalf("c.s.SetTransportProtocolOption(tcp.ProtocolNumber, %s = %s", opt, err)
+	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+		t.Fatalf("SetTransportProtocolOption(%d, &%T(%s)) %s", tcp.ProtocolNumber, opt, opt, err)
 	}
 }
 
@@ -5505,8 +5525,9 @@ func TestListenBacklogFullSynCookieInUse(t *testing.T) {
 	c := context.New(t, defaultMTU)
 	defer c.Cleanup()
 
-	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPSynRcvdCountThresholdOption(1)); err != nil {
-		t.Fatalf("setting TCPSynRcvdCountThresholdOption to 1 failed: %s", err)
+	opt := tcpip.TCPSynRcvdCountThresholdOption(1)
+	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+		t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
 	}
 
 	// Create TCP endpoint.
@@ -5906,13 +5927,19 @@ func TestReceiveBufferAutoTuningApplicationLimited(t *testing.T) {
 	// the segment queue holding unprocessed packets is limited to 500.
 	const receiveBufferSize = 80 << 10 // 80KB.
 	const maxReceiveBufferSize = receiveBufferSize * 10
-	if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{Min: 1, Default: receiveBufferSize, Max: maxReceiveBufferSize}); err != nil {
-		t.Fatalf("SetTransportProtocolOption failed: %s", err)
+	{
+		opt := tcpip.TCPReceiveBufferSizeRangeOption{Min: 1, Default: receiveBufferSize, Max: maxReceiveBufferSize}
+		if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+			t.Fatalf("SetTransportProtocolOption(%d, &%#v): %s", tcp.ProtocolNumber, opt, err)
+		}
 	}
 
 	// Enable auto-tuning.
-	if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil {
-		t.Fatalf("SetTransportProtocolOption failed: %s", err)
+	{
+		opt := tcpip.TCPModerateReceiveBufferOption(true)
+		if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+			t.Fatalf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err)
+		}
 	}
 	// Change the expected window scale to match the value needed for the
 	// maximum buffer size defined above.
@@ -6027,13 +6054,19 @@ func TestReceiveBufferAutoTuning(t *testing.T) {
 	// the segment queue holding unprocessed packets is limited to 300.
 	const receiveBufferSize = 80 << 10 // 80KB.
 	const maxReceiveBufferSize = receiveBufferSize * 10
-	if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{Min: 1, Default: receiveBufferSize, Max: maxReceiveBufferSize}); err != nil {
-		t.Fatalf("SetTransportProtocolOption failed: %s", err)
+	{
+		opt := tcpip.TCPReceiveBufferSizeRangeOption{Min: 1, Default: receiveBufferSize, Max: maxReceiveBufferSize}
+		if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+			t.Fatalf("SetTransportProtocolOption(%d, &%#v): %s", tcp.ProtocolNumber, opt, err)
+		}
 	}
 
 	// Enable auto-tuning.
-	if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil {
-		t.Fatalf("SetTransportProtocolOption failed: %s", err)
+	{
+		opt := tcpip.TCPModerateReceiveBufferOption(true)
+		if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+			t.Fatalf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err)
+		}
 	}
 	// Change the expected window scale to match the value needed for the
 	// maximum buffer size used by stack.
@@ -6169,7 +6202,7 @@ func TestDelayEnabled(t *testing.T) {
 	checkDelayOption(t, c, false, false) // Delay is disabled by default.
 
 	for _, v := range []struct {
-		delayEnabled    tcp.DelayEnabled
+		delayEnabled    tcpip.TCPDelayEnabled
 		wantDelayOption bool
 	}{
 		{delayEnabled: false, wantDelayOption: false},
@@ -6177,17 +6210,17 @@ func TestDelayEnabled(t *testing.T) {
 	} {
 		c := context.New(t, defaultMTU)
 		defer c.Cleanup()
-		if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, v.delayEnabled); err != nil {
-			t.Fatalf("SetTransportProtocolOption(tcp, %t) failed: %s", v.delayEnabled, err)
+		if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &v.delayEnabled); err != nil {
+			t.Fatalf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, v.delayEnabled, v.delayEnabled, err)
 		}
 		checkDelayOption(t, c, v.delayEnabled, v.wantDelayOption)
 	}
 }
 
-func checkDelayOption(t *testing.T, c *context.Context, wantDelayEnabled tcp.DelayEnabled, wantDelayOption bool) {
+func checkDelayOption(t *testing.T, c *context.Context, wantDelayEnabled tcpip.TCPDelayEnabled, wantDelayOption bool) {
 	t.Helper()
 
-	var gotDelayEnabled tcp.DelayEnabled
+	var gotDelayEnabled tcpip.TCPDelayEnabled
 	if err := c.Stack().TransportProtocolOption(tcp.ProtocolNumber, &gotDelayEnabled); err != nil {
 		t.Fatalf("TransportProtocolOption(tcp, &gotDelayEnabled) failed: %s", err)
 	}
@@ -6625,8 +6658,9 @@ func TestTCPTimeWaitDuplicateFINExtendsTimeWait(t *testing.T) {
 	// Set TCPTimeWaitTimeout to 5 seconds so that sockets are marked closed
 	// after 5 seconds in TIME_WAIT state.
 	tcpTimeWaitTimeout := 5 * time.Second
-	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPTimeWaitTimeoutOption(tcpTimeWaitTimeout)); err != nil {
-		t.Fatalf("c.stack.SetTransportProtocolOption(tcp, tcpip.TCPLingerTimeoutOption(%d) failed: %s", tcpTimeWaitTimeout, err)
+	opt := tcpip.TCPTimeWaitTimeoutOption(tcpTimeWaitTimeout)
+	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+		t.Fatalf("SetTransportProtocolOption(%d, &%T(%s)): %s", tcp.ProtocolNumber, opt, tcpTimeWaitTimeout, err)
 	}
 
 	want := c.Stack().Stats().TCP.EstablishedClosed.Value() + 1
@@ -6775,8 +6809,9 @@ func TestTCPCloseWithData(t *testing.T) {
 	// Set TCPTimeWaitTimeout to 5 seconds so that sockets are marked closed
 	// after 5 seconds in TIME_WAIT state.
 	tcpTimeWaitTimeout := 5 * time.Second
-	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPTimeWaitTimeoutOption(tcpTimeWaitTimeout)); err != nil {
-		t.Fatalf("c.stack.SetTransportProtocolOption(tcp, tcpip.TCPLingerTimeoutOption(%d) failed: %s", tcpTimeWaitTimeout, err)
+	opt := tcpip.TCPTimeWaitTimeoutOption(tcpTimeWaitTimeout)
+	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+		t.Fatalf("SetTransportProtocolOption(%d, &%T(%s)): %s", tcp.ProtocolNumber, opt, tcpTimeWaitTimeout, err)
 	}
 
 	wq := &waiter.Queue{}
@@ -7462,9 +7497,10 @@ func TestSetStackTimeWaitReuse(t *testing.T) {
 	}
 
 	for _, tc := range testCases {
-		err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPTimeWaitReuseOption(tc.v))
+		opt := tcpip.TCPTimeWaitReuseOption(tc.v)
+		err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &opt)
 		if got, want := err, tc.err; got != want {
-			t.Fatalf("s.TransportProtocolOption(%v, %v) = %v, want %v", tcp.ProtocolNumber, tc.v, err, tc.err)
+			t.Fatalf("s.SetTransportProtocolOption(%d, &%T(%d)) = %s, want = %s", tcp.ProtocolNumber, tc.v, tc.v, err, tc.err)
 		}
 		if tc.err != nil {
 			continue
diff --git a/pkg/tcpip/transport/tcp/tcp_timestamp_test.go b/pkg/tcpip/transport/tcp/tcp_timestamp_test.go
index 8edbff964e..44593ed986 100644
--- a/pkg/tcpip/transport/tcp/tcp_timestamp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_timestamp_test.go
@@ -131,8 +131,9 @@ func timeStampEnabledAccept(t *testing.T, cookieEnabled bool, wndScale int, wndS
 	defer c.Cleanup()
 
 	if cookieEnabled {
-		if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPSynRcvdCountThresholdOption(0)); err != nil {
-			t.Fatalf("setting TCPSynRcvdCountThresholdOption to 0 failed: %s", err)
+		var opt tcpip.TCPSynRcvdCountThresholdOption
+		if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+			t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
 		}
 	}
 
@@ -192,8 +193,9 @@ func timeStampDisabledAccept(t *testing.T, cookieEnabled bool, wndScale int, wnd
 	defer c.Cleanup()
 
 	if cookieEnabled {
-		if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPSynRcvdCountThresholdOption(0)); err != nil {
-			t.Fatalf("setting TCPSynRcvdCountThresholdOption to 0 failed: %s", err)
+		var opt tcpip.TCPSynRcvdCountThresholdOption
+		if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+			t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
 		}
 	}
 
diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go
index 8bb5e5f6da..baf7df197f 100644
--- a/pkg/tcpip/transport/tcp/testing/context/context.go
+++ b/pkg/tcpip/transport/tcp/testing/context/context.go
@@ -146,19 +146,22 @@ func New(t *testing.T, mtu uint32) *Context {
 	const sendBufferSize = 1 << 20 // 1 MiB
 	const recvBufferSize = 1 << 20 // 1 MiB
 	// Allow minimum send/receive buffer sizes to be 1 during tests.
-	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{Min: 1, Default: sendBufferSize, Max: 10 * sendBufferSize}); err != nil {
-		t.Fatalf("SetTransportProtocolOption failed: %s", err)
+	sendBufOpt := tcpip.TCPSendBufferSizeRangeOption{Min: 1, Default: sendBufferSize, Max: 10 * sendBufferSize}
+	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &sendBufOpt); err != nil {
+		t.Fatalf("SetTransportProtocolOption(%d, &%#v) failed: %s", tcp.ProtocolNumber, sendBufOpt, err)
 	}
 
-	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{Min: 1, Default: recvBufferSize, Max: 10 * recvBufferSize}); err != nil {
-		t.Fatalf("SetTransportProtocolOption failed: %s", err)
+	rcvBufOpt := tcpip.TCPReceiveBufferSizeRangeOption{Min: 1, Default: recvBufferSize, Max: 10 * recvBufferSize}
+	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &rcvBufOpt); err != nil {
+		t.Fatalf("SetTransportProtocolOption(%d, &%#v) failed: %s", tcp.ProtocolNumber, rcvBufOpt, err)
 	}
 
 	// Increase minimum RTO in tests to avoid test flakes due to early
 	// retransmit in case the test executors are overloaded and cause timers
 	// to fire earlier than expected.
-	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPMinRTOOption(3*time.Second)); err != nil {
-		t.Fatalf("failed to set stack-wide minRTO: %s", err)
+	minRTOOpt := tcpip.TCPMinRTOOption(3 * time.Second)
+	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &minRTOOpt); err != nil {
+		t.Fatalf("s.SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, minRTOOpt, minRTOOpt, err)
 	}
 
 	// Some of the congestion control tests send up to 640 packets, we so
@@ -1096,7 +1099,7 @@ func (c *Context) PassiveConnectWithOptions(maxPayload, wndScale int, synOptions
 // SACKEnabled returns true if the TCP Protocol option SACKEnabled is set to true
 // for the Stack in the context.
 func (c *Context) SACKEnabled() bool {
-	var v tcp.SACKEnabled
+	var v tcpip.TCPSACKEnabled
 	if err := c.Stack().TransportProtocolOption(tcp.ProtocolNumber, &v); err != nil {
 		// Stack doesn't support SACK. So just return.
 		return false
diff --git a/pkg/tcpip/transport/udp/protocol.go b/pkg/tcpip/transport/udp/protocol.go
index f65751dd45..3f87e8057e 100644
--- a/pkg/tcpip/transport/udp/protocol.go
+++ b/pkg/tcpip/transport/udp/protocol.go
@@ -202,12 +202,12 @@ func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.Trans
 }
 
 // SetOption implements stack.TransportProtocol.SetOption.
-func (p *protocol) SetOption(option interface{}) *tcpip.Error {
+func (*protocol) SetOption(tcpip.SettableTransportProtocolOption) *tcpip.Error {
 	return tcpip.ErrUnknownProtocolOption
 }
 
 // Option implements stack.TransportProtocol.Option.
-func (p *protocol) Option(option interface{}) *tcpip.Error {
+func (*protocol) Option(tcpip.GettableTransportProtocolOption) *tcpip.Error {
 	return tcpip.ErrUnknownProtocolOption
 }
 
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 246ae3c3eb..a136da21a5 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -1045,22 +1045,30 @@ func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (in
 	})}
 
 	// Enable SACK Recovery.
-	if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil {
-		return nil, fmt.Errorf("failed to enable SACK: %s", err)
+	{
+		opt := tcpip.TCPSACKEnabled(true)
+		if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+			return nil, fmt.Errorf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err)
+		}
 	}
 
 	// Set default TTLs as required by socket/netstack.
-	opt := tcpip.DefaultTTLOption(netstack.DefaultTTL)
-	if err := s.Stack.SetNetworkProtocolOption(ipv4.ProtocolNumber, &opt); err != nil {
-		return nil, fmt.Errorf("SetNetworkProtocolOption(%d, &%T(%d)): %s", ipv4.ProtocolNumber, opt, opt, err)
-	}
-	if err := s.Stack.SetNetworkProtocolOption(ipv6.ProtocolNumber, &opt); err != nil {
-		return nil, fmt.Errorf("SetNetworkProtocolOption(%d, &%T(%d)): %s", ipv6.ProtocolNumber, opt, opt, err)
+	{
+		opt := tcpip.DefaultTTLOption(netstack.DefaultTTL)
+		if err := s.Stack.SetNetworkProtocolOption(ipv4.ProtocolNumber, &opt); err != nil {
+			return nil, fmt.Errorf("SetNetworkProtocolOption(%d, &%T(%d)): %s", ipv4.ProtocolNumber, opt, opt, err)
+		}
+		if err := s.Stack.SetNetworkProtocolOption(ipv6.ProtocolNumber, &opt); err != nil {
+			return nil, fmt.Errorf("SetNetworkProtocolOption(%d, &%T(%d)): %s", ipv6.ProtocolNumber, opt, opt, err)
+		}
 	}
 
 	// Enable Receive Buffer Auto-Tuning.
-	if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil {
-		return nil, fmt.Errorf("SetTransportProtocolOption failed: %s", err)
+	{
+		opt := tcpip.TCPModerateReceiveBufferOption(true)
+		if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+			return nil, fmt.Errorf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err)
+		}
 	}
 
 	return &s, nil
diff --git a/test/benchmarks/tcp/tcp_proxy.go b/test/benchmarks/tcp/tcp_proxy.go
index 4b7ca7a14a..6cabfb451c 100644
--- a/test/benchmarks/tcp/tcp_proxy.go
+++ b/test/benchmarks/tcp/tcp_proxy.go
@@ -228,19 +228,26 @@ func newNetstackImpl(mode string) (impl, error) {
 	})
 
 	// Set protocol options.
-	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(*sack)); err != nil {
-		return nil, fmt.Errorf("SetTransportProtocolOption for SACKEnabled failed: %s", err)
+	{
+		opt := tcpip.TCPSACKEnabled(*sack)
+		if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+			return nil, fmt.Errorf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err)
+		}
 	}
 
 	// Enable Receive Buffer Auto-Tuning.
-	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(*moderateRecvBuf)); err != nil {
-		return nil, fmt.Errorf("SetTransportProtocolOption failed: %s", err)
+	{
+		opt := tcpip.TCPModerateReceiveBufferOption(*moderateRecvBuf)
+		if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+			return nil, fmt.Errorf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err)
+		}
 	}
 
 	// Set Congestion Control to cubic if requested.
 	if *cubic {
-		if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.CongestionControlOption("cubic")); err != nil {
-			return nil, fmt.Errorf("SetTransportProtocolOption for CongestionControlOption(cubic) failed: %s", err)
+		opt := tcpip.CongestionControlOption("cubic")
+		if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
+			return nil, fmt.Errorf("SetTransportProtocolOption(%d, &%T(%s)): %s", tcp.ProtocolNumber, opt, opt, err)
 		}
 	}
 

From 03a529d8ac52ab0227a0bbd85aaa3d8808b11e59 Mon Sep 17 00:00:00 2001
From: Bhasker Hariharan <bhaskerh@google.com>
Date: Tue, 8 Sep 2020 12:29:26 -0700
Subject: [PATCH 154/211] Fix data race in tcp.GetSockOpt.

e.ID can't be read without holding e.mu. GetSockOpt was reading e.ID
when looking up OriginalDst without holding e.mu.

PiperOrigin-RevId: 330562293
---
 pkg/tcpip/transport/tcp/endpoint.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 8cb769d585..6d5046a3dc 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -2019,8 +2019,10 @@ func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error {
 		e.UnlockUser()
 
 	case *tcpip.OriginalDestinationOption:
+		e.LockUser()
 		ipt := e.stack.IPTables()
 		addr, port, err := ipt.OriginalDst(e.ID)
+		e.UnlockUser()
 		if err != nil {
 			return err
 		}

From 982ac0e46a1ed6a76ef09d1e6eb7a19ed20b03c8 Mon Sep 17 00:00:00 2001
From: Tiwei Bie <tiwei.btw@antgroup.com>
Date: Tue, 8 Sep 2020 15:50:29 +0800
Subject: [PATCH 155/211] Fix the use after nil check on
 args.MountNamespaceVFS2

args.MountNamespaceVFS2 is used again after the nil check, even though it
may still be nil at that point. mntnsVFS2, which holds the expected
reference, should be used instead. This patch fixes this issue.

Fixes: #3855

Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
---
 pkg/sentry/kernel/kernel.go | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index 402aa1718f..22f9bb0061 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -888,17 +888,18 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
 		opener    fsbridge.Lookup
 		fsContext *FSContext
 		mntns     *fs.MountNamespace
+		mntnsVFS2 *vfs.MountNamespace
 	)
 
 	if VFS2Enabled {
-		mntnsVFS2 := args.MountNamespaceVFS2
+		mntnsVFS2 = args.MountNamespaceVFS2
 		if mntnsVFS2 == nil {
 			// MountNamespaceVFS2 adds a reference to the namespace, which is
 			// transferred to the new process.
 			mntnsVFS2 = k.globalInit.Leader().MountNamespaceVFS2()
 		}
 		// Get the root directory from the MountNamespace.
-		root := args.MountNamespaceVFS2.Root()
+		root := mntnsVFS2.Root()
 		// The call to newFSContext below will take a reference on root, so we
 		// don't need to hold this one.
 		defer root.DecRef(ctx)
@@ -1008,7 +1009,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
 		UTSNamespace:            args.UTSNamespace,
 		IPCNamespace:            args.IPCNamespace,
 		AbstractSocketNamespace: args.AbstractSocketNamespace,
-		MountNamespaceVFS2:      args.MountNamespaceVFS2,
+		MountNamespaceVFS2:      mntnsVFS2,
 		ContainerID:             args.ContainerID,
 	}
 	t, err := k.tasks.NewTask(config)

From 8a4c4aed6d6af1a5c0f8c0dc27b6177016c8617f Mon Sep 17 00:00:00 2001
From: Sam Balana <sbalana@google.com>
Date: Tue, 8 Sep 2020 12:48:28 -0700
Subject: [PATCH 156/211] Increase resolution timeout for TestCacheResolution

Fixes pkg/tcpip/stack:stack_test flake experienced while running
TestCacheResolution with gotsan. This occurs when the test-runner takes longer
than the resolution timeout to call linkAddrCache.get.

In this test we don't care about the resolution timeout, so set it to the
maximum and rely on test-runner timeouts to avoid deadlocks.

PiperOrigin-RevId: 330566250
---
 pkg/tcpip/stack/linkaddrcache_test.go | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/pkg/tcpip/stack/linkaddrcache_test.go b/pkg/tcpip/stack/linkaddrcache_test.go
index 14fb4239bd..33806340e1 100644
--- a/pkg/tcpip/stack/linkaddrcache_test.go
+++ b/pkg/tcpip/stack/linkaddrcache_test.go
@@ -16,6 +16,7 @@ package stack
 
 import (
 	"fmt"
+	"math"
 	"sync/atomic"
 	"testing"
 	"time"
@@ -191,7 +192,13 @@ func TestCacheReplace(t *testing.T) {
 }
 
 func TestCacheResolution(t *testing.T) {
-	c := newLinkAddrCache(1<<63-1, 250*time.Millisecond, 1)
+	// There is a race condition causing this test to fail when the executor
+	// takes longer than the resolution timeout to call linkAddrCache.get. This
+	// is especially common when this test is run with gotsan.
+	//
+	// Using a large resolution timeout decreases the probability of experiencing
+	// this race condition and does not affect how long this test takes to run.
+	c := newLinkAddrCache(1<<63-1, math.MaxInt64, 1)
 	linkRes := &testLinkAddressResolver{cache: c}
 	for i, ta := range testAddrs {
 		got, err := getBlocking(c, ta.addr, linkRes)

From f2f92e52f6548b3b29d561d6d334a4f1fdbd8437 Mon Sep 17 00:00:00 2001
From: Fabricio Voznika <fvoznika@google.com>
Date: Tue, 8 Sep 2020 13:58:50 -0700
Subject: [PATCH 157/211] Honor readonly flag for root mount

Updates #1487

PiperOrigin-RevId: 330580699
---
 pkg/sentry/fsimpl/devtmpfs/devtmpfs.go        |   2 +-
 pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go   |   2 +-
 .../fsimpl/ext/benchmark/benchmark_test.go    |   6 +-
 pkg/sentry/fsimpl/ext/ext_test.go             |   6 +-
 pkg/sentry/fsimpl/fuse/dev_test.go            |   2 +-
 pkg/sentry/fsimpl/kernfs/kernfs_test.go       |   2 +-
 pkg/sentry/fsimpl/proc/tasks_test.go          |   2 +-
 pkg/sentry/fsimpl/sys/sys_test.go             |   2 +-
 pkg/sentry/fsimpl/tmpfs/benchmark_test.go     |   4 +-
 pkg/sentry/fsimpl/tmpfs/pipe_test.go          |   2 +-
 pkg/sentry/fsimpl/tmpfs/tmpfs_test.go         |   2 +-
 pkg/sentry/vfs/mount.go                       |   6 +-
 runsc/boot/fs.go                              |   8 +-
 runsc/boot/loader_test.go                     |   4 +-
 runsc/boot/vfs.go                             |  35 +++-
 runsc/container/container_test.go             | 180 +++++++++++-------
 runsc/container/multi_container_test.go       |  21 +-
 runsc/container/shared_volume_test.go         |  12 +-
 18 files changed, 178 insertions(+), 120 deletions(-)

diff --git a/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go b/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go
index 52f44f66db..a23094e540 100644
--- a/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go
+++ b/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go
@@ -80,7 +80,7 @@ type Accessor struct {
 // NewAccessor returns an Accessor that supports creation of device special
 // files in the devtmpfs instance registered with name fsTypeName in vfsObj.
 func NewAccessor(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, fsTypeName string) (*Accessor, error) {
-	mntns, err := vfsObj.NewMountNamespace(ctx, creds, "devtmpfs" /* source */, fsTypeName, &vfs.GetFilesystemOptions{})
+	mntns, err := vfsObj.NewMountNamespace(ctx, creds, "devtmpfs" /* source */, fsTypeName, &vfs.MountOptions{})
 	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go b/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go
index 827a608cb8..3a38b8bb40 100644
--- a/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go
+++ b/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go
@@ -48,7 +48,7 @@ func setupDevtmpfs(t *testing.T) (context.Context, *auth.Credentials, *vfs.Virtu
 	})
 
 	// Create a test mount namespace with devtmpfs mounted at "/dev".
-	mntns, err := vfsObj.NewMountNamespace(ctx, creds, "tmpfs" /* source */, "tmpfs" /* fsTypeName */, &vfs.GetFilesystemOptions{})
+	mntns, err := vfsObj.NewMountNamespace(ctx, creds, "tmpfs" /* source */, "tmpfs" /* fsTypeName */, &vfs.MountOptions{})
 	if err != nil {
 		t.Fatalf("failed to create tmpfs root mount: %v", err)
 	}
diff --git a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go
index a2cc9b59f7..c349b886ee 100644
--- a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go
+++ b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go
@@ -59,7 +59,11 @@ func setUp(b *testing.B, imagePath string) (context.Context, *vfs.VirtualFilesys
 	vfsObj.MustRegisterFilesystemType("extfs", ext.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
 		AllowUserMount: true,
 	})
-	mntns, err := vfsObj.NewMountNamespace(ctx, creds, imagePath, "extfs", &vfs.GetFilesystemOptions{InternalData: int(f.Fd())})
+	mntns, err := vfsObj.NewMountNamespace(ctx, creds, imagePath, "extfs", &vfs.MountOptions{
+		GetFilesystemOptions: vfs.GetFilesystemOptions{
+			InternalData: int(f.Fd()),
+		},
+	})
 	if err != nil {
 		f.Close()
 		return nil, nil, nil, nil, err
diff --git a/pkg/sentry/fsimpl/ext/ext_test.go b/pkg/sentry/fsimpl/ext/ext_test.go
index 2dbaee2874..0989558cd7 100644
--- a/pkg/sentry/fsimpl/ext/ext_test.go
+++ b/pkg/sentry/fsimpl/ext/ext_test.go
@@ -71,7 +71,11 @@ func setUp(t *testing.T, imagePath string) (context.Context, *vfs.VirtualFilesys
 	vfsObj.MustRegisterFilesystemType("extfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
 		AllowUserMount: true,
 	})
-	mntns, err := vfsObj.NewMountNamespace(ctx, creds, localImagePath, "extfs", &vfs.GetFilesystemOptions{InternalData: int(f.Fd())})
+	mntns, err := vfsObj.NewMountNamespace(ctx, creds, localImagePath, "extfs", &vfs.MountOptions{
+		GetFilesystemOptions: vfs.GetFilesystemOptions{
+			InternalData: int(f.Fd()),
+		},
+	})
 	if err != nil {
 		f.Close()
 		return nil, nil, nil, nil, err
diff --git a/pkg/sentry/fsimpl/fuse/dev_test.go b/pkg/sentry/fsimpl/fuse/dev_test.go
index 1ffe7ccd2e..aedc2fa390 100644
--- a/pkg/sentry/fsimpl/fuse/dev_test.go
+++ b/pkg/sentry/fsimpl/fuse/dev_test.go
@@ -342,7 +342,7 @@ func setup(t *testing.T) *testutil.System {
 		AllowUserMount: true,
 	})
 
-	mntns, err := k.VFS().NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{})
+	mntns, err := k.VFS().NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.MountOptions{})
 	if err != nil {
 		t.Fatalf("NewMountNamespace(): %v", err)
 	}
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
index 675587c6b9..09806a3f2c 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
@@ -52,7 +52,7 @@ func newTestSystem(t *testing.T, rootFn RootDentryFn) *testutil.System {
 	v.MustRegisterFilesystemType("testfs", &fsType{rootFn: rootFn}, &vfs.RegisterFilesystemTypeOptions{
 		AllowUserMount: true,
 	})
-	mns, err := v.NewMountNamespace(ctx, creds, "", "testfs", &vfs.GetFilesystemOptions{})
+	mns, err := v.NewMountNamespace(ctx, creds, "", "testfs", &vfs.MountOptions{})
 	if err != nil {
 		t.Fatalf("Failed to create testfs root mount: %v", err)
 	}
diff --git a/pkg/sentry/fsimpl/proc/tasks_test.go b/pkg/sentry/fsimpl/proc/tasks_test.go
index d82b3d2f37..f693f9060d 100644
--- a/pkg/sentry/fsimpl/proc/tasks_test.go
+++ b/pkg/sentry/fsimpl/proc/tasks_test.go
@@ -104,7 +104,7 @@ func setup(t *testing.T) *testutil.System {
 		AllowUserMount: true,
 	})
 
-	mntns, err := k.VFS().NewMountNamespace(ctx, creds, "", tmpfs.Name, &vfs.GetFilesystemOptions{})
+	mntns, err := k.VFS().NewMountNamespace(ctx, creds, "", tmpfs.Name, &vfs.MountOptions{})
 	if err != nil {
 		t.Fatalf("NewMountNamespace(): %v", err)
 	}
diff --git a/pkg/sentry/fsimpl/sys/sys_test.go b/pkg/sentry/fsimpl/sys/sys_test.go
index 9fd38b2957..0a0d914cc1 100644
--- a/pkg/sentry/fsimpl/sys/sys_test.go
+++ b/pkg/sentry/fsimpl/sys/sys_test.go
@@ -38,7 +38,7 @@ func newTestSystem(t *testing.T) *testutil.System {
 		AllowUserMount: true,
 	})
 
-	mns, err := k.VFS().NewMountNamespace(ctx, creds, "", sys.Name, &vfs.GetFilesystemOptions{})
+	mns, err := k.VFS().NewMountNamespace(ctx, creds, "", sys.Name, &vfs.MountOptions{})
 	if err != nil {
 		t.Fatalf("Failed to create new mount namespace: %v", err)
 	}
diff --git a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
index e5a4218e86..5209a17af9 100644
--- a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
+++ b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
@@ -182,7 +182,7 @@ func BenchmarkVFS2TmpfsStat(b *testing.B) {
 			vfsObj.MustRegisterFilesystemType("tmpfs", tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
 				AllowUserMount: true,
 			})
-			mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{})
+			mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.MountOptions{})
 			if err != nil {
 				b.Fatalf("failed to create tmpfs root mount: %v", err)
 			}
@@ -376,7 +376,7 @@ func BenchmarkVFS2TmpfsMountStat(b *testing.B) {
 			vfsObj.MustRegisterFilesystemType("tmpfs", tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
 				AllowUserMount: true,
 			})
-			mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{})
+			mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.MountOptions{})
 			if err != nil {
 				b.Fatalf("failed to create tmpfs root mount: %v", err)
 			}
diff --git a/pkg/sentry/fsimpl/tmpfs/pipe_test.go b/pkg/sentry/fsimpl/tmpfs/pipe_test.go
index ec2701d8b8..be29a2363d 100644
--- a/pkg/sentry/fsimpl/tmpfs/pipe_test.go
+++ b/pkg/sentry/fsimpl/tmpfs/pipe_test.go
@@ -158,7 +158,7 @@ func setup(t *testing.T) (context.Context, *auth.Credentials, *vfs.VirtualFilesy
 	vfsObj.MustRegisterFilesystemType("tmpfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
 		AllowUserMount: true,
 	})
-	mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{})
+	mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.MountOptions{})
 	if err != nil {
 		t.Fatalf("failed to create tmpfs root mount: %v", err)
 	}
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go b/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go
index 6f3e3ae6f4..99c8e3c0f9 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go
@@ -41,7 +41,7 @@ func newTmpfsRoot(ctx context.Context) (*vfs.VirtualFilesystem, vfs.VirtualDentr
 	vfsObj.MustRegisterFilesystemType("tmpfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
 		AllowUserMount: true,
 	})
-	mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{})
+	mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.MountOptions{})
 	if err != nil {
 		return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("failed to create tmpfs root mount: %v", err)
 	}
diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go
index db5fb3bb12..06ca919893 100644
--- a/pkg/sentry/vfs/mount.go
+++ b/pkg/sentry/vfs/mount.go
@@ -154,13 +154,13 @@ type MountNamespace struct {
 // NewMountNamespace returns a new mount namespace with a root filesystem
 // configured by the given arguments. A reference is taken on the returned
 // MountNamespace.
-func (vfs *VirtualFilesystem) NewMountNamespace(ctx context.Context, creds *auth.Credentials, source, fsTypeName string, opts *GetFilesystemOptions) (*MountNamespace, error) {
+func (vfs *VirtualFilesystem) NewMountNamespace(ctx context.Context, creds *auth.Credentials, source, fsTypeName string, opts *MountOptions) (*MountNamespace, error) {
 	rft := vfs.getFilesystemType(fsTypeName)
 	if rft == nil {
 		ctx.Warningf("Unknown filesystem type: %s", fsTypeName)
 		return nil, syserror.ENODEV
 	}
-	fs, root, err := rft.fsType.GetFilesystem(ctx, vfs, creds, source, *opts)
+	fs, root, err := rft.fsType.GetFilesystem(ctx, vfs, creds, source, opts.GetFilesystemOptions)
 	if err != nil {
 		return nil, err
 	}
@@ -169,7 +169,7 @@ func (vfs *VirtualFilesystem) NewMountNamespace(ctx context.Context, creds *auth
 		mountpoints: make(map[*Dentry]uint32),
 	}
 	mntns.EnableLeakCheck()
-	mntns.root = newMount(vfs, fs, root, mntns, &MountOptions{})
+	mntns.root = newMount(vfs, fs, root, mntns, opts)
 	return mntns, nil
 }
 
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index e2c5f5fb1d..ddf2884563 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -254,7 +254,7 @@ func mustFindFilesystem(name string) fs.Filesystem {
 
 // addSubmountOverlay overlays the inode over a ramfs tree containing the given
 // paths.
-func addSubmountOverlay(ctx context.Context, inode *fs.Inode, submounts []string) (*fs.Inode, error) {
+func addSubmountOverlay(ctx context.Context, inode *fs.Inode, submounts []string, mf fs.MountSourceFlags) (*fs.Inode, error) {
 	// Construct a ramfs tree of mount points. The contents never
 	// change, so this can be fully caching. There's no real
 	// filesystem backing this tree, so we set the filesystem to
@@ -264,7 +264,7 @@ func addSubmountOverlay(ctx context.Context, inode *fs.Inode, submounts []string
 	if err != nil {
 		return nil, fmt.Errorf("creating mount tree: %v", err)
 	}
-	overlayInode, err := fs.NewOverlayRoot(ctx, inode, mountTree, fs.MountSourceFlags{})
+	overlayInode, err := fs.NewOverlayRoot(ctx, inode, mountTree, mf)
 	if err != nil {
 		return nil, fmt.Errorf("adding mount overlay: %v", err)
 	}
@@ -741,7 +741,7 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *config.Con
 	// for submount paths.  "/dev" "/sys" "/proc" and "/tmp" are always
 	// mounted even if they are not in the spec.
 	submounts := append(subtargets("/", c.mounts), "/dev", "/sys", "/proc", "/tmp")
-	rootInode, err = addSubmountOverlay(ctx, rootInode, submounts)
+	rootInode, err = addSubmountOverlay(ctx, rootInode, submounts, mf)
 	if err != nil {
 		return nil, fmt.Errorf("adding submount overlay: %v", err)
 	}
@@ -851,7 +851,7 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *config.Confi
 	submounts := subtargets(m.Destination, c.mounts)
 	if len(submounts) > 0 {
 		log.Infof("Adding submount overlay over %q", m.Destination)
-		inode, err = addSubmountOverlay(ctx, inode, submounts)
+		inode, err = addSubmountOverlay(ctx, inode, submounts, mf)
 		if err != nil {
 			return fmt.Errorf("adding submount overlay: %v", err)
 		}
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index dc9861389c..bf9ec5d382 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -491,9 +491,9 @@ func TestCreateMountNamespaceVFS2(t *testing.T) {
 			}
 
 			ctx := l.k.SupervisorContext()
-			mns, err := mntr.setupVFS2(ctx, l.root.conf, &l.root.procArgs)
+			mns, err := mntr.mountAll(l.root.conf, &l.root.procArgs)
 			if err != nil {
-				t.Fatalf("failed to setupVFS2: %v", err)
+				t.Fatalf("mountAll: %v", err)
 			}
 
 			root := mns.Root()
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
index 66b6cf19b9..7844ea28ce 100644
--- a/runsc/boot/vfs.go
+++ b/runsc/boot/vfs.go
@@ -134,7 +134,7 @@ func registerFilesystems(k *kernel.Kernel) error {
 }
 
 func setupContainerVFS2(ctx context.Context, conf *config.Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error {
-	mns, err := mntr.setupVFS2(ctx, conf, procArgs)
+	mns, err := mntr.mountAll(conf, procArgs)
 	if err != nil {
 		return fmt.Errorf("failed to setupFS: %w", err)
 	}
@@ -149,7 +149,7 @@ func setupContainerVFS2(ctx context.Context, conf *config.Config, mntr *containe
 	return nil
 }
 
-func (c *containerMounter) setupVFS2(ctx context.Context, conf *config.Config, procArgs *kernel.CreateProcessArgs) (*vfs.MountNamespace, error) {
+func (c *containerMounter) mountAll(conf *config.Config, procArgs *kernel.CreateProcessArgs) (*vfs.MountNamespace, error) {
 	log.Infof("Configuring container's file system with VFS2")
 
 	// Create context with root credentials to mount the filesystem (the current
@@ -172,24 +172,44 @@ func (c *containerMounter) setupVFS2(ctx context.Context, conf *config.Config, p
 	if err := c.mountSubmountsVFS2(rootCtx, conf, mns, rootCreds); err != nil {
 		return nil, fmt.Errorf("mounting submounts vfs2: %w", err)
 	}
+
+	if c.root.Readonly || conf.Overlay {
+		// Switch to ReadOnly after all submounts were setup.
+		root := mns.Root()
+		defer root.DecRef(rootCtx)
+		if err := c.k.VFS().SetMountReadOnly(root.Mount(), true); err != nil {
+			return nil, fmt.Errorf(`failed to set mount at "/" readonly: %v`, err)
+		}
+	}
+
 	return mns, nil
 }
 
+// createMountNamespaceVFS2 creates the container's root mount and namespace.
+// The mount is created ReadWrite to allow mount point for submounts to be
+// created. ** The caller is responsible to switch to ReadOnly if needed **
 func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *config.Config, creds *auth.Credentials) (*vfs.MountNamespace, error) {
 	fd := c.fds.remove()
-	opts := p9MountData(fd, conf.FileAccess, true /* vfs2 */)
+	data := p9MountData(fd, conf.FileAccess, true /* vfs2 */)
 
 	if conf.OverlayfsStaleRead {
 		// We can't check for overlayfs here because sandbox is chroot'ed and gofer
 		// can only send mount options for specs.Mounts (specs.Root is missing
 		// Options field). So assume root is always on top of overlayfs.
-		opts = append(opts, "overlayfs_stale_read")
+		data = append(data, "overlayfs_stale_read")
 	}
 
 	log.Infof("Mounting root over 9P, ioFD: %d", fd)
-	mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", gofer.Name, &vfs.GetFilesystemOptions{
-		Data: strings.Join(opts, ","),
-	})
+	opts := &vfs.MountOptions{
+		// Always mount as ReadWrite to allow other mounts on top of it. It'll be
+		// made ReadOnly in the caller (if needed).
+		ReadOnly: false,
+		GetFilesystemOptions: vfs.GetFilesystemOptions{
+			Data: strings.Join(data, ","),
+		},
+		InternalMount: true,
+	}
+	mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", gofer.Name, opts)
 	if err != nil {
 		return nil, fmt.Errorf("setting up mount namespace: %w", err)
 	}
@@ -227,6 +247,7 @@ func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *config.
 			if err := c.k.VFS().SetMountReadOnly(mnt, false); err != nil {
 				return fmt.Errorf("failed to set mount at %q readwrite: %v", submount.Destination, err)
 			}
+			// Restore back to ReadOnly at the end.
 			defer func() {
 				if err := c.k.VFS().SetMountReadOnly(mnt, true); err != nil {
 					panic(fmt.Sprintf("failed to restore mount at %q back to readonly: %v", submount.Destination, err))
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 6082068c75..33ada5bb9b 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -41,6 +41,7 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/test/testutil"
+	"gvisor.dev/gvisor/pkg/urpc"
 	"gvisor.dev/gvisor/runsc/boot/platforms"
 	"gvisor.dev/gvisor/runsc/config"
 	"gvisor.dev/gvisor/runsc/specutils"
@@ -1490,6 +1491,8 @@ func TestMountNewDir(t *testing.T) {
 				Source:      srcDir,
 				Type:        "bind",
 			})
+			// Extra points for creating the mount with a readonly root.
+			spec.Root.Readonly = true
 
 			if err := run(spec, conf); err != nil {
 				t.Fatalf("error running sandbox: %v", err)
@@ -1499,17 +1502,17 @@ func TestMountNewDir(t *testing.T) {
 }
 
 func TestReadonlyRoot(t *testing.T) {
-	for name, conf := range configsWithVFS2(t, overlay) {
+	for name, conf := range configsWithVFS2(t, all...) {
 		t.Run(name, func(t *testing.T) {
-			spec := testutil.NewSpecWithArgs("/bin/touch", "/foo")
+			spec := testutil.NewSpecWithArgs("sleep", "100")
 			spec.Root.Readonly = true
+
 			_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
 			if err != nil {
 				t.Fatalf("error setting up container: %v", err)
 			}
 			defer cleanup()
 
-			// Create, start and wait for the container.
 			args := Args{
 				ID:        testutil.RandomContainerID(),
 				Spec:      spec,
@@ -1524,12 +1527,82 @@ func TestReadonlyRoot(t *testing.T) {
 				t.Fatalf("error starting container: %v", err)
 			}
 
-			ws, err := c.Wait()
+			// Read mounts to check that root is readonly.
+			out, ws, err := executeCombinedOutput(c, "/bin/sh", "-c", "mount | grep ' / '")
+			if err != nil || ws != 0 {
+				t.Fatalf("exec failed, ws: %v, err: %v", ws, err)
+			}
+			t.Logf("root mount: %q", out)
+			if !strings.Contains(string(out), "(ro)") {
+				t.Errorf("root not mounted readonly: %q", out)
+			}
+
+			// Check that file cannot be created.
+			ws, err = execute(c, "/bin/touch", "/foo")
 			if err != nil {
-				t.Fatalf("error waiting on container: %v", err)
+				t.Fatalf("touch file in ro mount: %v", err)
 			}
 			if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM {
-				t.Fatalf("container failed, waitStatus: %v", ws)
+				t.Fatalf("wrong waitStatus: %v", ws)
+			}
+		})
+	}
+}
+
+func TestReadonlyMount(t *testing.T) {
+	for name, conf := range configsWithVFS2(t, all...) {
+		t.Run(name, func(t *testing.T) {
+			dir, err := ioutil.TempDir(testutil.TmpDir(), "ro-mount")
+			if err != nil {
+				t.Fatalf("ioutil.TempDir() failed: %v", err)
+			}
+			spec := testutil.NewSpecWithArgs("sleep", "100")
+			spec.Mounts = append(spec.Mounts, specs.Mount{
+				Destination: dir,
+				Source:      dir,
+				Type:        "bind",
+				Options:     []string{"ro"},
+			})
+			spec.Root.Readonly = false
+
+			_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+			if err != nil {
+				t.Fatalf("error setting up container: %v", err)
+			}
+			defer cleanup()
+
+			args := Args{
+				ID:        testutil.RandomContainerID(),
+				Spec:      spec,
+				BundleDir: bundleDir,
+			}
+			c, err := New(conf, args)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			defer c.Destroy()
+			if err := c.Start(conf); err != nil {
+				t.Fatalf("error starting container: %v", err)
+			}
+
+			// Read mounts to check that volume is readonly.
+			cmd := fmt.Sprintf("mount | grep ' %s '", dir)
+			out, ws, err := executeCombinedOutput(c, "/bin/sh", "-c", cmd)
+			if err != nil || ws != 0 {
+				t.Fatalf("exec failed, ws: %v, err: %v", ws, err)
+			}
+			t.Logf("mount: %q", out)
+			if !strings.Contains(string(out), "(ro)") {
+				t.Errorf("volume not mounted readonly: %q", out)
+			}
+
+			// Check that file cannot be created.
+			ws, err = execute(c, "/bin/touch", path.Join(dir, "file"))
+			if err != nil {
+				t.Fatalf("touch file in ro mount: %v", err)
+			}
+			if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM {
+				t.Fatalf("wrong WaitStatus: %v", ws)
 			}
 		})
 	}
@@ -1616,54 +1689,6 @@ func TestUIDMap(t *testing.T) {
 	}
 }
 
-func TestReadonlyMount(t *testing.T) {
-	for name, conf := range configsWithVFS2(t, overlay) {
-		t.Run(name, func(t *testing.T) {
-			dir, err := ioutil.TempDir(testutil.TmpDir(), "ro-mount")
-			spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file"))
-			if err != nil {
-				t.Fatalf("ioutil.TempDir() failed: %v", err)
-			}
-			spec.Mounts = append(spec.Mounts, specs.Mount{
-				Destination: dir,
-				Source:      dir,
-				Type:        "bind",
-				Options:     []string{"ro"},
-			})
-			spec.Root.Readonly = false
-
-			_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
-			if err != nil {
-				t.Fatalf("error setting up container: %v", err)
-			}
-			defer cleanup()
-
-			// Create, start and wait for the container.
-			args := Args{
-				ID:        testutil.RandomContainerID(),
-				Spec:      spec,
-				BundleDir: bundleDir,
-			}
-			c, err := New(conf, args)
-			if err != nil {
-				t.Fatalf("error creating container: %v", err)
-			}
-			defer c.Destroy()
-			if err := c.Start(conf); err != nil {
-				t.Fatalf("error starting container: %v", err)
-			}
-
-			ws, err := c.Wait()
-			if err != nil {
-				t.Fatalf("error waiting on container: %v", err)
-			}
-			if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM {
-				t.Fatalf("container failed, waitStatus: %v", ws)
-			}
-		})
-	}
-}
-
 // TestAbbreviatedIDs checks that runsc supports using abbreviated container
 // IDs in place of full IDs.
 func TestAbbreviatedIDs(t *testing.T) {
@@ -2116,21 +2141,13 @@ func TestMountPropagation(t *testing.T) {
 
 	// Check that mount didn't propagate to private mount.
 	privFile := filepath.Join(priv, "mnt", "file")
-	execArgs := &control.ExecArgs{
-		Filename: "/usr/bin/test",
-		Argv:     []string{"test", "!", "-f", privFile},
-	}
-	if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 {
+	if ws, err := execute(cont, "/usr/bin/test", "!", "-f", privFile); err != nil || ws != 0 {
 		t.Fatalf("exec: test ! -f %q, ws: %v, err: %v", privFile, ws, err)
 	}
 
 	// Check that mount propagated to slave mount.
 	slaveFile := filepath.Join(slave, "mnt", "file")
-	execArgs = &control.ExecArgs{
-		Filename: "/usr/bin/test",
-		Argv:     []string{"test", "-f", slaveFile},
-	}
-	if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 {
+	if ws, err := execute(cont, "/usr/bin/test", "-f", slaveFile); err != nil || ws != 0 {
 		t.Fatalf("exec: test -f %q, ws: %v, err: %v", privFile, ws, err)
 	}
 }
@@ -2196,11 +2213,7 @@ func TestMountSymlink(t *testing.T) {
 			// Check that symlink was resolved and mount was created where the symlink
 			// is pointing to.
 			file := path.Join(target, "file")
-			execArgs := &control.ExecArgs{
-				Filename: "/usr/bin/test",
-				Argv:     []string{"test", "-f", file},
-			}
-			if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 {
+			if ws, err := execute(cont, "/usr/bin/test", "-f", file); err != nil || ws != 0 {
 				t.Fatalf("exec: test -f %q, ws: %v, err: %v", file, ws, err)
 			}
 		})
@@ -2326,6 +2339,35 @@ func TestTTYField(t *testing.T) {
 	}
 }
 
+func execute(cont *Container, name string, arg ...string) (syscall.WaitStatus, error) {
+	args := &control.ExecArgs{
+		Filename: name,
+		Argv:     append([]string{name}, arg...),
+	}
+	return cont.executeSync(args)
+}
+
+func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte, syscall.WaitStatus, error) {
+	r, w, err := os.Pipe()
+	if err != nil {
+		return nil, 0, err
+	}
+	defer r.Close()
+
+	args := &control.ExecArgs{
+		Filename:    name,
+		Argv:        append([]string{name}, arg...),
+		FilePayload: urpc.FilePayload{Files: []*os.File{os.Stdin, w, w}},
+	}
+	ws, err := cont.executeSync(args)
+	w.Close()
+	if err != nil {
+		return nil, 0, err
+	}
+	out, err := ioutil.ReadAll(r)
+	return out, ws, err
+}
+
 // executeSync synchronously executes a new process.
 func (cont *Container) executeSync(args *control.ExecArgs) (syscall.WaitStatus, error) {
 	pid, err := cont.Execute(args)
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index 5b790c6c8d..952215ec19 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -1517,8 +1517,7 @@ func TestMultiContainerGoferKilled(t *testing.T) {
 	}
 
 	// Check that container isn't running anymore.
-	args := &control.ExecArgs{Argv: []string{"/bin/true"}}
-	if _, err := c.executeSync(args); err == nil {
+	if _, err := execute(c, "/bin/true"); err == nil {
 		t.Fatalf("Container %q was not stopped after gofer death", c.ID)
 	}
 
@@ -1533,8 +1532,7 @@ func TestMultiContainerGoferKilled(t *testing.T) {
 		if err := waitForProcessList(c, pl); err != nil {
 			t.Errorf("Container %q was affected by another container: %v", c.ID, err)
 		}
-		args := &control.ExecArgs{Argv: []string{"/bin/true"}}
-		if _, err := c.executeSync(args); err != nil {
+		if _, err := execute(c, "/bin/true"); err != nil {
 			t.Fatalf("Container %q was affected by another container: %v", c.ID, err)
 		}
 	}
@@ -1556,8 +1554,7 @@ func TestMultiContainerGoferKilled(t *testing.T) {
 
 	// Check that entire sandbox isn't running anymore.
 	for _, c := range containers {
-		args := &control.ExecArgs{Argv: []string{"/bin/true"}}
-		if _, err := c.executeSync(args); err == nil {
+		if _, err := execute(c, "/bin/true"); err == nil {
 			t.Fatalf("Container %q was not stopped after gofer death", c.ID)
 		}
 	}
@@ -1719,12 +1716,11 @@ func TestMultiContainerHomeEnvDir(t *testing.T) {
 				homeDirs[name] = homeFile
 			}
 
-			// We will sleep in the root container in order to ensure that
-			// the root container doesn't terminate before sub containers can be
-			// created.
+			// We will sleep in the root container in order to ensure that the root
+			// container doesn't terminate before sub containers can be created.
 			rootCmd := []string{"/bin/sh", "-c", fmt.Sprintf("printf \"$HOME\" > %s; sleep 1000", homeDirs["root"].Name())}
 			subCmd := []string{"/bin/sh", "-c", fmt.Sprintf("printf \"$HOME\" > %s", homeDirs["sub"].Name())}
-			execCmd := []string{"/bin/sh", "-c", fmt.Sprintf("printf \"$HOME\" > %s", homeDirs["exec"].Name())}
+			execCmd := fmt.Sprintf("printf \"$HOME\" > %s", homeDirs["exec"].Name())
 
 			// Setup the containers, a root container and sub container.
 			specConfig, ids := createSpecs(rootCmd, subCmd)
@@ -1735,9 +1731,8 @@ func TestMultiContainerHomeEnvDir(t *testing.T) {
 			defer cleanup()
 
 			// Exec into the root container synchronously.
-			args := &control.ExecArgs{Argv: execCmd}
-			if _, err := containers[0].executeSync(args); err != nil {
-				t.Errorf("error executing %+v: %v", args, err)
+			if _, err := execute(containers[0], "/bin/sh", "-c", execCmd); err != nil {
+				t.Errorf("error executing %+v: %v", execCmd, err)
 			}
 
 			// Wait for the subcontainer to finish.
diff --git a/runsc/container/shared_volume_test.go b/runsc/container/shared_volume_test.go
index 4ea8fefeea..cb5bffb89b 100644
--- a/runsc/container/shared_volume_test.go
+++ b/runsc/container/shared_volume_test.go
@@ -168,11 +168,7 @@ func TestSharedVolume(t *testing.T) {
 
 func checkFile(c *Container, filename string, want []byte) error {
 	cpy := filename + ".copy"
-	argsCp := &control.ExecArgs{
-		Filename: "/bin/cp",
-		Argv:     []string{"cp", "-f", filename, cpy},
-	}
-	if _, err := c.executeSync(argsCp); err != nil {
+	if _, err := execute(c, "/bin/cp", "-f", filename, cpy); err != nil {
 		return fmt.Errorf("unexpected error copying file %q to %q: %v", filename, cpy, err)
 	}
 	got, err := ioutil.ReadFile(cpy)
@@ -235,11 +231,7 @@ func TestSharedVolumeFile(t *testing.T) {
 	}
 
 	// Append to file inside the container and check that content is not lost.
-	argsAppend := &control.ExecArgs{
-		Filename: "/bin/bash",
-		Argv:     []string{"bash", "-c", "echo -n sandbox- >> " + filename},
-	}
-	if _, err := c.executeSync(argsAppend); err != nil {
+	if _, err := execute(c, "/bin/bash", "-c", "echo -n sandbox- >> "+filename); err != nil {
 		t.Fatalf("unexpected error appending file %q: %v", filename, err)
 	}
 	want = []byte("host-sandbox-")

From a530de2b29c5f5fa0236ce95f7bc788effab446a Mon Sep 17 00:00:00 2001
From: Ayush Ranjan <ayushranjan@google.com>
Date: Tue, 8 Sep 2020 14:40:57 -0700
Subject: [PATCH 158/211] [vfs] overlayfs: decref VD when not using it.

overlay/filesystem.go:lookupLocked() did not DecRef the VD on some error paths
when it would not end up saving or using the VD.

PiperOrigin-RevId: 330589742
---
 pkg/sentry/fsimpl/overlay/filesystem.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go
index 87afeeaf30..b530851c52 100644
--- a/pkg/sentry/fsimpl/overlay/filesystem.go
+++ b/pkg/sentry/fsimpl/overlay/filesystem.go
@@ -211,6 +211,7 @@ func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name str
 			lookupErr = err
 			return false
 		}
+		defer childVD.DecRef(ctx)
 
 		mask := uint32(linux.STATX_TYPE)
 		if !existsOnAnyLayer {
@@ -249,6 +250,7 @@ func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name str
 		}
 
 		// Update child to include this layer.
+		childVD.IncRef()
 		if isUpper {
 			child.upperVD = childVD
 			child.copiedUp = 1

From 0170be90f67b4dc075710d1cd82ccd8d3ac9156a Mon Sep 17 00:00:00 2001
From: gVisor bot <gvisor-bot@google.com>
Date: Tue, 8 Sep 2020 15:52:01 -0700
Subject: [PATCH 159/211] Implement ioctl with enable verity

ioctl with FS_IOC_ENABLE_VERITY is added to verity file system to enable
a file as verity file. For a file, a Merkle tree is built with its data.
For a directory, a Merkle tree is built with the root hashes of its
children.

PiperOrigin-RevId: 330604368
---
 pkg/abi/linux/ioctl.go             |   5 ++
 pkg/sentry/fsimpl/verity/BUILD     |   2 +
 pkg/sentry/fsimpl/verity/verity.go | 128 +++++++++++++++++++++++++++++
 3 files changed, 135 insertions(+)

diff --git a/pkg/abi/linux/ioctl.go b/pkg/abi/linux/ioctl.go
index d6dbedc3e5..a4fe7501da 100644
--- a/pkg/abi/linux/ioctl.go
+++ b/pkg/abi/linux/ioctl.go
@@ -113,6 +113,11 @@ const (
 	_IOC_DIRSHIFT  = _IOC_SIZESHIFT + _IOC_SIZEBITS
 )
 
+// Constants from uapi/linux/fsverity.h.
+const (
+	FS_IOC_ENABLE_VERITY = 1082156677
+)
+
 // IOC outputs the result of _IOC macro in asm-generic/ioctl.h.
 func IOC(dir, typ, nr, size uint32) uint32 {
 	return uint32(dir)<<_IOC_DIRSHIFT | typ<<_IOC_TYPESHIFT | nr<<_IOC_NRSHIFT | size<<_IOC_SIZESHIFT
diff --git a/pkg/sentry/fsimpl/verity/BUILD b/pkg/sentry/fsimpl/verity/BUILD
index 326c4ed902..d28450e53d 100644
--- a/pkg/sentry/fsimpl/verity/BUILD
+++ b/pkg/sentry/fsimpl/verity/BUILD
@@ -14,11 +14,13 @@ go_library(
         "//pkg/context",
         "//pkg/fspath",
         "//pkg/merkletree",
+        "//pkg/sentry/arch",
         "//pkg/sentry/fs/lock",
         "//pkg/sentry/kernel/auth",
         "//pkg/sentry/socket/unix/transport",
         "//pkg/sentry/vfs",
         "//pkg/sync",
         "//pkg/syserror",
+        "//pkg/usermem",
     ],
 )
diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go
index eedb5f4848..0bac8e9382 100644
--- a/pkg/sentry/fsimpl/verity/verity.go
+++ b/pkg/sentry/fsimpl/verity/verity.go
@@ -22,16 +22,21 @@
 package verity
 
 import (
+	"strconv"
 	"sync/atomic"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/fspath"
+
+	"gvisor.dev/gvisor/pkg/merkletree"
+	"gvisor.dev/gvisor/pkg/sentry/arch"
 	fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/syserror"
+	"gvisor.dev/gvisor/pkg/usermem"
 )
 
 // Name is the default filesystem name.
@@ -471,6 +476,129 @@ func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions)
 	return syserror.EPERM
 }
 
+// generateMerkle generates a Merkle tree file for fd. If fd points to a file
+// /foo/bar, a Merkle tree file /foo/.merkle.verity.bar is generated. The root
+// hash of the generated Merkle tree and the data size are returned.
+// If fd points to a regular file, the data is the content of the file. If fd
+// points to a directory, the data is all root hashes of its children, written
+// to the Merkle tree file.
+func (fd *fileDescription) generateMerkle(ctx context.Context) ([]byte, uint64, error) {
+	fdReader := vfs.FileReadWriteSeeker{
+		FD:  fd.lowerFD,
+		Ctx: ctx,
+	}
+	merkleReader := vfs.FileReadWriteSeeker{
+		FD:  fd.merkleReader,
+		Ctx: ctx,
+	}
+	merkleWriter := vfs.FileReadWriteSeeker{
+		FD:  fd.merkleWriter,
+		Ctx: ctx,
+	}
+	var rootHash []byte
+	var dataSize uint64
+
+	switch atomic.LoadUint32(&fd.d.mode) & linux.S_IFMT {
+	case linux.S_IFREG:
+		// For a regular file, generate a Merkle tree based on its
+		// content.
+		var err error
+		stat, err := fd.lowerFD.Stat(ctx, vfs.StatOptions{})
+		if err != nil {
+			return nil, 0, err
+		}
+		dataSize = stat.Size
+
+		rootHash, err = merkletree.Generate(&fdReader, int64(dataSize), &merkleReader, &merkleWriter, false /* dataAndTreeInSameFile */)
+		if err != nil {
+			return nil, 0, err
+		}
+	case linux.S_IFDIR:
+		// For a directory, generate a Merkle tree based on the root
+		// hashes of its children that have already been written to the
+		// Merkle tree file.
+		merkleStat, err := fd.merkleReader.Stat(ctx, vfs.StatOptions{})
+		if err != nil {
+			return nil, 0, err
+		}
+		dataSize = merkleStat.Size
+
+		rootHash, err = merkletree.Generate(&merkleReader, int64(dataSize), &merkleReader, &merkleWriter, true /* dataAndTreeInSameFile */)
+		if err != nil {
+			return nil, 0, err
+		}
+	default:
+		// TODO(b/167728857): Investigate whether and how we should
+		// enable other types of file.
+		return nil, 0, syserror.EINVAL
+	}
+	return rootHash, dataSize, nil
+}
+
+// enableVerity enables verity features on fd by generating a Merkle tree file
+// and storing its root hash in its parent directory's Merkle tree.
+func (fd *fileDescription) enableVerity(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+	if !fd.d.fs.allowRuntimeEnable {
+		return 0, syserror.EPERM
+	}
+
+	// Lock to prevent other threads from enabling verity on or accessing
+	// the file while it's being enabled.
+	verityMu.Lock()
+	defer verityMu.Unlock()
+
+	if fd.lowerFD == nil || fd.merkleReader == nil || fd.merkleWriter == nil || fd.parentMerkleWriter == nil {
+		panic("Unexpected verity fd: missing expected underlying fds")
+	}
+
+	rootHash, dataSize, err := fd.generateMerkle(ctx)
+	if err != nil {
+		return 0, err
+	}
+
+	stat, err := fd.parentMerkleWriter.Stat(ctx, vfs.StatOptions{})
+	if err != nil {
+		return 0, err
+	}
+
+	// Write the root hash of fd to the parent directory's Merkle tree
+	// file, as it should be part of the parent Merkle tree data.
+	// parentMerkleWriter is open with O_APPEND, so it should write
+	// directly to the end of the file.
+	if _, err = fd.parentMerkleWriter.Write(ctx, usermem.BytesIOSequence(rootHash), vfs.WriteOptions{}); err != nil {
+		return 0, err
+	}
+
+	// Record the offset of the root hash of fd in parent directory's
+	// Merkle tree file.
+	if err := fd.merkleWriter.SetXattr(ctx, &vfs.SetXattrOptions{
+		Name:  merkleOffsetInParentXattr,
+		Value: strconv.Itoa(int(stat.Size)),
+	}); err != nil {
+		return 0, err
+	}
+
+	// Record the size of the data being hashed for fd.
+	if err := fd.merkleWriter.SetXattr(ctx, &vfs.SetXattrOptions{
+		Name:  merkleSizeXattr,
+		Value: strconv.Itoa(int(dataSize)),
+	}); err != nil {
+		return 0, err
+	}
+	fd.d.rootHash = append(fd.d.rootHash, rootHash...)
+	return 0, nil
+}
+
+// Ioctl implements vfs.FileDescriptionImpl.Ioctl.
+func (fd *fileDescription) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+	switch cmd := args[1].Uint(); cmd {
+	case linux.FS_IOC_ENABLE_VERITY:
+		return fd.enableVerity(ctx, uio, args)
+	default:
+		return fd.lowerFD.Ioctl(ctx, uio, args)
+	}
+}
+
 // LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
 func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
 	return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)

From 6b2ba821c791ab1713cada80d9b722496224f663 Mon Sep 17 00:00:00 2001
From: gVisor bot <gvisor-bot@google.com>
Date: Tue, 8 Sep 2020 15:54:22 -0700
Subject: [PATCH 160/211] Add check for both child and childMerkle ENOENT

The check in verity walk returns error for non ENOENT cases, and all
ENOENT results should be checked. This case was missing.

PiperOrigin-RevId: 330604771
---
 pkg/sentry/fsimpl/verity/filesystem.go | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pkg/sentry/fsimpl/verity/filesystem.go b/pkg/sentry/fsimpl/verity/filesystem.go
index e944fd5d2e..2cf0a38c99 100644
--- a/pkg/sentry/fsimpl/verity/filesystem.go
+++ b/pkg/sentry/fsimpl/verity/filesystem.go
@@ -414,6 +414,14 @@ func (fs *filesystem) lookupAndVerifyLocked(ctx context.Context, parent *dentry,
 			}
 			panic(fmt.Sprintf("Expected Merkle file for target %s but none found", parentPath+"/"+name))
 		}
+	} else if childErr == syserror.ENOENT && childMerkleErr == syserror.ENOENT {
+		// Both the child and the corresponding Merkle tree are missing.
+		// This could be an unexpected modification or due to an
+		// incorrect parameter.
+		// TODO(b/167752508): Investigate possible ways to differentiate
+		// cases where both files were deleted from cases where they
+		// never existed in the file system.
+		panic(fmt.Sprintf("Failed to find file %s", parentPath+"/"+name))
 	}
 
 	mask := uint32(linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID)

From 397dc3ae5d2d4f836a7820a618c432ba49e4f4a7 Mon Sep 17 00:00:00 2001
From: Ayush Ranjan <ayushranjan@google.com>
Date: Tue, 8 Sep 2020 17:54:13 -0700
Subject: [PATCH 161/211] [vfs] overlayfs: Fix socket tests.

- BindSocketThenOpen test was expecting the incorrect error when opening
  a socket. Fixed that.
- VirtualFilesystem.BoundEndpointAt should not require pop.Path.Begin.Ok()
  because the filesystem implementations do not need to walk to the parent
  dentry. This check also exists for MknodAt, MkdirAt, RmdirAt, SymlinkAt and
  UnlinkAt but those filesystem implementations also need to walk to the parent
  dentry. So that check is valid. Added some syscall tests to test this.

PiperOrigin-RevId: 330625220
---
 pkg/sentry/vfs/vfs.go          | 16 ++++++++++------
 test/syscalls/linux/mkdir.cc   |  7 +++++++
 test/syscalls/linux/mknod.cc   |  8 ++++++++
 test/syscalls/linux/symlink.cc | 10 ++++++++++
 test/syscalls/linux/unlink.cc  | 14 ++++++++++++++
 5 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go
index 6825d81a56..ed1cf99ba2 100644
--- a/pkg/sentry/vfs/vfs.go
+++ b/pkg/sentry/vfs/vfs.go
@@ -297,6 +297,8 @@ func (vfs *VirtualFilesystem) LinkAt(ctx context.Context, creds *auth.Credential
 // MkdirAt creates a directory at the given path.
 func (vfs *VirtualFilesystem) MkdirAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *MkdirOptions) error {
 	if !pop.Path.Begin.Ok() {
+		// pop.Path should not be empty in operations that create/delete files.
+		// This is consistent with mkdirat(dirfd, "", mode).
 		if pop.Path.Absolute {
 			return syserror.EEXIST
 		}
@@ -333,6 +335,8 @@ func (vfs *VirtualFilesystem) MkdirAt(ctx context.Context, creds *auth.Credentia
 // error from the syserror package.
 func (vfs *VirtualFilesystem) MknodAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *MknodOptions) error {
 	if !pop.Path.Begin.Ok() {
+		// pop.Path should not be empty in operations that create/delete files.
+		// This is consistent with mknodat(dirfd, "", mode, dev).
 		if pop.Path.Absolute {
 			return syserror.EEXIST
 		}
@@ -518,6 +522,8 @@ func (vfs *VirtualFilesystem) RenameAt(ctx context.Context, creds *auth.Credenti
 // RmdirAt removes the directory at the given path.
 func (vfs *VirtualFilesystem) RmdirAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation) error {
 	if !pop.Path.Begin.Ok() {
+		// pop.Path should not be empty in operations that create/delete files.
+		// This is consistent with unlinkat(dirfd, "", AT_REMOVEDIR).
 		if pop.Path.Absolute {
 			return syserror.EBUSY
 		}
@@ -599,6 +605,8 @@ func (vfs *VirtualFilesystem) StatFSAt(ctx context.Context, creds *auth.Credenti
 // SymlinkAt creates a symbolic link at the given path with the given target.
 func (vfs *VirtualFilesystem) SymlinkAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, target string) error {
 	if !pop.Path.Begin.Ok() {
+		// pop.Path should not be empty in operations that create/delete files.
+		// This is consistent with symlinkat(oldpath, newdirfd, "").
 		if pop.Path.Absolute {
 			return syserror.EEXIST
 		}
@@ -631,6 +639,8 @@ func (vfs *VirtualFilesystem) SymlinkAt(ctx context.Context, creds *auth.Credent
 // UnlinkAt deletes the non-directory file at the given path.
 func (vfs *VirtualFilesystem) UnlinkAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation) error {
 	if !pop.Path.Begin.Ok() {
+		// pop.Path should not be empty in operations that create/delete files.
+		// This is consistent with unlinkat(dirfd, "", 0).
 		if pop.Path.Absolute {
 			return syserror.EBUSY
 		}
@@ -662,12 +672,6 @@ func (vfs *VirtualFilesystem) UnlinkAt(ctx context.Context, creds *auth.Credenti
 
 // BoundEndpointAt gets the bound endpoint at the given path, if one exists.
 func (vfs *VirtualFilesystem) BoundEndpointAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *BoundEndpointOptions) (transport.BoundEndpoint, error) {
-	if !pop.Path.Begin.Ok() {
-		if pop.Path.Absolute {
-			return nil, syserror.ECONNREFUSED
-		}
-		return nil, syserror.ENOENT
-	}
 	rp := vfs.getResolvingPath(creds, pop)
 	for {
 		bep, err := rp.mount.fs.impl.BoundEndpointAt(ctx, rp, *opts)
diff --git a/test/syscalls/linux/mkdir.cc b/test/syscalls/linux/mkdir.cc
index 4036a92754..27758203d1 100644
--- a/test/syscalls/linux/mkdir.cc
+++ b/test/syscalls/linux/mkdir.cc
@@ -82,6 +82,13 @@ TEST_F(MkdirTest, FailsOnDirWithoutWritePerms) {
               SyscallFailsWithErrno(EACCES));
 }
 
+TEST_F(MkdirTest, MkdirAtEmptyPath) {
+  ASSERT_THAT(mkdir(dirname_.c_str(), 0777), SyscallSucceeds());
+  auto fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(dirname_, O_RDONLY | O_DIRECTORY, 0666));
+  EXPECT_THAT(mkdirat(fd.get(), "", 0777), SyscallFailsWithErrno(ENOENT));
+}
+
 }  // namespace
 
 }  // namespace testing
diff --git a/test/syscalls/linux/mknod.cc b/test/syscalls/linux/mknod.cc
index 2ba8c11b8c..89e4564e84 100644
--- a/test/syscalls/linux/mknod.cc
+++ b/test/syscalls/linux/mknod.cc
@@ -203,6 +203,14 @@ TEST(MknodTest, FifoTruncNoOp) {
   EXPECT_THAT(ftruncate(wfd.get(), 0), SyscallFailsWithErrno(EINVAL));
 }
 
+TEST(MknodTest, MknodAtEmptyPath) {
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY | O_DIRECTORY, 0666));
+  EXPECT_THAT(mknodat(fd.get(), "", S_IFREG | 0777, 0),
+              SyscallFailsWithErrno(ENOENT));
+}
+
 }  // namespace
 
 }  // namespace testing
diff --git a/test/syscalls/linux/symlink.cc b/test/syscalls/linux/symlink.cc
index aa1f32c859..a5d7efe942 100644
--- a/test/syscalls/linux/symlink.cc
+++ b/test/syscalls/linux/symlink.cc
@@ -326,6 +326,16 @@ TEST(SymlinkTest, FollowUpdatesATime) {
   EXPECT_LT(st_before_follow.st_atime, st_after_follow.st_atime);
 }
 
+TEST(SymlinkTest, SymlinkAtEmptyPath) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  auto fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY | O_DIRECTORY, 0666));
+  EXPECT_THAT(symlinkat(file.path().c_str(), fd.get(), ""),
+              SyscallFailsWithErrno(ENOENT));
+}
+
 class ParamSymlinkTest : public ::testing::TestWithParam<std::string> {};
 
 // Test that creating an existing symlink with creat will create the target.
diff --git a/test/syscalls/linux/unlink.cc b/test/syscalls/linux/unlink.cc
index 2040375c95..061e2e0f1d 100644
--- a/test/syscalls/linux/unlink.cc
+++ b/test/syscalls/linux/unlink.cc
@@ -208,6 +208,20 @@ TEST(RmdirTest, CanRemoveWithTrailingSlashes) {
   ASSERT_THAT(rmdir(slashslash.c_str()), SyscallSucceeds());
 }
 
+TEST(UnlinkTest, UnlinkAtEmptyPath) {
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path()));
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+  EXPECT_THAT(unlinkat(fd.get(), "", 0), SyscallFailsWithErrno(ENOENT));
+
+  auto dirInDir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path()));
+  auto dirFD = ASSERT_NO_ERRNO_AND_VALUE(
+      Open(dirInDir.path(), O_RDONLY | O_DIRECTORY, 0666));
+  EXPECT_THAT(unlinkat(dirFD.get(), "", AT_REMOVEDIR),
+              SyscallFailsWithErrno(ENOENT));
+}
+
 }  // namespace
 
 }  // namespace testing

From f9fa8b118f0f1e4a9fbe7fa5f5a367ba8105ddf0 Mon Sep 17 00:00:00 2001
From: Jamie Liu <jamieliu@google.com>
Date: Tue, 8 Sep 2020 18:31:17 -0700
Subject: [PATCH 162/211] Implement synthetic mountpoints for kernfs.

PiperOrigin-RevId: 330629897
---
 pkg/sentry/fsimpl/kernfs/BUILD                |   1 +
 pkg/sentry/fsimpl/kernfs/filesystem.go        |   5 +-
 .../fsimpl/kernfs/synthetic_directory.go      | 102 ++++++++++++++++++
 3 files changed, 107 insertions(+), 1 deletion(-)
 create mode 100644 pkg/sentry/fsimpl/kernfs/synthetic_directory.go

diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD
index 637dca70cf..5e91e0536c 100644
--- a/pkg/sentry/fsimpl/kernfs/BUILD
+++ b/pkg/sentry/fsimpl/kernfs/BUILD
@@ -83,6 +83,7 @@ go_library(
         "slot_list.go",
         "static_directory_refs.go",
         "symlink.go",
+        "synthetic_directory.go",
     ],
     visibility = ["//pkg/sentry:internal"],
     deps = [
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index c428053e8b..d7d3e8f483 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -360,7 +360,10 @@ func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
 	defer rp.Mount().EndWrite()
 	childVFSD, err := parentInode.NewDir(ctx, pc, opts)
 	if err != nil {
-		return err
+		if !opts.ForSyntheticMountpoint || err == syserror.EEXIST {
+			return err
+		}
+		childVFSD = newSyntheticDirectory(rp.Credentials(), opts.Mode)
 	}
 	parentVFSD.Impl().(*Dentry).InsertChild(pc, childVFSD.Impl().(*Dentry))
 	return nil
diff --git a/pkg/sentry/fsimpl/kernfs/synthetic_directory.go b/pkg/sentry/fsimpl/kernfs/synthetic_directory.go
new file mode 100644
index 0000000000..01ba72fa8f
--- /dev/null
+++ b/pkg/sentry/fsimpl/kernfs/synthetic_directory.go
@@ -0,0 +1,102 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kernfs
+
+import (
+	"fmt"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+	"gvisor.dev/gvisor/pkg/syserror"
+)
+
+// syntheticDirectory implements kernfs.Inode for a directory created by
+// MkdirAt(ForSyntheticMountpoint=true).
+//
+// +stateify savable
+type syntheticDirectory struct {
+	InodeAttrs
+	InodeNoStatFS
+	InodeNoopRefCount
+	InodeNoDynamicLookup
+	InodeNotSymlink
+	OrderedChildren
+
+	locks vfs.FileLocks
+}
+
+var _ Inode = (*syntheticDirectory)(nil)
+
+func newSyntheticDirectory(creds *auth.Credentials, perm linux.FileMode) *vfs.Dentry {
+	inode := &syntheticDirectory{}
+	inode.Init(creds, 0 /* devMajor */, 0 /* devMinor */, 0 /* ino */, perm)
+	d := &Dentry{}
+	d.Init(inode)
+	return &d.vfsd
+}
+
+func (dir *syntheticDirectory) Init(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode) {
+	if perm&^linux.PermissionsMask != 0 {
+		panic(fmt.Sprintf("perm contains non-permission bits: %#o", perm))
+	}
+	dir.InodeAttrs.Init(creds, devMajor, devMinor, ino, linux.S_IFDIR|perm)
+	dir.OrderedChildren.Init(OrderedChildrenOptions{
+		Writable: true,
+	})
+}
+
+// Open implements Inode.Open.
+func (dir *syntheticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+	fd, err := NewGenericDirectoryFD(rp.Mount(), vfsd, &dir.OrderedChildren, &dir.locks, &opts, GenericDirectoryFDOptions{})
+	if err != nil {
+		return nil, err
+	}
+	return &fd.vfsfd, nil
+}
+
+// NewFile implements Inode.NewFile.
+func (dir *syntheticDirectory) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*vfs.Dentry, error) {
+	return nil, syserror.EPERM
+}
+
+// NewDir implements Inode.NewDir.
+func (dir *syntheticDirectory) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*vfs.Dentry, error) {
+	if !opts.ForSyntheticMountpoint {
+		return nil, syserror.EPERM
+	}
+	subdird := newSyntheticDirectory(auth.CredentialsFromContext(ctx), opts.Mode&linux.PermissionsMask)
+	if err := dir.OrderedChildren.Insert(name, subdird); err != nil {
+		subdird.DecRef(ctx)
+		return nil, err
+	}
+	return subdird, nil
+}
+
+// NewLink implements Inode.NewLink.
+func (dir *syntheticDirectory) NewLink(ctx context.Context, name string, target Inode) (*vfs.Dentry, error) {
+	return nil, syserror.EPERM
+}
+
+// NewSymlink implements Inode.NewSymlink.
+func (dir *syntheticDirectory) NewSymlink(ctx context.Context, name, target string) (*vfs.Dentry, error) {
+	return nil, syserror.EPERM
+}
+
+// NewNode implements Inode.NewNode.
+func (dir *syntheticDirectory) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (*vfs.Dentry, error) {
+	return nil, syserror.EPERM
+}

From a29cc274a1478db552f6a98efe52ecdf96c71cd8 Mon Sep 17 00:00:00 2001
From: Ian Lewis <ianlewis@google.com>
Date: Tue, 8 Sep 2020 21:57:28 -0700
Subject: [PATCH 163/211] Add a Docker Compose tutorial

Adds a Docker Compose tutorial to the website that shows how to start a
Wordpress site and includes information about how to get DNS working.

Fixes #115

PiperOrigin-RevId: 330652842
---
 g3doc/user_guide/tutorials/BUILD             |  13 ++-
 g3doc/user_guide/tutorials/docker-compose.md | 100 +++++++++++++++++++
 g3doc/user_guide/tutorials/docker.md         |   8 +-
 website/BUILD                                |   1 +
 4 files changed, 117 insertions(+), 5 deletions(-)
 create mode 100644 g3doc/user_guide/tutorials/docker-compose.md

diff --git a/g3doc/user_guide/tutorials/BUILD b/g3doc/user_guide/tutorials/BUILD
index 405026a333..f405349b3f 100644
--- a/g3doc/user_guide/tutorials/BUILD
+++ b/g3doc/user_guide/tutorials/BUILD
@@ -14,6 +14,15 @@ doc(
     weight = "10",
 )
 
+doc(
+    name = "docker_compose",
+    src = "docker-compose.md",
+    category = "User Guide",
+    permalink = "/docs/tutorials/docker-compose/",
+    subcategory = "Tutorials",
+    weight = "20",
+)
+
 doc(
     name = "kubernetes",
     src = "kubernetes.md",
@@ -24,7 +33,7 @@ doc(
     ],
     permalink = "/docs/tutorials/kubernetes/",
     subcategory = "Tutorials",
-    weight = "20",
+    weight = "30",
 )
 
 doc(
@@ -33,5 +42,5 @@ doc(
     category = "User Guide",
     permalink = "/docs/tutorials/cni/",
     subcategory = "Tutorials",
-    weight = "30",
+    weight = "40",
 )
diff --git a/g3doc/user_guide/tutorials/docker-compose.md b/g3doc/user_guide/tutorials/docker-compose.md
new file mode 100644
index 0000000000..3284231f80
--- /dev/null
+++ b/g3doc/user_guide/tutorials/docker-compose.md
@@ -0,0 +1,100 @@
+# Wordpress with Docker Compose
+
+This page shows you how to deploy a sample [WordPress][wordpress] site using
+[Docker Compose][docker-compose].
+
+### Before you begin
+
+[Follow these instructions][docker-install] to install runsc with Docker. This
+document assumes that Docker and Docker Compose are installed and the runtime
+name chosen for gVisor is `runsc`.
+
+### Configuration
+
+We'll start by creating the `docker-compose.yaml` file to specify our services.
+We will specify two services, a `wordpress` service for the Wordpress Apache
+server, and a `db` service for MySQL. We will configure Wordpress to connect to
+MySQL via the `db` service host name.
+
+> **Note:** Docker Compose uses its own network by default and allows services
+> to communicate using their service name. Docker Compose does this by setting
+> up a DNS server at IP address 127.0.0.11 and configuring containers to use it
+> via [resolv.conf][resolv.conf]. This IP is not addressable inside a gVisor
+> sandbox so it's important that we set the DNS IP address to the alternative
+> `8.8.8.8` and use a network that allows routing to it. See
+> [Networking in Compose][compose-networking] for more details.
+
+> **Note:** The `runtime` field was removed from services in the 3.x version of
+> the API in versions of docker-compose < 1.27.0. You will need to write your
+> `docker-compose.yaml` file using the 2.x format or use docker-compose >=
+> 1.27.0. See this [issue](https://github.com/docker/compose/issues/6239) for
+> more details.
+
+```yaml
+version: '2.3'
+
+services:
+   db:
+     image: mysql:5.7
+     volumes:
+       - db_data:/var/lib/mysql
+     restart: always
+     environment:
+       MYSQL_ROOT_PASSWORD: somewordpress
+       MYSQL_DATABASE: wordpress
+       MYSQL_USER: wordpress
+       MYSQL_PASSWORD: wordpress
+     # All services must be on the same network to communicate.
+     network_mode: "bridge"
+
+   wordpress:
+     depends_on:
+       - db
+     # When using the "bridge" network specify links.
+     links:
+       - db
+     image: wordpress:latest
+     ports:
+       - "8080:80"
+     restart: always
+     environment:
+       WORDPRESS_DB_HOST: db:3306
+       WORDPRESS_DB_USER: wordpress
+       WORDPRESS_DB_PASSWORD: wordpress
+       WORDPRESS_DB_NAME: wordpress
+     # Specify the dns address if needed.
+     dns:
+       - 8.8.8.8
+     # All services must be on the same network to communicate.
+     network_mode: "bridge"
+     # Specify the runtime used by Docker. Must be set up in
+     #  /etc/docker/daemon.json.
+     runtime: "runsc"
+
+volumes:
+    db_data: {}
+```
+
+Once you have a `docker-compose.yaml` in the current directory you can start the
+containers:
+
+```bash
+docker-compose up
+```
+
+Once the containers have started you can access wordpress at
+http://localhost:8080.
+
+Congrats! You now have a working wordpress site up and running using Docker
+Compose.
+
+### What's next
+
+Learn how to deploy [WordPress with Kubernetes][wordpress-k8s].
+
+[docker-compose]: https://docs.docker.com/compose/
+[docker-install]: ../quick_start/docker.md
+[wordpress]: https://wordpress.com/
+[resolv.conf]: https://man7.org/linux/man-pages/man5/resolv.conf.5.html
+[wordpress-k8s]: kubernetes.md
+[compose-networking]: https://docs.docker.com/compose/networking/
diff --git a/g3doc/user_guide/tutorials/docker.md b/g3doc/user_guide/tutorials/docker.md
index 7055600385..9ca01da2ac 100644
--- a/g3doc/user_guide/tutorials/docker.md
+++ b/g3doc/user_guide/tutorials/docker.md
@@ -60,9 +60,11 @@ Congratulations! You have just deployed a WordPress site using Docker.
 
 ### What's next
 
-[Learn how to deploy WordPress with Kubernetes][wordpress-k8s].
+Learn how to deploy WordPress with [Kubernetes][wordpress-k8s] or
+[Docker Compose][wordpress-compose].
 
 [docker]: https://www.docker.com/
-[docker-install]: /docs/user_guide/quick_start/docker/
+[docker-install]: ../quick_start/docker.md
 [wordpress]: https://wordpress.com/
-[wordpress-k8s]: /docs/tutorials/kubernetes/
+[wordpress-k8s]: kubernetes.md
+[wordpress-compose]: docker-compose.md
diff --git a/website/BUILD b/website/BUILD
index 7b61d13c89..6d92d91031 100644
--- a/website/BUILD
+++ b/website/BUILD
@@ -157,6 +157,7 @@ docs(
         "//g3doc/user_guide/quick_start:oci",
         "//g3doc/user_guide/tutorials:cni",
         "//g3doc/user_guide/tutorials:docker",
+        "//g3doc/user_guide/tutorials:docker_compose",
         "//g3doc/user_guide/tutorials:kubernetes",
     ],
 )

From a17a13c5e1b5f42a3ad2c1f269cfd0aeb918348f Mon Sep 17 00:00:00 2001
From: Ian Lewis <ianlewis@google.com>
Date: Wed, 9 Sep 2020 09:06:26 -0700
Subject: [PATCH 164/211] Add syntax highlighting to website

Adds a syntax highlighting theme css so that code snippets are highlighted
properly.

PiperOrigin-RevId: 330733737
---
 images/jekyll/Dockerfile |  7 ++++++-
 images/jekyll/build.sh   | 21 +++++++++++++++++++++
 website/css/main.scss    | 15 ++++++++++-----
 3 files changed, 37 insertions(+), 6 deletions(-)
 create mode 100755 images/jekyll/build.sh

diff --git a/images/jekyll/Dockerfile b/images/jekyll/Dockerfile
index ba039ba159..ae19f3bfc3 100644
--- a/images/jekyll/Dockerfile
+++ b/images/jekyll/Dockerfile
@@ -1,5 +1,6 @@
 FROM jekyll/jekyll:4.0.0
 USER root
+
 RUN gem install \
         html-proofer:3.10.2 \
         nokogiri:1.10.1 \
@@ -10,5 +11,9 @@ RUN gem install \
         jekyll-relative-links:0.6.1 \
         jekyll-feed:0.13.0 \
         jekyll-sitemap:1.4.0
+
+# checks.rb is used with html-proofer for presubmit checks.
 COPY checks.rb /checks.rb
-CMD ["/usr/gem/gems/jekyll-4.0.0/exe/jekyll", "build", "-t", "-s", "/input", "-d", "/output"]
+
+COPY build.sh /build.sh
+CMD ["/build.sh"]
diff --git a/images/jekyll/build.sh b/images/jekyll/build.sh
new file mode 100755
index 0000000000..bfceb27816
--- /dev/null
+++ b/images/jekyll/build.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# Copyright 2020 The gVisor Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -euxo pipefail
+
+# Generate the syntax highlighting css file.
+/usr/gem/bin/rougify style github >/input/_sass/syntax.css
+/usr/gem/bin/jekyll build -t -s /input -d /output
diff --git a/website/css/main.scss b/website/css/main.scss
index 06106833f0..4b3b7b500c 100644
--- a/website/css/main.scss
+++ b/website/css/main.scss
@@ -1,5 +1,10 @@
-@import 'style.scss';
-@import 'front.scss';
-@import 'navbar.scss';
-@import 'sidebar.scss';
-@import 'footer.scss';
+// The main style sheet for gvisor.dev
+
+// NOTE: Do not include file extensions to import .sass and .css files seamlessly.
+@import "style";
+@import "front";
+@import "navbar";
+@import "sidebar";
+@import "footer";
+// syntax is generated by rougify.
+@import "syntax";

From 29739be946d3fb0a257625a514b3a068afc686d2 Mon Sep 17 00:00:00 2001
From: Ian Lewis <ianlewis@google.com>
Date: Wed, 9 Sep 2020 10:08:06 -0700
Subject: [PATCH 165/211] Fix formatting for Kubernetes tutorial

PiperOrigin-RevId: 330745430
---
 g3doc/user_guide/tutorials/kubernetes.md | 186 ++++++++++++++++++-----
 1 file changed, 144 insertions(+), 42 deletions(-)

diff --git a/g3doc/user_guide/tutorials/kubernetes.md b/g3doc/user_guide/tutorials/kubernetes.md
index d2a94b1b7b..1ec6e71e9b 100644
--- a/g3doc/user_guide/tutorials/kubernetes.md
+++ b/g3doc/user_guide/tutorials/kubernetes.md
@@ -23,12 +23,12 @@ gcloud beta container node-pools create sandbox-pool --cluster=${CLUSTER_NAME} -
 If you prefer to use the console, select your cluster and select the **ADD NODE
 POOL** button:
 
-![+ ADD NODE POOL](./node-pool-button.png)
+![+ ADD NODE POOL](node-pool-button.png)
 
 Then select the **Image type** with **Containerd** and select **Enable sandbox
 with gVisor** option. Select other options as you like:
 
-![+ NODE POOL](./add-node-pool.png)
+![+ NODE POOL](add-node-pool.png)
 
 ### Check that gVisor is enabled
 
@@ -57,47 +57,149 @@ curl -LO https://k8s.io/examples/application/wordpress/mysql-deployment.yaml
 Add a **spec.template.spec.runtimeClassName** set to **gvisor** to both files,
 as shown below:
 
-**wordpress-deployment.yaml:** ```yaml apiVersion: v1 kind: Service metadata:
-name: wordpress labels: app: wordpress spec: ports: - port: 80 selector: app:
-wordpress tier: frontend
-
-## type: LoadBalancer
-
-apiVersion: v1 kind: PersistentVolumeClaim metadata: name: wp-pv-claim labels:
-app: wordpress spec: accessModes: - ReadWriteOnce resources: requests:
-
-## storage: 20Gi
-
-apiVersion: apps/v1 kind: Deployment metadata: name: wordpress labels: app:
-wordpress spec: selector: matchLabels: app: wordpress tier: frontend strategy:
-type: Recreate template: metadata: labels: app: wordpress tier: frontend spec:
-runtimeClassName: gvisor # ADD THIS LINE containers: - image:
-wordpress:4.8-apache name: wordpress env: - name: WORDPRESS_DB_HOST value:
-wordpress-mysql - name: WORDPRESS_DB_PASSWORD valueFrom: secretKeyRef: name:
-mysql-pass key: password ports: - containerPort: 80 name: wordpress
-volumeMounts: - name: wordpress-persistent-storage mountPath: /var/www/html
-volumes: - name: wordpress-persistent-storage persistentVolumeClaim: claimName:
-wp-pv-claim ```
-
-**mysql-deployment.yaml:** ```yaml apiVersion: v1 kind: Service metadata: name:
-wordpress-mysql labels: app: wordpress spec: ports: - port: 3306 selector: app:
-wordpress tier: mysql
-
-## clusterIP: None
-
-apiVersion: v1 kind: PersistentVolumeClaim metadata: name: mysql-pv-claim
-labels: app: wordpress spec: accessModes: - ReadWriteOnce resources: requests:
-
-## storage: 20Gi
+**wordpress-deployment.yaml:**
+
+```yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: wordpress
+  labels:
+    app: wordpress
+spec:
+  ports:
+    - port: 80
+  selector:
+    app: wordpress
+    tier: frontend
+  type: LoadBalancer
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: wp-pv-claim
+  labels:
+    app: wordpress
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 20Gi
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: wordpress
+  labels:
+    app: wordpress
+spec:
+  selector:
+    matchLabels:
+      app: wordpress
+      tier: frontend
+  strategy:
+    type: Recreate
+  template:
+    metadata:
+      labels:
+        app: wordpress
+        tier: frontend
+    spec:
+      runtimeClassName: gvisor   # ADD THIS LINE
+      containers:
+      - image: wordpress:4.8-apache
+        name: wordpress
+        env:
+        - name: WORDPRESS_DB_HOST
+          value: wordpress-mysql
+        - name: WORDPRESS_DB_PASSWORD
+          valueFrom:
+            secretKeyRef:
+              name: mysql-pass
+              key: password
+        ports:
+        - containerPort: 80
+          name: wordpress
+        volumeMounts:
+        - name: wordpress-persistent-storage
+          mountPath: /var/www/html
+      volumes:
+      - name: wordpress-persistent-storage
+        persistentVolumeClaim:
+          claimName: wp-pv-claim
+```
 
-apiVersion: apps/v1 kind: Deployment metadata: name: wordpress-mysql labels:
-app: wordpress spec: selector: matchLabels: app: wordpress tier: mysql strategy:
-type: Recreate template: metadata: labels: app: wordpress tier: mysql spec:
-runtimeClassName: gvisor # ADD THIS LINE containers: - image: mysql:5.6 name:
-mysql env: - name: MYSQL_ROOT_PASSWORD valueFrom: secretKeyRef: name: mysql-pass
-key: password ports: - containerPort: 3306 name: mysql volumeMounts: - name:
-mysql-persistent-storage mountPath: /var/lib/mysql volumes: - name:
-mysql-persistent-storage persistentVolumeClaim: claimName: mysql-pv-claim ```
+**mysql-deployment.yaml:**
+
+```yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: wordpress-mysql
+  labels:
+    app: wordpress
+spec:
+  ports:
+    - port: 3306
+  selector:
+    app: wordpress
+    tier: mysql
+  clusterIP: None
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: mysql-pv-claim
+  labels:
+    app: wordpress
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 20Gi
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: wordpress-mysql
+  labels:
+    app: wordpress
+spec:
+  selector:
+    matchLabels:
+      app: wordpress
+      tier: mysql
+  strategy:
+    type: Recreate
+  template:
+    metadata:
+      labels:
+        app: wordpress
+        tier: mysql
+    spec:
+      runtimeClassName: gvisor   # ADD THIS LINE
+      containers:
+      - image: mysql:5.6
+        name: mysql
+        env:
+        - name: MYSQL_ROOT_PASSWORD
+          valueFrom:
+            secretKeyRef:
+              name: mysql-pass
+              key: password
+        ports:
+        - containerPort: 3306
+          name: mysql
+        volumeMounts:
+        - name: mysql-persistent-storage
+          mountPath: /var/lib/mysql
+      volumes:
+      - name: mysql-persistent-storage
+        persistentVolumeClaim:
+          claimName: mysql-pv-claim
+```
 
 Note that apart from `runtimeClassName: gvisor`, nothing else about the
 Deployment has is changed.

From 031dd3fc2127e65c4187666999c348d3965a1d38 Mon Sep 17 00:00:00 2001
From: Jamie Liu <jamieliu@google.com>
Date: Wed, 9 Sep 2020 12:47:24 -0700
Subject: [PATCH 166/211] Don't sched_setaffinity in ptrace platform.

PiperOrigin-RevId: 330777900
---
 pkg/sentry/platform/ptrace/BUILD              |  1 -
 pkg/sentry/platform/ptrace/filters.go         |  9 ++-
 pkg/sentry/platform/ptrace/subprocess.go      |  5 --
 .../ptrace/subprocess_linux_unsafe.go         | 61 -------------------
 4 files changed, 4 insertions(+), 72 deletions(-)

diff --git a/pkg/sentry/platform/ptrace/BUILD b/pkg/sentry/platform/ptrace/BUILD
index e04165fbf9..fc43cc3c0a 100644
--- a/pkg/sentry/platform/ptrace/BUILD
+++ b/pkg/sentry/platform/ptrace/BUILD
@@ -30,7 +30,6 @@ go_library(
         "//pkg/safecopy",
         "//pkg/seccomp",
         "//pkg/sentry/arch",
-        "//pkg/sentry/hostcpu",
         "//pkg/sentry/memmap",
         "//pkg/sentry/platform",
         "//pkg/sentry/platform/interrupt",
diff --git a/pkg/sentry/platform/ptrace/filters.go b/pkg/sentry/platform/ptrace/filters.go
index 1e07cfd0d0..b0970e3569 100644
--- a/pkg/sentry/platform/ptrace/filters.go
+++ b/pkg/sentry/platform/ptrace/filters.go
@@ -24,10 +24,9 @@ import (
 // SyscallFilters returns syscalls made exclusively by the ptrace platform.
 func (*PTrace) SyscallFilters() seccomp.SyscallRules {
 	return seccomp.SyscallRules{
-		unix.SYS_GETCPU:            {},
-		unix.SYS_SCHED_SETAFFINITY: {},
-		syscall.SYS_PTRACE:         {},
-		syscall.SYS_TGKILL:         {},
-		syscall.SYS_WAIT4:          {},
+		unix.SYS_GETCPU:    {},
+		syscall.SYS_PTRACE: {},
+		syscall.SYS_TGKILL: {},
+		syscall.SYS_WAIT4:  {},
 	}
 }
diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go
index e1d54d8a25..812ab80efb 100644
--- a/pkg/sentry/platform/ptrace/subprocess.go
+++ b/pkg/sentry/platform/ptrace/subprocess.go
@@ -518,11 +518,6 @@ func (s *subprocess) switchToApp(c *context, ac arch.Context) bool {
 	}
 	defer c.interrupt.Disable()
 
-	// Ensure that the CPU set is bound appropriately; this makes the
-	// emulation below several times faster, presumably by avoiding
-	// interprocessor wakeups and by simplifying the schedule.
-	t.bind()
-
 	// Set registers.
 	if err := t.setRegs(regs); err != nil {
 		panic(fmt.Sprintf("ptrace set regs (%+v) failed: %v", regs, err))
diff --git a/pkg/sentry/platform/ptrace/subprocess_linux_unsafe.go b/pkg/sentry/platform/ptrace/subprocess_linux_unsafe.go
index 245b207220..533e454971 100644
--- a/pkg/sentry/platform/ptrace/subprocess_linux_unsafe.go
+++ b/pkg/sentry/platform/ptrace/subprocess_linux_unsafe.go
@@ -18,29 +18,12 @@
 package ptrace
 
 import (
-	"sync/atomic"
 	"syscall"
 	"unsafe"
 
-	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/abi/linux"
-	"gvisor.dev/gvisor/pkg/sentry/hostcpu"
-	"gvisor.dev/gvisor/pkg/sync"
 )
 
-// maskPool contains reusable CPU masks for setting affinity. Unfortunately,
-// runtime.NumCPU doesn't actually record the number of CPUs on the system, it
-// just records the number of CPUs available in the scheduler affinity set at
-// startup. This may a) change over time and b) gives a number far lower than
-// the maximum indexable CPU. To prevent lots of allocation in the hot path, we
-// use a pool to store large masks that we can reuse during bind.
-var maskPool = sync.Pool{
-	New: func() interface{} {
-		const maxCPUs = 1024 // Not a hard limit; see below.
-		return make([]uintptr, maxCPUs/64)
-	},
-}
-
 // unmaskAllSignals unmasks all signals on the current thread.
 //
 //go:nosplit
@@ -49,47 +32,3 @@ func unmaskAllSignals() syscall.Errno {
 	_, _, errno := syscall.RawSyscall6(syscall.SYS_RT_SIGPROCMASK, linux.SIG_SETMASK, uintptr(unsafe.Pointer(&set)), 0, linux.SignalSetSize, 0, 0)
 	return errno
 }
-
-// setCPU sets the CPU affinity.
-func (t *thread) setCPU(cpu uint32) error {
-	mask := maskPool.Get().([]uintptr)
-	n := int(cpu / 64)
-	v := uintptr(1 << uintptr(cpu%64))
-	if n >= len(mask) {
-		// See maskPool note above. We've actually exceeded the number
-		// of available cores. Grow the mask and return it.
-		mask = make([]uintptr, n+1)
-	}
-	mask[n] |= v
-	if _, _, errno := syscall.RawSyscall(
-		unix.SYS_SCHED_SETAFFINITY,
-		uintptr(t.tid),
-		uintptr(len(mask)*8),
-		uintptr(unsafe.Pointer(&mask[0]))); errno != 0 {
-		return errno
-	}
-	mask[n] &^= v
-	maskPool.Put(mask)
-	return nil
-}
-
-// bind attempts to ensure that the thread is on the same CPU as the current
-// thread. This provides no guarantees as it is fundamentally a racy operation:
-// CPU sets may change and we may be rescheduled in the middle of this
-// operation. As a result, no failures are reported.
-//
-// Precondition: the current runtime thread should be locked.
-func (t *thread) bind() {
-	currentCPU := hostcpu.GetCPU()
-
-	if oldCPU := atomic.SwapUint32(&t.cpu, currentCPU); oldCPU != currentCPU {
-		// Set the affinity on the thread and save the CPU for next
-		// round; we don't expect CPUs to bounce around too frequently.
-		//
-		// (It's worth noting that we could move CPUs between this point
-		// and when the tracee finishes executing. But that would be
-		// roughly the status quo anyways -- we're just maximizing our
-		// chances of colocation, not guaranteeing it.)
-		t.setCPU(currentCPU)
-	}
-}

From c7d09207e10ddc3d796422edcc6d26531e6106da Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin@google.com>
Date: Wed, 9 Sep 2020 10:38:48 -0700
Subject: [PATCH 167/211] github: run actions for feature branches

Signed-off-by: Andrei Vagin <avagin@google.com>
---
 .github/workflows/build.yml | 2 ++
 .github/workflows/go.yml    | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index cf782a580f..1e1677ab53 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -3,9 +3,11 @@ on:
   push:
     branches:
       - master
+      - feature/**
   pull_request:
     branches:
       - master
+      - feature/**
 
 jobs:
   default:
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index b51c221581..99f474a9cd 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -3,9 +3,11 @@ on:
   push:
     branches:
       - master
+      - feature/**
   pull_request:
     branches:
       - master
+      - feature/**
 
 jobs:
   generate:

From 1138c0ec66aa5366b5891dface1c70c294de9001 Mon Sep 17 00:00:00 2001
From: Craig Chi <craigchi@google.com>
Date: Mon, 17 Aug 2020 15:33:19 -0700
Subject: [PATCH 168/211] Extend integration test to test sequence of FUSE
 operation

Original FUSE integration test has limited capabilities. To test more
situations, the new integration test framework introduces a protocol
to communicate between testing thread and the FUSE server. In summary,
this change includes:

1. Remove CompareResult() and break SetExpected() into
   SetServerResponse() and GetServerActualRequest(). We no longer set
   up an expected request because we want to retrieve the actual FUSE
   request made to the FUSE server and check in the testing thread.

2. Declare a serial buffer data structure to save the received requests
   and expected responses sequentially. The data structure contains a
   cursor to indicate the progress of accessing. This change makes
   sequential SetServerResponse() and GetServerActualRequest() possible.

3. Replace 2 single directional pipes with 1 bi-directional socketpair.
   A protocol which starts with FuseTestCmd is used between the testing
   thread and the FUSE server to provide various functionality.

Fixes #3405
---
 pkg/sentry/fsimpl/fuse/dev.go |   8 +
 test/fuse/BUILD               |   1 -
 test/fuse/README.md           | 165 ++++++++++++++------
 test/fuse/linux/fuse_base.cc  | 277 +++++++++++++++++++++++-----------
 test/fuse/linux/fuse_base.h   | 172 ++++++++++++++++-----
 test/fuse/linux/stat_test.cc  |  85 +++++------
 6 files changed, 477 insertions(+), 231 deletions(-)

diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go
index e522ff9a07..0efd2d90d9 100644
--- a/pkg/sentry/fsimpl/fuse/dev.go
+++ b/pkg/sentry/fsimpl/fuse/dev.go
@@ -307,6 +307,14 @@ func (fd *DeviceFD) writeLocked(ctx context.Context, src usermem.IOSequence, opt
 
 // Readiness implements vfs.FileDescriptionImpl.Readiness.
 func (fd *DeviceFD) Readiness(mask waiter.EventMask) waiter.EventMask {
+	fd.mu.Lock()
+	defer fd.mu.Unlock()
+	return fd.readinessLocked(mask)
+}
+
+// readinessLocked implements checking the readiness of the fuse device while
+// locked with DeviceFD.mu.
+func (fd *DeviceFD) readinessLocked(mask waiter.EventMask) waiter.EventMask {
 	var ready waiter.EventMask
 	ready |= waiter.EventOut // FD is always writable
 	if !fd.queue.Empty() {
diff --git a/test/fuse/BUILD b/test/fuse/BUILD
index 56157c96bc..385920e171 100644
--- a/test/fuse/BUILD
+++ b/test/fuse/BUILD
@@ -5,5 +5,4 @@ package(licenses = ["notice"])
 syscall_test(
     fuse = "True",
     test = "//test/fuse/linux:stat_test",
-    vfs2 = "True",
 )
diff --git a/test/fuse/README.md b/test/fuse/README.md
index 734c3a4e39..c5909a166c 100644
--- a/test/fuse/README.md
+++ b/test/fuse/README.md
@@ -1,55 +1,90 @@
 # gVisor FUSE Test Suite
 
-This is an integration test suite for fuse(4) filesystem. It runs under both
-gVisor and Linux, and ensures compatibility between the two. This test suite is
-based on system calls test.
+This is an integration test suite for the fuse(4) filesystem. It runs in a
+gVisor sandbox container with VFS2 and FUSE support enabled.
 
-This document describes the framework of fuse integration test and the
-guidelines that should be followed when adding new fuse tests.
+This document describes the framework of FUSE integration test, how to use it,
+and the guidelines that should be followed when adding new testing features.
 
 ## Integration Test Framework
 
-Please refer to the figure below. `>` is entering the function, `<` is leaving
-the function, and `=` indicates sequentially entering and leaving.
+By inheriting the `FuseTest` class defined in `linux/fuse_base.h`, every test
+fixture can run in an environment with `mount_point_` mounted by a fake FUSE
+server. It creates a `socketpair(2)` to send and receive control commands and
+data between the client and the server. Because the FUSE server runs in a
+background thread, gTest cannot catch its assertion failures immediately. Thus,
+the `TearDown()` function sends a command to the FUSE server to check that all
+gTest assertions in the server succeeded and that all requests and preset
+responses were consumed.
+
+## Communication Diagram
+
+The diagram below describes how the testing thread communicates with the FUSE
+server during an integration test.
+
+For the following diagram, `>` means entering the function, `<` is leaving the
+function, and `=` indicates sequentially entering and leaving. Actual execution
+may not follow the diagram exactly because the system is multi-threaded;
+however, it is still helpful to know when the client waits for the server to
+complete a command and when the server awaits the next instruction.
 
 ```
- |  Client (Test Main Process)         |  Server (FUSE Daemon)
+ |  Client (Testing Thread)            |  Server (FUSE Server Thread)
  |                                     |
  |  >TEST_F()                          |
  |    >SetUp()                         |
  |      =MountFuse()                   |
  |      >SetUpFuseServer()             |
- |        [create communication pipes] |
- |        =fork()                      |        =fork()
- |        >WaitCompleted()             |
- |          [wait for MarkDone()]      |
- |                                     |        =ConsumeFuseInit()
- |                                     |        =MarkDone()
- |        <WaitCompleted()             |
+ |        [create communication socket]|
+ |        =fork()                      |      =fork()
+ |        [wait server complete]       |
+ |                                     |      =ServerConsumeFuseInit()
+ |                                     |      =ServerCompleteWith()
  |      <SetUpFuseServer()             |
  |    <SetUp()                         |
- |    >SetExpected()                   |
- |      [construct expected reaction]  |
- |                                     |        >FuseLoop()
- |                                     |          >ReceiveExpected()
- |                                     |            [wait data from pipe]
- |      [write data to pipe]           |
- |      [wait for MarkDone()]          |
+ |    [testing main]                   |
+ |                                     |      >ServerFuseLoop()
+ |                                     |        [poll on socket and fd]
+ |    >SetServerResponse()             |
+ |      [write data to socket]         |
+ |      [wait server complete]         |
+ |                                     |        [socket event occurs]
+ |                                     |        >ServerHandleCommand()
+ |                                     |          >ServerReceiveResponse()
+ |                                     |            [read data from socket]
  |                                     |            [save data to memory]
- |                                     |            =MarkDone()
- |    <SetExpected()                   |
- |                                     |          <ReceiveExpected()
- |                                     |          >read()
- |                                     |            [wait for fs operation]
+ |                                     |          <ServerReceiveResponse()
+ |                                     |          =ServerCompleteWith()
+ |    <SetServerResponse()             |
+ |                                     |        <ServerHandleCommand()
  |    >[Do fs operation]               |
  |      [wait for fs response]         |
- |                                     |          <read()
- |                                     |          =CompareRequest()
- |                                     |          =write() [write fs response]
+ |                                     |        [fd event occurs]
+ |                                     |        >ServerProcessFuseRequest()
+ |                                     |          =[read fs request]
+ |                                     |          =[save fs request to memory]
+ |                                     |          =[write fs response]
  |    <[Do fs operation]               |
+ |                                     |        <ServerProcessFuseRequest()
+ |                                     |
  |    =[Test fs operation result]      |
- |    =[wait for MarkDone()]           |
- |                                     |          =MarkDone()
+ |                                     |
+ |    >GetServerActualRequest()        |
+ |      [write data to socket]         |
+ |      [wait data from server]        |
+ |                                     |        [socket event occurs]
+ |                                     |        >ServerHandleCommand()
+ |                                     |          >ServerSendReceivedRequest()
+ |                                     |            [write data to socket]
+ |      [read data from socket]        |
+ |      [wait server complete]         |
+ |                                     |          <ServerSendReceivedRequest()
+ |                                     |          =ServerCompleteWith()
+ |    <GetServerActualRequest()        |
+ |                                     |        <ServerHandleCommand()
+ |                                     |
+ |    =[Test actual request]           |
+ |                                     |
  |    >TearDown()                      |
  |      =UnmountFuse()                 |
  |    <TearDown()                      |
@@ -58,8 +93,8 @@ the function, and `=` indicates sequentially entering and leaving.
 
 ## Running the tests
 
-Based on syscall tests, fuse tests can run in different environments. To enable
-fuse testing environment, the test targets should be appended with `_fuse`.
+Based on syscall tests, FUSE tests generate targets only with vfs2 and fuse
+enabled. The corresponding targets end in `_fuse`.
 
 For example, to run fuse test in `stat_test.cc`:
 
@@ -75,19 +110,17 @@ $ bazel test --test_tag_filters=fuse //test/fuse/...
 
 ## Writing a new FUSE test
 
-1.  Add test targets in `BUILD` and `linux/BUILD`.
-2.  Inherit your test from `FuseTest` base class. It allows you to:
-    -   Run a fake FUSE server in background during each test setup.
-    -   Create pipes for communication and provide utility functions.
-    -   Stop FUSE server after test completes.
-3.  Customize your comparison function for request assessment in FUSE server.
-4.  Add the mapping of the size of structs if you are working on new FUSE
-    opcode.
-    -   Please update `FuseTest::GetPayloadSize()` for each new FUSE opcode.
-5.  Build the expected request-response pair of your FUSE operation.
-6.  Call `SetExpected()` function to inject the expected reaction.
-7.  Check the response and/or errors.
-8.  Finally call `WaitCompleted()` to ensure the FUSE server acts correctly.
+1. Add test targets in `BUILD` and `linux/BUILD`.
+2. Inherit your test from `FuseTest` base class. It allows you to:
+  - Fork a fake FUSE server in background during each test setup.
+  - Create a pair of sockets for communication and provide utility functions.
+  - Stop the FUSE server and check whether an error occurred in it after the test completes.
+3. Build the expected opcode-response pairs of your FUSE operation.
+4. Call `SetServerResponse()` to preset the next expected opcode and response.
+5. Do real filesystem operations (FUSE is mounted at `mount_point_`).
+6. Check FUSE response and/or errors.
+7. Retrieve FUSE request by `GetServerActualRequest()`.
+8. Check if the request is as expected.
 
 A few customized matchers used in syscalls test are encouraged to test the
 outcome of filesystem operations. Such as:
@@ -101,3 +134,41 @@ SyscallFailsWithErrno(...)
 
 Please refer to [test/syscalls/README.md](../syscalls/README.md) for further
 details.
+
+## Writing a new FuseTestCmd
+
+A `FuseTestCmd` is a control protocol used in the communication between the
+testing thread and the FUSE server. Such commands are sent from the testing
+thread to the FUSE server to set up, control, or inspect the behavior of the
+FUSE server in response to a sequence of FUSE requests.
+
+The lifecycle of a command consists of the following steps:
+
+1. The testing thread sends a `FuseTestCmd` via socket and waits for completion.
+2. The FUSE server receives the command and performs the corresponding action.
+3. (Optional) The testing thread reads data from socket.
+4. The FUSE server sends a success indicator via socket after processing.
+5. The testing thread gets the success signal and continues testing.
+
+The success indicator, i.e. `WaitServerComplete()`, is crucial at the end of
+each `FuseTestCmd` sent from the testing thread, because we don't want to begin
+a filesystem operation before the requests have been completely set up. Also, to
+test FUSE interactions in a sequential manner, concurrent requests are not
+currently supported.
+
+To add a new `FuseTestCmd`, one must comply with the following format:
+
+1. Add a new item to the `FuseTestCmd` enum class defined in `linux/fuse_base.h`.
+2. Add a `SetServerXXX()` or `GetServerXXX()` public function in `FuseTest`.
+   This is what the testing thread calls to send the control message. Define how
+   many bytes you want to send along with the command and what you will expect
+   to receive. Finally it should block and wait for a success indicator from
+   the FUSE server.
+3. Add a `ServerReceiveXXX()` or `ServerSendXXX()` private function in
+   `FuseTest`. It is mandatory to set it private since only the FUSE server
+   (forked from `FuseTest` base class) can call it. This is the handler of a
+   specific `FuseTestCmd` and the format of the data should be consistent with
+   what client expects in the previous step.
+4. Add a case in the switch condition of `ServerHandleCommand()` to route the
+   command to the server handler described in the previous step.
+
diff --git a/test/fuse/linux/fuse_base.cc b/test/fuse/linux/fuse_base.cc
index 9c31244726..b7d8b2a1fa 100644
--- a/test/fuse/linux/fuse_base.cc
+++ b/test/fuse/linux/fuse_base.cc
@@ -16,17 +16,16 @@
 
 #include <fcntl.h>
 #include <linux/fuse.h>
-#include <string.h>
+#include <poll.h>
 #include <sys/mount.h>
+#include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/uio.h>
 #include <unistd.h>
 
-#include <iostream>
-
-#include "gtest/gtest.h"
 #include "absl/strings/str_format.h"
+#include "gtest/gtest.h"
 #include "test/util/posix_error.h"
 #include "test/util/temp_path.h"
 #include "test/util/test_util.h"
@@ -41,37 +40,47 @@ void FuseTest::SetUp() {
 
 void FuseTest::TearDown() { UnmountFuse(); }
 
-// Since CompareRequest is running in background thread, gTest assertions and
-// expectations won't directly reflect the test result. However, the FUSE
-// background server still connects to the same standard I/O as testing main
-// thread. So EXPECT_XX can still be used to show different results. To
-// ensure failed testing result is observable, return false and the result
-// will be sent to test main thread via pipe.
-bool FuseTest::CompareRequest(void* expected_mem, size_t expected_len,
-                              void* real_mem, size_t real_len) {
-  if (expected_len != real_len) return false;
-  return memcmp(expected_mem, real_mem, expected_len) == 0;
-}
+// Sends 3 parts of data to the FUSE server:
+//   1. The `kSetResponse` command
+//   2. The expected opcode
+//   3. The fake FUSE response
+// Then waits for the FUSE server to notify its completion.
+void FuseTest::SetServerResponse(uint32_t opcode,
+                                 std::vector<struct iovec>& iovecs) {
+  uint32_t cmd = static_cast<uint32_t>(FuseTestCmd::kSetResponse);
+  EXPECT_THAT(RetryEINTR(write)(sock_[0], &cmd, sizeof(cmd)),
+              SyscallSucceedsWithValue(sizeof(cmd)));
+
+  EXPECT_THAT(RetryEINTR(write)(sock_[0], &opcode, sizeof(opcode)),
+              SyscallSucceedsWithValue(sizeof(opcode)));
 
-// SetExpected is called by the testing main thread to set expected request-
-// response pair of a single FUSE operation.
-void FuseTest::SetExpected(struct iovec* iov_in, int iov_in_cnt,
-                           struct iovec* iov_out, int iov_out_cnt) {
-  EXPECT_THAT(RetryEINTR(writev)(set_expected_[1], iov_in, iov_in_cnt),
-              SyscallSucceedsWithValue(::testing::Gt(0)));
-  WaitCompleted();
+  EXPECT_THAT(RetryEINTR(writev)(sock_[0], iovecs.data(), iovecs.size()),
+              SyscallSucceeds());
 
-  EXPECT_THAT(RetryEINTR(writev)(set_expected_[1], iov_out, iov_out_cnt),
-              SyscallSucceedsWithValue(::testing::Gt(0)));
-  WaitCompleted();
+  WaitServerComplete();
 }
 
-// WaitCompleted waits for the FUSE server to finish its job and check if it
+// Waits for the FUSE server to finish its blocking job and checks that it
 // completes without errors.
-void FuseTest::WaitCompleted() {
+void FuseTest::WaitServerComplete() {
   char success;
-  EXPECT_THAT(RetryEINTR(read)(done_[0], &success, sizeof(success)),
-              SyscallSucceedsWithValue(1));
+  EXPECT_THAT(RetryEINTR(read)(sock_[0], &success, sizeof(success)),
+              SyscallSucceedsWithValue(sizeof(success)));
+  EXPECT_EQ(success, static_cast<char>(1));
+}
+
+// Sends the `kGetRequest` command to the FUSE server, then reads the next
+// request into the given iovecs. Calls to this function should be made in the
+// same order as the corresponding calls to SetServerResponse().
+void FuseTest::GetServerActualRequest(std::vector<struct iovec>& iovecs) {
+  uint32_t cmd = static_cast<uint32_t>(FuseTestCmd::kGetRequest);
+  EXPECT_THAT(RetryEINTR(write)(sock_[0], &cmd, sizeof(cmd)),
+              SyscallSucceedsWithValue(sizeof(cmd)));
+
+  EXPECT_THAT(RetryEINTR(readv)(sock_[0], iovecs.data(), iovecs.size()),
+              SyscallSucceeds());
+
+  WaitServerComplete();
 }
 
 void FuseTest::MountFuse() {
@@ -81,7 +90,7 @@ void FuseTest::MountFuse() {
   mount_point_ = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
   EXPECT_THAT(mount("fuse", mount_point_.path().c_str(), "fuse",
                     MS_NODEV | MS_NOSUID, mount_opts.c_str()),
-              SyscallSucceedsWithValue(0));
+              SyscallSucceeds());
 }
 
 void FuseTest::UnmountFuse() {
@@ -89,11 +98,11 @@ void FuseTest::UnmountFuse() {
   // TODO(gvisor.dev/issue/3330): ensure the process is terminated successfully.
 }
 
-// ConsumeFuseInit consumes the first FUSE request and returns the
-// corresponding PosixError.
-PosixError FuseTest::ConsumeFuseInit() {
+// Consumes the first FUSE request and returns the corresponding PosixError.
+PosixError FuseTest::ServerConsumeFuseInit() {
+  std::vector<char> buf(FUSE_MIN_READ_BUFFER);
   RETURN_ERROR_IF_SYSCALL_FAIL(
-      RetryEINTR(read)(dev_fd_, buf_.data(), buf_.size()));
+      RetryEINTR(read)(dev_fd_, buf.data(), buf.size()));
 
   struct iovec iov_out[2];
   struct fuse_out_header out_header = {
@@ -115,60 +124,67 @@ PosixError FuseTest::ConsumeFuseInit() {
   return NoError();
 }
 
-// ReceiveExpected reads 1 pair of expected fuse request-response `iovec`s
-// from pipe and save them into member variables of this testing instance.
-void FuseTest::ReceiveExpected() {
-  // Set expected fuse_in request.
-  EXPECT_THAT(len_in_ = RetryEINTR(read)(set_expected_[0], mem_in_.data(),
-                                         mem_in_.size()),
-              SyscallSucceedsWithValue(::testing::Gt(0)));
-  MarkDone(len_in_ > 0);
+// Reads 1 expected opcode and a fake response from the socket and saves them
+// into the serial buffer of this testing instance.
+void FuseTest::ServerReceiveResponse() {
+  ssize_t len;
+  uint32_t opcode;
+  std::vector<char> buf(FUSE_MIN_READ_BUFFER);
+  EXPECT_THAT(RetryEINTR(read)(sock_[1], &opcode, sizeof(opcode)),
+              SyscallSucceedsWithValue(sizeof(opcode)));
 
-  // Set expected fuse_out response.
-  EXPECT_THAT(len_out_ = RetryEINTR(read)(set_expected_[0], mem_out_.data(),
-                                          mem_out_.size()),
-              SyscallSucceedsWithValue(::testing::Gt(0)));
-  MarkDone(len_out_ > 0);
+  EXPECT_THAT(len = RetryEINTR(read)(sock_[1], buf.data(), buf.size()),
+              SyscallSucceeds());
+
+  responses_.AddMemBlock(opcode, buf.data(), len);
 }
 
-// MarkDone writes 1 byte of success indicator through pipe.
-void FuseTest::MarkDone(bool success) {
-  char data = success ? 1 : 0;
-  EXPECT_THAT(RetryEINTR(write)(done_[1], &data, sizeof(data)),
-              SyscallSucceedsWithValue(1));
+// Writes 1 byte of success indicator through socket.
+void FuseTest::ServerCompleteWith(bool success) {
+  char data = static_cast<char>(success);
+  EXPECT_THAT(RetryEINTR(write)(sock_[1], &data, sizeof(data)),
+              SyscallSucceedsWithValue(sizeof(data)));
 }
 
-// FuseLoop is the implementation of the fake FUSE server. Read from /dev/fuse,
-// compare the request by CompareRequest (use derived function if specified),
-// and write the expected response to /dev/fuse.
-void FuseTest::FuseLoop() {
-  bool success = true;
-  ssize_t len = 0;
+// ServerFuseLoop is the implementation of the fake FUSE server. Monitors 2
+// file descriptors: /dev/fuse and sock_[1]. Events from /dev/fuse are FUSE
+// requests and events from sock_[1] are FUSE testing commands, each led by
+// a FuseTestCmd value that identifies the command.
+void FuseTest::ServerFuseLoop() {
+  const int nfds = 2;
+  struct pollfd fds[nfds] = {
+      {
+          .fd = dev_fd_,
+          .events = POLL_IN | POLLHUP | POLLERR | POLLNVAL,
+      },
+      {
+          .fd = sock_[1],
+          .events = POLL_IN | POLLHUP | POLLERR | POLLNVAL,
+      },
+  };
+
   while (true) {
-    ReceiveExpected();
+    ASSERT_THAT(poll(fds, nfds, -1), SyscallSucceeds());
 
-    EXPECT_THAT(len = RetryEINTR(read)(dev_fd_, buf_.data(), buf_.size()),
-                SyscallSucceedsWithValue(len_in_));
-    if (len != len_in_) success = false;
+    for (int fd_idx = 0; fd_idx < nfds; ++fd_idx) {
+      if (fds[fd_idx].revents == 0) continue;
 
-    if (!CompareRequest(buf_.data(), len_in_, mem_in_.data(), len_in_)) {
-      std::cerr << "the FUSE request is not expected" << std::endl;
-      success = false;
+      ASSERT_EQ(fds[fd_idx].revents, POLL_IN);
+      if (fds[fd_idx].fd == sock_[1]) {
+        ServerHandleCommand();
+      } else if (fds[fd_idx].fd == dev_fd_) {
+        ServerProcessFuseRequest();
+      }
     }
-
-    EXPECT_THAT(len = RetryEINTR(write)(dev_fd_, mem_out_.data(), len_out_),
-                SyscallSucceedsWithValue(len_out_));
-    if (len != len_out_) success = false;
-    MarkDone(success);
   }
 }
 
-// SetUpFuseServer creates 2 pipes. First is for testing client to send the
-// expected request-response pair, and the other acts as a checkpoint for the
-// FUSE server to notify the client that it can proceed.
+// SetUpFuseServer creates 1 socketpair and forks the process. The parent
+// process becomes the testing thread and the child process becomes the FUSE
+// server running in the background. The two are connected via the socketpair:
+// sock_[0] stays open in the testing thread and sock_[1] in the FUSE server.
 void FuseTest::SetUpFuseServer() {
-  ASSERT_THAT(pipe(set_expected_), SyscallSucceedsWithValue(0));
-  ASSERT_THAT(pipe(done_), SyscallSucceedsWithValue(0));
+  ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_), SyscallSucceeds());
 
   switch (fork()) {
     case -1:
@@ -177,31 +193,110 @@ void FuseTest::SetUpFuseServer() {
     case 0:
       break;
     default:
-      ASSERT_THAT(close(set_expected_[0]), SyscallSucceedsWithValue(0));
-      ASSERT_THAT(close(done_[1]), SyscallSucceedsWithValue(0));
-      WaitCompleted();
+      ASSERT_THAT(close(sock_[1]), SyscallSucceeds());
+      WaitServerComplete();
       return;
   }
 
-  ASSERT_THAT(close(set_expected_[1]), SyscallSucceedsWithValue(0));
-  ASSERT_THAT(close(done_[0]), SyscallSucceedsWithValue(0));
-
-  MarkDone(ConsumeFuseInit().ok());
-
-  FuseLoop();
+  // Begin child thread, i.e. the FUSE server.
+  ASSERT_THAT(close(sock_[0]), SyscallSucceeds());
+  ServerCompleteWith(ServerConsumeFuseInit().ok());
+  ServerFuseLoop();
   _exit(0);
 }
 
-// GetPayloadSize is a helper function to get the number of bytes of a
-// specific FUSE operation struct.
-size_t FuseTest::GetPayloadSize(uint32_t opcode, bool in) {
-  switch (opcode) {
-    case FUSE_INIT:
-      return in ? sizeof(struct fuse_init_in) : sizeof(struct fuse_init_out);
+// Reads the FuseTestCmd sent from the testing thread and routes it to the
+// correct handler. Since each command should be a blocking operation, a
+// `ServerCompleteWith()` call is required after the switch statement.
+void FuseTest::ServerHandleCommand() {
+  uint32_t cmd;
+  EXPECT_THAT(RetryEINTR(read)(sock_[1], &cmd, sizeof(cmd)),
+              SyscallSucceedsWithValue(sizeof(cmd)));
+
+  switch (static_cast<FuseTestCmd>(cmd)) {
+    case FuseTestCmd::kSetResponse:
+      ServerReceiveResponse();
+      break;
+    case FuseTestCmd::kGetRequest:
+      ServerSendReceivedRequest();
+      break;
     default:
+      FAIL() << "Unknown FuseTestCmd " << cmd;
       break;
   }
-  return 0;
+
+  ServerCompleteWith(!HasFailure());
+}
+
+// Sends the received request at the current cursor and advances the cursor.
+void FuseTest::ServerSendReceivedRequest() {
+  if (requests_.End()) {
+    FAIL() << "No more received request.";
+    return;
+  }
+  auto mem_block = requests_.Next();
+  EXPECT_THAT(
+      RetryEINTR(write)(sock_[1], requests_.DataAtOffset(mem_block.offset),
+                        mem_block.len),
+      SyscallSucceedsWithValue(mem_block.len));
+}
+
+// Handles FUSE request. Reads request from /dev/fuse, checks if it has the
+// same opcode as expected, and responds with the saved fake FUSE response.
+// The FUSE request is copied to the serial buffer and can be retrieved one-
+// by-one by calling GetServerActualRequest from testing thread.
+void FuseTest::ServerProcessFuseRequest() {
+  ssize_t len;
+  std::vector<char> buf(FUSE_MIN_READ_BUFFER);
+
+  // Read FUSE request.
+  EXPECT_THAT(len = RetryEINTR(read)(dev_fd_, buf.data(), buf.size()),
+              SyscallSucceeds());
+  fuse_in_header* in_header = reinterpret_cast<fuse_in_header*>(buf.data());
+  requests_.AddMemBlock(in_header->opcode, buf.data(), len);
+
+  // Check if there is a corresponding response.
+  if (responses_.End()) {
+    GTEST_NONFATAL_FAILURE_("No more FUSE response is expected");
+    ServerRespondFuseError(in_header->unique);
+    return;
+  }
+  auto mem_block = responses_.Next();
+  if (in_header->opcode != mem_block.opcode) {
+    std::string message = absl::StrFormat("Expect opcode %d but got %d",
+                                          mem_block.opcode, in_header->opcode);
+    GTEST_NONFATAL_FAILURE_(message.c_str());
+    // We won't get correct response if opcode is not expected. Send error
+    // response here to avoid wrong parsing by VFS.
+    ServerRespondFuseError(in_header->unique);
+    return;
+  }
+
+  // Write FUSE response.
+  ServerRespondFuseSuccess(responses_, mem_block, in_header->unique);
+}
+
+void FuseTest::ServerRespondFuseSuccess(FuseMemBuffer& mem_buf,
+                                        const FuseMemBlock& block,
+                                        uint64_t unique) {
+  fuse_out_header* out_header =
+      reinterpret_cast<fuse_out_header*>(mem_buf.DataAtOffset(block.offset));
+
+  // Patch `unique` in fuse_out_header to avoid EINVAL caused by responding
+  // with an unknown `unique`.
+  out_header->unique = unique;
+  EXPECT_THAT(RetryEINTR(write)(dev_fd_, out_header, block.len),
+              SyscallSucceedsWithValue(block.len));
+}
+
+void FuseTest::ServerRespondFuseError(uint64_t unique) {
+  fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header),
+      .error = ENOSYS,
+      .unique = unique,
+  };
+  EXPECT_THAT(RetryEINTR(write)(dev_fd_, &out_header, sizeof(out_header)),
+              SyscallSucceedsWithValue(sizeof(out_header)));
 }
 
 }  // namespace testing
diff --git a/test/fuse/linux/fuse_base.h b/test/fuse/linux/fuse_base.h
index 3a2f255a9d..b610d0f542 100644
--- a/test/fuse/linux/fuse_base.h
+++ b/test/fuse/linux/fuse_base.h
@@ -16,8 +16,10 @@
 #define GVISOR_TEST_FUSE_FUSE_BASE_H_
 
 #include <linux/fuse.h>
+#include <string.h>
 #include <sys/uio.h>
 
+#include <iostream>
 #include <vector>
 
 #include "gtest/gtest.h"
@@ -29,68 +31,156 @@ namespace testing {
 
 constexpr char kMountOpts[] = "rootmode=755,user_id=0,group_id=0";
 
-class FuseTest : public ::testing::Test {
+// Internal commands used to communicate between testing thread and the FUSE
+// server. See test/fuse/README.md for further detail.
+enum class FuseTestCmd {
+  kSetResponse = 0,
+  kGetRequest,
+};
+
+// Holds the information of a memory block in a serial buffer.
+struct FuseMemBlock {
+  uint32_t opcode;
+  size_t offset;
+  size_t len;
+};
+
+// A wrapper of a simple serial buffer that can be used with read(2) and
+// write(2). Contains a cursor that tracks the next block to be returned by
+// Next(). This class is not thread-safe and must only be used from one thread.
+class FuseMemBuffer {
  public:
-  FuseTest() {
-    buf_.resize(FUSE_MIN_READ_BUFFER);
-    mem_in_.resize(FUSE_MIN_READ_BUFFER);
-    mem_out_.resize(FUSE_MIN_READ_BUFFER);
+  FuseMemBuffer() : cursor_(0) {
+    // To read from /dev/fuse, a buffer needs at least FUSE_MIN_READ_BUFFER
+    // bytes to avoid EINVAL. FuseMemBuffer holds memory that can accommodate
+    // a sequence of FUSE request/response, so it is initiated with double
+    // minimal requirement.
+    mem_.resize(FUSE_MIN_READ_BUFFER * 2);
   }
+
+  // Returns whether there is no memory block.
+  bool Empty() { return blocks_.empty(); }
+
+  // Returns whether there are no more remaining memory blocks.
+  bool End() { return cursor_ == blocks_.size(); }
+
+  // Returns how many bytes that have been received.
+  size_t UsedBytes() {
+    return Empty() ? 0 : blocks_.back().offset + blocks_.back().len;
+  }
+
+  // Returns the number of bytes still available in the serial buffer.
+  size_t AvailBytes() { return mem_.size() - UsedBytes(); }
+
+  // Appends a memory block information that starts at the tail of the serial
+  // buffer. /dev/fuse requires at least FUSE_MIN_READ_BUFFER bytes to read, or
+  // it will issue EINVAL. If it is not enough, just double the buffer length.
+  void AddMemBlock(uint32_t opcode, void* data, size_t len) {
+    if (AvailBytes() < FUSE_MIN_READ_BUFFER) {
+      mem_.resize(mem_.size() << 1);
+    }
+    size_t offset = UsedBytes();
+    memcpy(mem_.data() + offset, data, len);
+    blocks_.push_back(FuseMemBlock{opcode, offset, len});
+  }
+
+  // Returns the memory address at a specific offset. Used with read(2) or
+  // write(2).
+  char* DataAtOffset(size_t offset) { return mem_.data() + offset; }
+
+  // Returns the memory block at the cursor and advances the cursor by 1.
+  FuseMemBlock Next() {
+    if (End()) {
+      std::cerr << "Buffer is already exhausted." << std::endl;
+      return FuseMemBlock{};
+    }
+    return blocks_[cursor_++];
+  }
+
+  // Returns the number of blocks that have not been requested yet.
+  size_t RemainingBlocks() { return blocks_.size() - cursor_; }
+
+ private:
+  size_t cursor_;
+  std::vector<FuseMemBlock> blocks_;
+  std::vector<char> mem_;
+};
+
+// FuseTest base class is useful in FUSE integration tests. Inherit this class
+// to automatically set up a fake FUSE server and use the member functions
+// to interact with it. Refer to test/fuse/README.md for a detailed explanation.
+class FuseTest : public ::testing::Test {
+ public:
   void SetUp() override;
   void TearDown() override;
 
-  // CompareRequest is used by the FUSE server and should be implemented to
-  // compare different FUSE operations. It compares the actual FUSE input
-  // request with the expected one set by `SetExpected()`.
-  virtual bool CompareRequest(void* expected_mem, size_t expected_len,
-                              void* real_mem, size_t real_len);
-
-  // SetExpected is called by the testing main thread. Writes a request-
-  // response pair into FUSE server's member variables via pipe.
-  void SetExpected(struct iovec* iov_in, int iov_in_cnt, struct iovec* iov_out,
-                   int iov_out_cnt);
+  // Called by the testing thread to set up a fake response for an expected
+  // opcode via socket. This can be used multiple times to define a sequence of
+  // expected FUSE reactions.
+  void SetServerResponse(uint32_t opcode, std::vector<struct iovec>& iovecs);
 
-  // WaitCompleted waits for FUSE server to complete its processing. It
-  // complains if the FUSE server responds failure during tests.
-  void WaitCompleted();
+  // Called by the testing thread to ask the FUSE server for its next received
+  // FUSE request. Be sure to use the corresponding struct of iovec to receive
+  // data from server.
+  void GetServerActualRequest(std::vector<struct iovec>& iovecs);
 
  protected:
   TempPath mount_point_;
 
  private:
+  // Opens /dev/fuse and inherit the file descriptor for the FUSE server.
   void MountFuse();
+
+  // Unmounts the mountpoint of the FUSE server.
   void UnmountFuse();
 
-  // ConsumeFuseInit is only used during FUSE server setup.
-  PosixError ConsumeFuseInit();
+  // Creates a socketpair for communication and forks FUSE server.
+  void SetUpFuseServer();
 
-  // ReceiveExpected is the FUSE server side's corresponding code of
-  // `SetExpected()`. Save the request-response pair into its memory.
-  void ReceiveExpected();
+  // Waits for FUSE server to complete its processing. Complains if the FUSE
+  // server responds any failure during tests.
+  void WaitServerComplete();
 
-  // MarkDone is used by the FUSE server to tell testing main if it's OK to
-  // proceed next command.
-  void MarkDone(bool success);
+  // The FUSE server stays here and waits for the next command or FUSE request
+  // until it is terminated.
+  void ServerFuseLoop();
 
-  // FuseLoop is where the FUSE server stay until it is terminated.
-  void FuseLoop();
+  // Used by the FUSE server to tell the testing thread whether it is OK to
+  // proceed to the next command. Issued after processing each FuseTestCmd.
+  void ServerCompleteWith(bool success);
 
-  // SetUpFuseServer creates 2 pipes for communication and forks FUSE server.
-  void SetUpFuseServer();
+  // Consumes the first FUSE request when mounting FUSE. Replies with a
+  // response with empty payload.
+  PosixError ServerConsumeFuseInit();
+
+  // A command switch that dispatches each FuseTestCmd to its handler.
+  void ServerHandleCommand();
+
+  // The FUSE server side's corresponding code of `SetServerResponse()`.
+  // Handles `kSetResponse` command. Saves the fake response into its output
+  // memory queue.
+  void ServerReceiveResponse();
+
+  // The FUSE server side's corresponding code of `GetServerActualRequest()`.
+  // Handles `kGetRequest` command. Sends the next received request pointed by
+  // the cursor.
+  void ServerSendReceivedRequest();
 
-  // GetPayloadSize is a helper function to get the number of bytes of a
-  // specific FUSE operation struct.
-  size_t GetPayloadSize(uint32_t opcode, bool in);
+  // Handles FUSE request sent to /dev/fuse by its saved responses.
+  void ServerProcessFuseRequest();
+
+  // Responds to a FUSE request with saved data.
+  void ServerRespondFuseSuccess(FuseMemBuffer& mem_buf,
+                                const FuseMemBlock& block, uint64_t unique);
+
+  // Responds with an error header to /dev/fuse when something goes wrong.
+  void ServerRespondFuseError(uint64_t unique);
 
   int dev_fd_;
-  int set_expected_[2];
-  int done_[2];
-
-  std::vector<char> buf_;
-  std::vector<char> mem_in_;
-  std::vector<char> mem_out_;
-  ssize_t len_in_;
-  ssize_t len_out_;
+  int sock_[2];
+
+  FuseMemBuffer requests_;
+  FuseMemBuffer responses_;
 };
 
 }  // namespace testing
diff --git a/test/fuse/linux/stat_test.cc b/test/fuse/linux/stat_test.cc
index 172e098672..c2e5bd1cf2 100644
--- a/test/fuse/linux/stat_test.cc
+++ b/test/fuse/linux/stat_test.cc
@@ -33,20 +33,6 @@ namespace {
 
 class StatTest : public FuseTest {
  public:
-  bool CompareRequest(void* expected_mem, size_t expected_len, void* real_mem,
-                      size_t real_len) override {
-    if (expected_len != real_len) return false;
-    struct fuse_in_header* real_header =
-        reinterpret_cast<fuse_in_header*>(real_mem);
-
-    if (real_header->opcode != FUSE_GETATTR) {
-      std::cerr << "expect header opcode " << FUSE_GETATTR << " but got "
-                << real_header->opcode << std::endl;
-      return false;
-    }
-    return true;
-  }
-
   bool StatsAreEqual(struct stat expected, struct stat actual) {
     // device number will be dynamically allocated by kernel, we cannot know
     // in advance
@@ -56,25 +42,9 @@ class StatTest : public FuseTest {
 };
 
 TEST_F(StatTest, StatNormal) {
-  struct iovec iov_in[2];
-  struct iovec iov_out[2];
-
-  struct fuse_in_header in_header = {
-      .len = sizeof(struct fuse_in_header) + sizeof(struct fuse_getattr_in),
-      .opcode = FUSE_GETATTR,
-      .unique = 4,
-      .nodeid = 1,
-      .uid = 0,
-      .gid = 0,
-      .pid = 4,
-      .padding = 0,
-  };
-  struct fuse_getattr_in in_payload = {0};
-  iov_in[0].iov_len = sizeof(in_header);
-  iov_in[0].iov_base = &in_header;
-  iov_in[1].iov_len = sizeof(in_payload);
-  iov_in[1].iov_base = &in_payload;
-
+  // Set up fixture.
+  std::vector<struct iovec> iov_in(2);
+  std::vector<struct iovec> iov_out(2);
   mode_t expected_mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
   struct timespec atime = {.tv_sec = 1595436289, .tv_nsec = 134150844};
   struct timespec mtime = {.tv_sec = 1595436290, .tv_nsec = 134150845};
@@ -82,7 +52,6 @@ TEST_F(StatTest, StatNormal) {
   struct fuse_out_header out_header = {
       .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_attr_out),
       .error = 0,
-      .unique = 4,
   };
   struct fuse_attr attr = {
       .ino = 1,
@@ -109,11 +78,13 @@ TEST_F(StatTest, StatNormal) {
   iov_out[1].iov_len = sizeof(out_payload);
   iov_out[1].iov_base = &out_payload;
 
-  SetExpected(iov_in, 2, iov_out, 2);
+  SetServerResponse(FUSE_GETATTR, iov_out);
 
+  // Do integration test.
   struct stat stat_buf;
   EXPECT_THAT(stat(mount_point_.path().c_str(), &stat_buf), SyscallSucceeds());
 
+  // Check filesystem operation result.
   struct stat expected_stat = {
       .st_ino = attr.ino,
       .st_nlink = attr.nlink,
@@ -129,38 +100,50 @@ TEST_F(StatTest, StatNormal) {
       .st_ctim = ctime,
   };
   EXPECT_TRUE(StatsAreEqual(stat_buf, expected_stat));
-  WaitCompleted();
-}
-
-TEST_F(StatTest, StatNotFound) {
-  struct iovec iov_in[2];
-  struct iovec iov_out[2];
 
-  struct fuse_in_header in_header = {
-      .len = sizeof(struct fuse_in_header) + sizeof(struct fuse_getattr_in),
-      .opcode = FUSE_GETATTR,
-      .unique = 4,
-  };
-  struct fuse_getattr_in in_payload = {0};
+  // Check FUSE request.
+  struct fuse_in_header in_header;
+  struct fuse_getattr_in in_payload;
   iov_in[0].iov_len = sizeof(in_header);
   iov_in[0].iov_base = &in_header;
   iov_in[1].iov_len = sizeof(in_payload);
   iov_in[1].iov_base = &in_payload;
 
+  GetServerActualRequest(iov_in);
+  EXPECT_EQ(in_header.opcode, FUSE_GETATTR);
+  EXPECT_EQ(in_payload.getattr_flags, 0);
+  EXPECT_EQ(in_payload.fh, 0);
+}
+
+TEST_F(StatTest, StatNotFound) {
+  // Set up fixture.
+  std::vector<struct iovec> iov_in(2);
+  std::vector<struct iovec> iov_out(1);
   struct fuse_out_header out_header = {
       .len = sizeof(struct fuse_out_header),
       .error = -ENOENT,
-      .unique = 4,
   };
   iov_out[0].iov_len = sizeof(out_header);
   iov_out[0].iov_base = &out_header;
+  SetServerResponse(FUSE_GETATTR, iov_out);
 
-  SetExpected(iov_in, 2, iov_out, 1);
-
+  // Do integration test.
   struct stat stat_buf;
   EXPECT_THAT(stat(mount_point_.path().c_str(), &stat_buf),
               SyscallFailsWithErrno(ENOENT));
-  WaitCompleted();
+
+  // Check FUSE request.
+  struct fuse_in_header in_header;
+  struct fuse_getattr_in in_payload;
+  iov_in[0].iov_len = sizeof(in_header);
+  iov_in[0].iov_base = &in_header;
+  iov_in[1].iov_len = sizeof(in_payload);
+  iov_in[1].iov_base = &in_payload;
+
+  GetServerActualRequest(iov_in);
+  EXPECT_EQ(in_header.opcode, FUSE_GETATTR);
+  EXPECT_EQ(in_payload.getattr_flags, 0);
+  EXPECT_EQ(in_payload.fh, 0);
 }
 
 }  // namespace

From 38c3fb66d7e6242d76aa332bf60a53ea307bab29 Mon Sep 17 00:00:00 2001
From: Craig Chi <craigchi@google.com>
Date: Fri, 14 Aug 2020 09:54:35 -0700
Subject: [PATCH 169/211] Add functions in FUSE integration test to get metrics
 from FUSE server

This commit adds 3 utility functions to ensure all received requests
and preset responses are consumed.

1. Get number of unconsumed requests (received by the FUSE server but
   not consumed by the testing thread).
2. Get number of unsent responses (set by the testing thread but not
   processed by the FUSE server).
3. Get total bytes of the received requests (to ensure some operations
   don't trigger FUSE requests).

Fixes #3607
---
 test/fuse/README.md          | 27 ++++++++++++-----
 test/fuse/linux/fuse_base.cc | 58 ++++++++++++++++++++++++++++++++----
 test/fuse/linux/fuse_base.h  | 24 +++++++++++++++
 3 files changed, 96 insertions(+), 13 deletions(-)

diff --git a/test/fuse/README.md b/test/fuse/README.md
index c5909a166c..7a18397148 100644
--- a/test/fuse/README.md
+++ b/test/fuse/README.md
@@ -86,6 +86,20 @@ complete a command and when the server awaits the next instruction.
  |    =[Test actual request]           |
  |                                     |
  |    >TearDown()                      |
+ |      ...                            |
+ |      >GetServerNumUnsentResponses() |
+ |        [write data to socket]       |
+ |        [wait server complete]       |
+ |                                     |        [socket event arrive]
+ |                                     |        >ServerHandleCommand()
+ |                                     |          >ServerSendData()
+ |                                     |            [write data to socket]
+ |                                     |          <ServerSendData()
+ |                                     |          =ServerCompleteWith()
+ |        [read data from socket]      |
+ |        [test if all succeeded]      |
+ |      <GetServerNumUnsentResponses() |
+ |                                     |        <ServerHandleCommand()
  |      =UnmountFuse()                 |
  |    <TearDown()                      |
  |  <TEST_F()                          |
@@ -164,11 +178,10 @@ To add a new `FuseTestCmd`, one must comply with following format:
    many bytes you want to send along with the command and what you will expect
    to receive. Finally it should block and wait for a success indicator from
    the FUSE server.
-3. Add a `ServerReceiveXXX()` or `ServerSendXXX()` private function in
-   `FuseTest`. It is mandatory to set it private since only the FUSE server
-   (forked from `FuseTest` base class) can call it. This is the handler of a
-   specific `FuseTestCmd` and the format of the data should be consistent with
-   what client expects in the previous step.
-4. Add a case in the switch condition of `ServerHandleCommand()` to route the
-   command to the server handler described in the previous step.
+3. Add handler logic in the switch condition of `ServerHandleCommand()`. Use
+   `ServerSendData()` or declare a new private function such as
+   `ServerReceiveXXX()` or `ServerSendXXX()`. It is mandatory to set it private
+   since only the FUSE server (forked from `FuseTest` base class) can call it.
+   This is the server part of the specific `FuseTestCmd` and the format of the
+   data should be consistent with what the client expects in the previous step.
 
diff --git a/test/fuse/linux/fuse_base.cc b/test/fuse/linux/fuse_base.cc
index b7d8b2a1fa..b1897cf88d 100644
--- a/test/fuse/linux/fuse_base.cc
+++ b/test/fuse/linux/fuse_base.cc
@@ -38,7 +38,11 @@ void FuseTest::SetUp() {
   SetUpFuseServer();
 }
 
-void FuseTest::TearDown() { UnmountFuse(); }
+void FuseTest::TearDown() {
+  EXPECT_EQ(GetServerNumUnconsumedRequests(), 0);
+  EXPECT_EQ(GetServerNumUnsentResponses(), 0);
+  UnmountFuse();
+}
 
 // Sends 3 parts of data to the FUSE server:
 //   1. The `kSetResponse` command
@@ -63,10 +67,10 @@ void FuseTest::SetServerResponse(uint32_t opcode,
 // Waits for the FUSE server to finish its blocking job and check if it
 // completes without errors.
 void FuseTest::WaitServerComplete() {
-  char success;
+  uint32_t success;
   EXPECT_THAT(RetryEINTR(read)(sock_[0], &success, sizeof(success)),
               SyscallSucceedsWithValue(sizeof(success)));
-  EXPECT_EQ(success, static_cast<char>(1));
+  ASSERT_EQ(success, 1);
 }
 
 // Sends the `kGetRequest` command to the FUSE server, then reads the next
@@ -83,6 +87,35 @@ void FuseTest::GetServerActualRequest(std::vector<struct iovec>& iovecs) {
   WaitServerComplete();
 }
 
+// Sends a FuseTestCmd command to the FUSE server, reads from the socket, and
+// returns the corresponding data.
+uint32_t FuseTest::GetServerData(uint32_t cmd) {
+  uint32_t data;
+  EXPECT_THAT(RetryEINTR(write)(sock_[0], &cmd, sizeof(cmd)),
+              SyscallSucceedsWithValue(sizeof(cmd)));
+
+  EXPECT_THAT(RetryEINTR(read)(sock_[0], &data, sizeof(data)),
+              SyscallSucceedsWithValue(sizeof(data)));
+
+  WaitServerComplete();
+  return data;
+}
+
+uint32_t FuseTest::GetServerNumUnconsumedRequests() {
+  return GetServerData(
+      static_cast<uint32_t>(FuseTestCmd::kGetNumUnconsumedRequests));
+}
+
+uint32_t FuseTest::GetServerNumUnsentResponses() {
+  return GetServerData(
+      static_cast<uint32_t>(FuseTestCmd::kGetNumUnsentResponses));
+}
+
+uint32_t FuseTest::GetServerTotalReceivedBytes() {
+  return GetServerData(
+      static_cast<uint32_t>(FuseTestCmd::kGetTotalReceivedBytes));
+}
+
 void FuseTest::MountFuse() {
   EXPECT_THAT(dev_fd_ = open("/dev/fuse", O_RDWR), SyscallSucceeds());
 
@@ -141,9 +174,8 @@ void FuseTest::ServerReceiveResponse() {
 
 // Writes 1 byte of success indicator through socket.
 void FuseTest::ServerCompleteWith(bool success) {
-  char data = static_cast<char>(success);
-  EXPECT_THAT(RetryEINTR(write)(sock_[1], &data, sizeof(data)),
-              SyscallSucceedsWithValue(sizeof(data)));
+  uint32_t data = success ? 1 : 0;
+  ServerSendData(data);
 }
 
 // ServerFuseLoop is the implementation of the fake FUSE server. Monitors 2
@@ -205,6 +237,11 @@ void FuseTest::SetUpFuseServer() {
   _exit(0);
 }
 
+void FuseTest::ServerSendData(uint32_t data) {
+  EXPECT_THAT(RetryEINTR(write)(sock_[1], &data, sizeof(data)),
+              SyscallSucceedsWithValue(sizeof(data)));
+}
+
 // Reads FuseTestCmd sent from testing thread and routes to correct handler.
 // Since each command should be a blocking operation, a `ServerCompleteWith()`
 // is required after the switch keyword.
@@ -220,6 +257,15 @@ void FuseTest::ServerHandleCommand() {
     case FuseTestCmd::kGetRequest:
       ServerSendReceivedRequest();
       break;
+    case FuseTestCmd::kGetTotalReceivedBytes:
+      ServerSendData(static_cast<uint32_t>(requests_.UsedBytes()));
+      break;
+    case FuseTestCmd::kGetNumUnconsumedRequests:
+      ServerSendData(static_cast<uint32_t>(requests_.RemainingBlocks()));
+      break;
+    case FuseTestCmd::kGetNumUnsentResponses:
+      ServerSendData(static_cast<uint32_t>(responses_.RemainingBlocks()));
+      break;
     default:
       FAIL() << "Unknown FuseTestCmd " << cmd;
       break;
diff --git a/test/fuse/linux/fuse_base.h b/test/fuse/linux/fuse_base.h
index b610d0f542..3f25229777 100644
--- a/test/fuse/linux/fuse_base.h
+++ b/test/fuse/linux/fuse_base.h
@@ -36,6 +36,9 @@ constexpr char kMountOpts[] = "rootmode=755,user_id=0,group_id=0";
 enum class FuseTestCmd {
   kSetResponse = 0,
   kGetRequest,
+  kGetNumUnconsumedRequests,
+  kGetNumUnsentResponses,
+  kGetTotalReceivedBytes,
 };
 
 // Holds the information of a memory block in a serial buffer.
@@ -124,6 +127,21 @@ class FuseTest : public ::testing::Test {
   // data from server.
   void GetServerActualRequest(std::vector<struct iovec>& iovecs);
 
+  // Called by the testing thread to query the number of unconsumed requests in
+  // the requests_ serial buffer of the FUSE server. TearDown() ensures all
+  // FUSE requests received by the FUSE server were consumed by the testing
+  // thread.
+  uint32_t GetServerNumUnconsumedRequests();
+
+  // Called by the testing thread to query the number of unsent responses in
+  // the responses_ serial buffer of the FUSE server. TearDown() ensures all
+  // preset FUSE responses were sent out by the FUSE server.
+  uint32_t GetServerNumUnsentResponses();
+
+  // Called by the testing thread to ask the FUSE server for its total received
+  // bytes from /dev/fuse.
+  uint32_t GetServerTotalReceivedBytes();
+
  protected:
   TempPath mount_point_;
 
@@ -137,6 +155,9 @@ class FuseTest : public ::testing::Test {
   // Creates a socketpair for communication and forks FUSE server.
   void SetUpFuseServer();
 
+  // Sends a FuseTestCmd and gets a uint32_t data from the FUSE server.
+  inline uint32_t GetServerData(uint32_t cmd);
+
   // Waits for FUSE server to complete its processing. Complains if the FUSE
   // server responds any failure during tests.
   void WaitServerComplete();
@@ -166,6 +187,9 @@ class FuseTest : public ::testing::Test {
   // the cursor.
   void ServerSendReceivedRequest();
 
+  // Sends a uint32_t data via socket.
+  inline void ServerSendData(uint32_t data);
+
   // Handles FUSE request sent to /dev/fuse by its saved responses.
   void ServerProcessFuseRequest();
 

From 1cdca36e8f4af13a1fa73ea39452cd1eaa141abd Mon Sep 17 00:00:00 2001
From: Craig Chi <craigchi@google.com>
Date: Fri, 14 Aug 2020 10:17:08 -0700
Subject: [PATCH 170/211] Add function generating array of iovec with different
 FUSE structs

This commit adds a function in the newly created fuse_util library,
which accepts a variable number of arguments and data structures.

Fixes #3609
---
 test/fuse/linux/BUILD        |  2 ++
 test/fuse/linux/fuse_base.cc | 10 +++---
 test/fuse/linux/stat_test.cc | 31 +++++------------
 test/util/BUILD              |  6 ++++
 test/util/fuse_util.h        | 67 ++++++++++++++++++++++++++++++++++++
 5 files changed, 87 insertions(+), 29 deletions(-)
 create mode 100644 test/util/fuse_util.h

diff --git a/test/fuse/linux/BUILD b/test/fuse/linux/BUILD
index 4871bb531c..e4a614e112 100644
--- a/test/fuse/linux/BUILD
+++ b/test/fuse/linux/BUILD
@@ -12,6 +12,7 @@ cc_binary(
     deps = [
         gtest,
         ":fuse_base",
+        "//test/util:fuse_util",
         "//test/util:test_main",
         "//test/util:test_util",
     ],
@@ -24,6 +25,7 @@ cc_library(
     hdrs = ["fuse_base.h"],
     deps = [
         gtest,
+        "//test/util:fuse_util",
         "//test/util:posix_error",
         "//test/util:temp_path",
         "//test/util:test_util",
diff --git a/test/fuse/linux/fuse_base.cc b/test/fuse/linux/fuse_base.cc
index b1897cf88d..c354e1dcb1 100644
--- a/test/fuse/linux/fuse_base.cc
+++ b/test/fuse/linux/fuse_base.cc
@@ -26,6 +26,7 @@
 
 #include "absl/strings/str_format.h"
 #include "gtest/gtest.h"
+#include "test/util/fuse_util.h"
 #include "test/util/posix_error.h"
 #include "test/util/temp_path.h"
 #include "test/util/test_util.h"
@@ -137,7 +138,6 @@ PosixError FuseTest::ServerConsumeFuseInit() {
   RETURN_ERROR_IF_SYSCALL_FAIL(
       RetryEINTR(read)(dev_fd_, buf.data(), buf.size()));
 
-  struct iovec iov_out[2];
   struct fuse_out_header out_header = {
       .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_init_out),
       .error = 0,
@@ -148,12 +148,10 @@ PosixError FuseTest::ServerConsumeFuseInit() {
   struct fuse_init_out out_payload = {
       .major = 7,
   };
-  iov_out[0].iov_len = sizeof(out_header);
-  iov_out[0].iov_base = &out_header;
-  iov_out[1].iov_len = sizeof(out_payload);
-  iov_out[1].iov_base = &out_payload;
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
 
-  RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(writev)(dev_fd_, iov_out, 2));
+  RETURN_ERROR_IF_SYSCALL_FAIL(
+      RetryEINTR(writev)(dev_fd_, iov_out.data(), iov_out.size()));
   return NoError();
 }
 
diff --git a/test/fuse/linux/stat_test.cc b/test/fuse/linux/stat_test.cc
index c2e5bd1cf2..9ab53f8d20 100644
--- a/test/fuse/linux/stat_test.cc
+++ b/test/fuse/linux/stat_test.cc
@@ -24,6 +24,7 @@
 
 #include "gtest/gtest.h"
 #include "test/fuse/linux/fuse_base.h"
+#include "test/util/fuse_util.h"
 #include "test/util/test_util.h"
 
 namespace gvisor {
@@ -43,16 +44,10 @@ class StatTest : public FuseTest {
 
 TEST_F(StatTest, StatNormal) {
   // Set up fixture.
-  std::vector<struct iovec> iov_in(2);
-  std::vector<struct iovec> iov_out(2);
   mode_t expected_mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
   struct timespec atime = {.tv_sec = 1595436289, .tv_nsec = 134150844};
   struct timespec mtime = {.tv_sec = 1595436290, .tv_nsec = 134150845};
   struct timespec ctime = {.tv_sec = 1595436291, .tv_nsec = 134150846};
-  struct fuse_out_header out_header = {
-      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_attr_out),
-      .error = 0,
-  };
   struct fuse_attr attr = {
       .ino = 1,
       .size = 512,
@@ -70,14 +65,13 @@ TEST_F(StatTest, StatNormal) {
       .rdev = 12,
       .blksize = 4096,
   };
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_attr_out),
+  };
   struct fuse_attr_out out_payload = {
       .attr = attr,
   };
-  iov_out[0].iov_len = sizeof(out_header);
-  iov_out[0].iov_base = &out_header;
-  iov_out[1].iov_len = sizeof(out_payload);
-  iov_out[1].iov_base = &out_payload;
-
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
   SetServerResponse(FUSE_GETATTR, iov_out);
 
   // Do integration test.
@@ -104,10 +98,7 @@ TEST_F(StatTest, StatNormal) {
   // Check FUSE request.
   struct fuse_in_header in_header;
   struct fuse_getattr_in in_payload;
-  iov_in[0].iov_len = sizeof(in_header);
-  iov_in[0].iov_base = &in_header;
-  iov_in[1].iov_len = sizeof(in_payload);
-  iov_in[1].iov_base = &in_payload;
+  auto iov_in = FuseGenerateIovecs(in_header, in_payload);
 
   GetServerActualRequest(iov_in);
   EXPECT_EQ(in_header.opcode, FUSE_GETATTR);
@@ -117,14 +108,11 @@ TEST_F(StatTest, StatNormal) {
 
 TEST_F(StatTest, StatNotFound) {
   // Set up fixture.
-  std::vector<struct iovec> iov_in(2);
-  std::vector<struct iovec> iov_out(1);
   struct fuse_out_header out_header = {
       .len = sizeof(struct fuse_out_header),
       .error = -ENOENT,
   };
-  iov_out[0].iov_len = sizeof(out_header);
-  iov_out[0].iov_base = &out_header;
+  auto iov_out = FuseGenerateIovecs(out_header);
   SetServerResponse(FUSE_GETATTR, iov_out);
 
   // Do integration test.
@@ -135,10 +123,7 @@ TEST_F(StatTest, StatNotFound) {
   // Check FUSE request.
   struct fuse_in_header in_header;
   struct fuse_getattr_in in_payload;
-  iov_in[0].iov_len = sizeof(in_header);
-  iov_in[0].iov_base = &in_header;
-  iov_in[1].iov_len = sizeof(in_payload);
-  iov_in[1].iov_base = &in_payload;
+  auto iov_in = FuseGenerateIovecs(in_header, in_payload);
 
   GetServerActualRequest(iov_in);
   EXPECT_EQ(in_header.opcode, FUSE_GETATTR);
diff --git a/test/util/BUILD b/test/util/BUILD
index 2a17c33eeb..b0c2c2a5a9 100644
--- a/test/util/BUILD
+++ b/test/util/BUILD
@@ -45,6 +45,12 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "fuse_util",
+    testonly = 1,
+    hdrs = ["fuse_util.h"],
+)
+
 cc_library(
     name = "proc_util",
     testonly = 1,
diff --git a/test/util/fuse_util.h b/test/util/fuse_util.h
new file mode 100644
index 0000000000..5f5182b962
--- /dev/null
+++ b/test/util/fuse_util.h
@@ -0,0 +1,67 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_UTIL_FUSE_UTIL_H_
+#define GVISOR_TEST_UTIL_FUSE_UTIL_H_
+
+#include <sys/uio.h>
+
+#include <string>
+#include <vector>
+
+namespace gvisor {
+namespace testing {
+
+// The fundamental generation function with a single argument. If passed by
+// std::string or std::vector<char>, it will call specialized versions as
+// implemented below.
+template <typename T>
+std::vector<struct iovec> FuseGenerateIovecs(T &first) {
+  return {(struct iovec){.iov_base = &first, .iov_len = sizeof(first)}};
+}
+
+// If an argument is of type std::string, it must be used in read-only scenario.
+// Because we are setting up iovec, which contains the original address of a
+// data structure, we have to drop const qualification. Usually used with
+// variable-length payload data.
+template <typename T = std::string>
+std::vector<struct iovec> FuseGenerateIovecs(std::string &first) {
+  // Pad one byte for null-terminate c-string.
+  return {(struct iovec){.iov_base = const_cast<char *>(first.c_str()),
+                         .iov_len = first.size() + 1}};
+}
+
+// If an argument is of type std::vector<char>, it must be used in write-only
+// scenario and the size of the variable must be greater than or equal to the
+// size of the expected data. Usually used with variable-length payload data.
+template <typename T = std::vector<char>>
+std::vector<struct iovec> FuseGenerateIovecs(std::vector<char> &first) {
+  return {(struct iovec){.iov_base = first.data(), .iov_len = first.size()}};
+}
+
+// A helper function to set up an array of iovec struct for testing purpose.
+// Use variadic class template to generalize different numbers and different
+// types of FUSE structs.
+template <typename T, typename... Types>
+std::vector<struct iovec> FuseGenerateIovecs(T &first, Types &...args) {
+  auto first_iovec = FuseGenerateIovecs(first);
+  auto iovecs = FuseGenerateIovecs(args...);
+  first_iovec.insert(std::end(first_iovec), std::begin(iovecs),
+                     std::end(iovecs));
+  return first_iovec;
+}
+
+}  // namespace testing
+}  // namespace gvisor
+#endif  // GVISOR_TEST_UTIL_FUSE_UTIL_H_

From 1aa1bb9aad6f1984e32f11a5e8c8bfd98746dc85 Mon Sep 17 00:00:00 2001
From: Craig Chi <craigchi@google.com>
Date: Mon, 17 Aug 2020 10:05:10 -0700
Subject: [PATCH 171/211] Add function to create a fake inode in FUSE
 integration test

Adds a function for the testing thread to set up a fake inode with a
specific path under mount point. After this function is called, each
subsequent FUSE_LOOKUP request with the same path will be served with
the fixed stub response.

Fixes #3539
---
 test/fuse/linux/fuse_base.cc | 91 ++++++++++++++++++++++++++++++++++++
 test/fuse/linux/fuse_base.h  | 25 ++++++++++
 2 files changed, 116 insertions(+)

diff --git a/test/fuse/linux/fuse_base.cc b/test/fuse/linux/fuse_base.cc
index c354e1dcb1..a9fe1044ee 100644
--- a/test/fuse/linux/fuse_base.cc
+++ b/test/fuse/linux/fuse_base.cc
@@ -117,6 +117,23 @@ uint32_t FuseTest::GetServerTotalReceivedBytes() {
       static_cast<uint32_t>(FuseTestCmd::kGetTotalReceivedBytes));
 }
 
+// Sends the `kSetInodeLookup` command, expected mode, and the path of the
+// inode to create under the mount point.
+void FuseTest::SetServerInodeLookup(const std::string& path, mode_t mode) {
+  uint32_t cmd = static_cast<uint32_t>(FuseTestCmd::kSetInodeLookup);
+  EXPECT_THAT(RetryEINTR(write)(sock_[0], &cmd, sizeof(cmd)),
+              SyscallSucceedsWithValue(sizeof(cmd)));
+
+  EXPECT_THAT(RetryEINTR(write)(sock_[0], &mode, sizeof(mode)),
+              SyscallSucceedsWithValue(sizeof(mode)));
+
+  // Pad 1 byte for null-terminate c-string.
+  EXPECT_THAT(RetryEINTR(write)(sock_[0], path.c_str(), path.size() + 1),
+              SyscallSucceedsWithValue(path.size() + 1));
+
+  WaitServerComplete();
+}
+
 void FuseTest::MountFuse() {
   EXPECT_THAT(dev_fd_ = open("/dev/fuse", O_RDWR), SyscallSucceeds());
 
@@ -252,6 +269,9 @@ void FuseTest::ServerHandleCommand() {
     case FuseTestCmd::kSetResponse:
       ServerReceiveResponse();
       break;
+    case FuseTestCmd::kSetInodeLookup:
+      ServerReceiveInodeLookup();
+      break;
     case FuseTestCmd::kGetRequest:
       ServerSendReceivedRequest();
       break;
@@ -272,6 +292,64 @@ void FuseTest::ServerHandleCommand() {
   ServerCompleteWith(!HasFailure());
 }
 
+// Reads the expected file mode and the path of one file. Crafts a basic
+// `fuse_entry_out` memory block and inserts into a map for future use.
+// The FUSE server will always return this response if a FUSE_LOOKUP
+// request with this specific path comes in.
+void FuseTest::ServerReceiveInodeLookup() {
+  mode_t mode;
+  std::vector<char> buf(FUSE_MIN_READ_BUFFER);
+
+  EXPECT_THAT(RetryEINTR(read)(sock_[1], &mode, sizeof(mode)),
+              SyscallSucceedsWithValue(sizeof(mode)));
+
+  EXPECT_THAT(RetryEINTR(read)(sock_[1], buf.data(), buf.size()),
+              SyscallSucceeds());
+
+  std::string path(buf.data());
+
+  uint32_t out_len =
+      sizeof(struct fuse_out_header) + sizeof(struct fuse_entry_out);
+  struct fuse_out_header out_header = {
+      .len = out_len,
+      .error = 0,
+  };
+  struct fuse_entry_out out_payload = {
+      .nodeid = nodeid_,
+      .generation = 0,
+      .entry_valid = 0,
+      .attr_valid = 0,
+      .entry_valid_nsec = 0,
+      .attr_valid_nsec = 0,
+      .attr =
+          (struct fuse_attr){
+              .ino = nodeid_,
+              .size = 512,
+              .blocks = 4,
+              .atime = 0,
+              .mtime = 0,
+              .ctime = 0,
+              .atimensec = 0,
+              .mtimensec = 0,
+              .ctimensec = 0,
+              .mode = mode,
+              .nlink = 2,
+              .uid = 1234,
+              .gid = 4321,
+              .rdev = 12,
+              .blksize = 4096,
+          },
+  };
+  // Since this is only used in test, nodeid_ is simply increased by 1 to
+  // comply with the uniqueness of different paths.
+  ++nodeid_;
+
+  memcpy(buf.data(), &out_header, sizeof(out_header));
+  memcpy(buf.data() + sizeof(out_header), &out_payload, sizeof(out_payload));
+  lookups_.AddMemBlock(FUSE_LOOKUP, buf.data(), out_len);
+  lookup_map_[path] = lookups_.Next();
+}
+
 // Sends the received request pointed by current cursor and advances cursor.
 void FuseTest::ServerSendReceivedRequest() {
   if (requests_.End()) {
@@ -297,6 +375,19 @@ void FuseTest::ServerProcessFuseRequest() {
   EXPECT_THAT(len = RetryEINTR(read)(dev_fd_, buf.data(), buf.size()),
               SyscallSucceeds());
   fuse_in_header* in_header = reinterpret_cast<fuse_in_header*>(buf.data());
+
+  // Check if this is a preset FUSE_LOOKUP path.
+  if (in_header->opcode == FUSE_LOOKUP) {
+    std::string path(buf.data() + sizeof(struct fuse_in_header));
+    auto it = lookup_map_.find(path);
+    if (it != lookup_map_.end()) {
+      // Matches a preset path. Reply with fake data and skip saving the
+      // request.
+      ServerRespondFuseSuccess(lookups_, it->second, in_header->unique);
+      return;
+    }
+  }
+
   requests_.AddMemBlock(in_header->opcode, buf.data(), len);
 
   // Check if there is a corresponding response.
diff --git a/test/fuse/linux/fuse_base.h b/test/fuse/linux/fuse_base.h
index 3f25229777..a21b4bb8da 100644
--- a/test/fuse/linux/fuse_base.h
+++ b/test/fuse/linux/fuse_base.h
@@ -17,9 +17,11 @@
 
 #include <linux/fuse.h>
 #include <string.h>
+#include <sys/stat.h>
 #include <sys/uio.h>
 
 #include <iostream>
+#include <unordered_map>
 #include <vector>
 
 #include "gtest/gtest.h"
@@ -35,6 +37,7 @@ constexpr char kMountOpts[] = "rootmode=755,user_id=0,group_id=0";
 // server. See test/fuse/README.md for further detail.
 enum class FuseTestCmd {
   kSetResponse = 0,
+  kSetInodeLookup,
   kGetRequest,
   kGetNumUnconsumedRequests,
   kGetNumUnsentResponses,
@@ -114,6 +117,9 @@ class FuseMemBuffer {
 // to manipulate with it. Refer to test/fuse/README.md for detailed explanation.
 class FuseTest : public ::testing::Test {
  public:
+  // nodeid_ is the ID of a fake inode. We start from 2 since 1 is occupied by
+  // the mount point.
+  FuseTest() : nodeid_(2) {}
   void SetUp() override;
   void TearDown() override;
 
@@ -122,6 +128,16 @@ class FuseTest : public ::testing::Test {
   // expected FUSE reactions.
   void SetServerResponse(uint32_t opcode, std::vector<struct iovec>& iovecs);
 
+  // Called by the testing thread to install a fake path under the mount point.
+  // e.g. a file under /mnt/dir/file and mount point is /mnt, then it will look
+  // up "dir/file" in this case.
+  //
+  // It sets a fixed response to the FUSE_LOOKUP requests issued with this
+  // path, pretending there is an inode and avoiding ENOENT when testing. If mode
+  // is not given, it creates a regular file with mode 0600.
+  void SetServerInodeLookup(const std::string& path,
+                            mode_t mode = S_IFREG | S_IRUSR | S_IWUSR);
+
   // Called by the testing thread to ask the FUSE server for its next received
   // FUSE request. Be sure to use the corresponding struct of iovec to receive
   // data from server.
@@ -182,6 +198,11 @@ class FuseTest : public ::testing::Test {
   // memory queue.
   void ServerReceiveResponse();
 
+  // The FUSE server side's corresponding code of `SetServerInodeLookup()`.
+  // Handles `kSetInodeLookup` command. Receives an expected file mode and
+  // file path under the mount point.
+  void ServerReceiveInodeLookup();
+
   // The FUSE server side's corresponding code of `GetServerActualRequest()`.
   // Handles `kGetRequest` command. Sends the next received request pointed by
   // the cursor.
@@ -203,8 +224,12 @@ class FuseTest : public ::testing::Test {
   int dev_fd_;
   int sock_[2];
 
+  uint64_t nodeid_;
+  std::unordered_map<std::string, FuseMemBlock> lookup_map_;
+
   FuseMemBuffer requests_;
   FuseMemBuffer responses_;
+  FuseMemBuffer lookups_;
 };
 
 }  // namespace testing

From 2c974036d664e0eb3e5b567f4d1082af19626889 Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin@google.com>
Date: Thu, 13 Aug 2020 22:23:40 +0000
Subject: [PATCH 172/211] Implement FUSE_LOOKUP

Fixes #3231

Co-authored-by: Boyuan He <heboyuan@google.com>
---
 pkg/abi/linux/fuse.go                  | 56 ++++++++++++++++++
 pkg/sentry/fsimpl/fuse/fusefs.go       | 80 +++++++++++++++++++++++---
 pkg/sentry/fsimpl/kernfs/filesystem.go |  2 +-
 pkg/sentry/fsimpl/kernfs/kernfs.go     |  6 +-
 4 files changed, 133 insertions(+), 11 deletions(-)

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index 7e30483eee..346a9e6fc6 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -14,12 +14,17 @@
 
 package linux
 
+import "gvisor.dev/gvisor/tools/go_marshal/marshal"
+
 // +marshal
 type FUSEOpcode uint32
 
 // +marshal
 type FUSEOpID uint64
 
+// FUSE_ROOT_ID is the id of root inode.
+const FUSE_ROOT_ID = 1
+
 // Opcodes for FUSE operations. Analogous to the opcodes in include/linux/fuse.h.
 const (
 	FUSE_LOOKUP   FUSEOpcode = 1
@@ -301,3 +306,54 @@ type FUSEGetAttrOut struct {
 	// Attr contains the metadata returned from the FUSE server
 	Attr FUSEAttr
 }
+
+// FUSEEntryOut is the reply sent by the daemon to the kernel
+// for FUSE_MKNOD, FUSE_MKDIR, FUSE_SYMLINK, FUSE_LINK and
+// FUSE_LOOKUP.
+//
+// +marshal
+type FUSEEntryOut struct {
+	// NodeID is the ID for current inode.
+	NodeID uint64
+
+	// Generation is the generation number of inode.
+	// Used to identify an inode that have different ID at different time.
+	Generation uint64
+
+	// EntryValid indicates timeout for an entry.
+	EntryValid uint64
+
+	// AttrValid indicates timeout for an entry's attributes.
+	AttrValid uint64
+
+	// EntryValidNSec indicates timeout for an entry in nanoseconds.
+	EntryValidNSec uint32
+
+	// AttrValidNSec indicates timeout for an entry's attributes in nanoseconds.
+	AttrValidNSec uint32
+
+	// Attr contains the attributes of an entry.
+	Attr FUSEAttr
+}
+
+// FUSELookupIn is the request sent by the kernel to the daemon
+// to look up a file name.
+//
+// Dynamically-sized objects cannot be marshalled.
+type FUSELookupIn struct {
+	marshal.StubMarshallable
+
+	// Name is a file name to be looked up.
+	Name string
+}
+
+// MarshalUnsafe serializes r.Name to the buf buffer.
+func (r *FUSELookupIn) MarshalUnsafe(buf []byte) {
+	copy(buf, []byte(r.Name))
+}
+
+// SizeBytes is the size of the memory representation of FUSELookupIn.
+// 1 extra byte for null-terminated string.
+func (r *FUSELookupIn) SizeBytes() int {
+	return len(r.Name) + 1
+}
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index 810819ae41..cee5acb3f2 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -26,6 +26,7 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/syserror"
+	"gvisor.dev/gvisor/tools/go_marshal/marshal"
 )
 
 // Name is the default filesystem name.
@@ -165,7 +166,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
 	}
 
 	// root is the fusefs root directory.
-	root := fs.newInode(creds, fsopts.rootMode)
+	root := fs.newRootInode(creds, fsopts.rootMode)
 
 	return fs.VFSFilesystem(), root.VFSDentry(), nil
 }
@@ -205,14 +206,28 @@ type inode struct {
 	kernfs.InodeNotSymlink
 	kernfs.OrderedChildren
 
-	locks vfs.FileLocks
-
+	NodeID uint64
 	dentry kernfs.Dentry
+	locks  vfs.FileLocks
+
+	// the owning filesystem. fs is immutable.
+	fs *filesystem
 }
 
-func (fs *filesystem) newInode(creds *auth.Credentials, mode linux.FileMode) *kernfs.Dentry {
-	i := &inode{}
-	i.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0755)
+func (fs *filesystem) newRootInode(creds *auth.Credentials, mode linux.FileMode) *kernfs.Dentry {
+	i := &inode{fs: fs}
+	i.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, 1, linux.ModeDirectory|0755)
+	i.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+	i.dentry.Init(i)
+	i.NodeID = 1
+
+	return &i.dentry
+}
+
+func (fs *filesystem) newInode(nodeID uint64, attr linux.FUSEAttr) *kernfs.Dentry {
+	i := &inode{fs: fs, NodeID: nodeID}
+	creds := auth.Credentials{EffectiveKGID: auth.KGID(attr.GID), EffectiveKUID: auth.KUID(attr.UID)}
+	i.InodeAttrs.Init(&creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.FileMode(attr.Mode))
 	i.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
 	i.EnableLeakCheck()
 	i.dentry.Init(i)
@@ -231,6 +246,57 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr
 	return fd.VFSFileDescription(), nil
 }
 
+// Lookup implements kernfs.Inode.Lookup.
+func (i *inode) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) {
+	in := linux.FUSELookupIn{Name: name}
+	return i.newEntry(ctx, name, 0, linux.FUSE_LOOKUP, &in)
+}
+
+// IterDirents implements Inode.IterDirents.
+func (inode) IterDirents(ctx context.Context, callback vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
+	return offset, nil
+}
+
+// Valid implements Inode.Valid.
+func (inode) Valid(ctx context.Context) bool {
+	return true
+}
+
+// newEntry calls FUSE server for entry creation and allocates corresponding entry according to response.
+// Shared by FUSE_MKNOD, FUSE_MKDIR, FUSE_SYMLINK, FUSE_LINK and FUSE_LOOKUP.
+func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMode, opcode linux.FUSEOpcode, payload marshal.Marshallable) (*vfs.Dentry, error) {
+	kernelTask := kernel.TaskFromContext(ctx)
+	if kernelTask == nil {
+		log.Warningf("fusefs.Inode.newEntry: couldn't get kernel task from context (NodeID %d)", i.NodeID)
+		return nil, syserror.EINVAL
+	}
+	req, err := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.NodeID, opcode, payload)
+	if err != nil {
+		return nil, err
+	}
+	res, err := i.fs.conn.Call(kernelTask, req)
+	if err != nil {
+		return nil, err
+	}
+	if err := res.Error(); err != nil {
+		return nil, err
+	}
+	out := linux.FUSEEntryOut{}
+	if err := res.UnmarshalPayload(&out); err != nil {
+		return nil, err
+	}
+	if opcode != linux.FUSE_LOOKUP && ((out.Attr.Mode&linux.S_IFMT)^uint32(fileType) != 0 || out.NodeID == 0 || out.NodeID == linux.FUSE_ROOT_ID) {
+		return nil, syserror.EIO
+	}
+	child := i.fs.newInode(out.NodeID, out.Attr)
+	if opcode == linux.FUSE_LOOKUP {
+		i.dentry.InsertChildLocked(name, child)
+	} else {
+		i.dentry.InsertChild(name, child)
+	}
+	return child.VFSDentry(), nil
+}
+
 // statFromFUSEAttr makes attributes from linux.FUSEAttr to linux.Statx. The
 // opts.Sync attribute is ignored since the synchronization is handled by the
 // FUSE server.
@@ -299,7 +365,7 @@ func (i *inode) Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptio
 	// finally be translated into vfs.FilesystemImpl.StatAt() (see
 	// pkg/sentry/syscalls/linux/vfs2/stat.go), resulting in the same flow
 	// as stat(2). Thus GetAttrFlags and Fh variable will never be used in VFS2.
-	req, err := conn.NewRequest(creds, uint32(task.ThreadID()), i.Ino(), linux.FUSE_GETATTR, &in)
+	req, err := conn.NewRequest(creds, uint32(task.ThreadID()), i.NodeID, linux.FUSE_GETATTR, &in)
 	if err != nil {
 		return linux.Statx{}, err
 	}
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index d7d3e8f483..9042030706 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -140,7 +140,7 @@ func (fs *Filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.Vir
 		}
 		// Reference on childVFSD dropped by a corresponding Valid.
 		child = childVFSD.Impl().(*Dentry)
-		parent.insertChildLocked(name, child)
+		parent.InsertChildLocked(name, child)
 	}
 	return child, nil
 }
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index 88fcd54aa6..67a0347fe0 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -246,15 +246,15 @@ func (d *Dentry) OnZeroWatches(context.Context) {}
 // Precondition: d must represent a directory inode.
 func (d *Dentry) InsertChild(name string, child *Dentry) {
 	d.dirMu.Lock()
-	d.insertChildLocked(name, child)
+	d.InsertChildLocked(name, child)
 	d.dirMu.Unlock()
 }
 
-// insertChildLocked is equivalent to InsertChild, with additional
+// InsertChildLocked is equivalent to InsertChild, with additional
 // preconditions.
 //
 // Precondition: d.dirMu must be locked.
-func (d *Dentry) insertChildLocked(name string, child *Dentry) {
+func (d *Dentry) InsertChildLocked(name string, child *Dentry) {
 	if !d.isDir() {
 		panic(fmt.Sprintf("InsertChild called on non-directory Dentry: %+v.", d))
 	}

From fc1196ee65a53004e3e8b97ec5d57404552b90b8 Mon Sep 17 00:00:00 2001
From: Boyuan He <heboyuan@google.com>
Date: Tue, 18 Aug 2020 01:46:39 +0000
Subject: [PATCH 173/211] Implement FUSE_OPEN/OPENDIR

Fixes #3174
---
 pkg/abi/linux/fuse.go                |  38 ++++++++
 pkg/sentry/fsimpl/fuse/BUILD         |   1 +
 pkg/sentry/fsimpl/fuse/connection.go |  16 +++-
 pkg/sentry/fsimpl/fuse/file.go       |  96 +++++++++++++++++++++
 pkg/sentry/fsimpl/fuse/fusefs.go     |  97 +++++++++++++++++++--
 test/fuse/BUILD                      |   5 ++
 test/fuse/linux/BUILD                |  13 +++
 test/fuse/linux/fuse_base.cc         |  50 ++++++-----
 test/fuse/linux/fuse_base.h          |   9 ++
 test/fuse/linux/open_test.cc         | 124 +++++++++++++++++++++++++++
 test/util/BUILD                      |   1 +
 test/util/fuse_util.cc               |  59 +++++++++++++
 test/util/fuse_util.h                |   4 +
 13 files changed, 481 insertions(+), 32 deletions(-)
 create mode 100644 pkg/sentry/fsimpl/fuse/file.go
 create mode 100644 test/fuse/linux/open_test.cc
 create mode 100644 test/util/fuse_util.cc

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index 346a9e6fc6..e09715ecd2 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -357,3 +357,41 @@ func (r *FUSELookupIn) MarshalUnsafe(buf []byte) {
 func (r *FUSELookupIn) SizeBytes() int {
 	return len(r.Name) + 1
 }
+
+// MAX_NON_LFS indicates the maximum offset without large file support.
+const MAX_NON_LFS = ((1 << 31) - 1)
+
+// flags returned by OPEN request.
+const (
+	// FOPEN_DIRECT_IO indicates bypassing page cache for this opened file.
+	FOPEN_DIRECT_IO = 1 << 0
+	// FOPEN_KEEP_CACHE avoids invalidating the data cache on open.
+	FOPEN_KEEP_CACHE = 1 << 1
+	// FOPEN_NONSEEKABLE indicates the file cannot be seeked.
+	FOPEN_NONSEEKABLE = 1 << 2
+)
+
+// FUSEOpenIn is the request sent by the kernel to the daemon,
+// to negotiate flags and get file handle.
+//
+// +marshal
+type FUSEOpenIn struct {
+	// Flags of this open request.
+	Flags uint32
+
+	_ uint32
+}
+
+// FUSEOpenOut is the reply sent by the daemon to the kernel
+// for FUSEOpenIn.
+//
+// +marshal
+type FUSEOpenOut struct {
+	// Fh is the file handle for the opened file.
+	Fh uint64
+
+	// OpenFlag for the opened file.
+	OpenFlag uint32
+
+	_ uint32
+}
diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD
index 53a4f3012a..d1671e576d 100644
--- a/pkg/sentry/fsimpl/fuse/BUILD
+++ b/pkg/sentry/fsimpl/fuse/BUILD
@@ -31,6 +31,7 @@ go_library(
     srcs = [
         "connection.go",
         "dev.go",
+        "file.go",
         "fusefs.go",
         "init.go",
         "inode_refs.go",
diff --git a/pkg/sentry/fsimpl/fuse/connection.go b/pkg/sentry/fsimpl/fuse/connection.go
index 6df2728aba..7d3c30116a 100644
--- a/pkg/sentry/fsimpl/fuse/connection.go
+++ b/pkg/sentry/fsimpl/fuse/connection.go
@@ -78,8 +78,13 @@ type Response struct {
 type connection struct {
 	fd *DeviceFD
 
+	// mu protects access to struct members.
+	mu sync.Mutex
+
+	// attributeVersion is the version of connection's attributes.
+	attributeVersion uint64
+
 	// The following FUSE_INIT flags are currently unsupported by this implementation:
-	// - FUSE_ATOMIC_O_TRUNC: requires open(..., O_TRUNC)
 	// - FUSE_EXPORT_SUPPORT
 	// - FUSE_HANDLE_KILLPRIV
 	// - FUSE_POSIX_LOCKS: requires POSIX locks
@@ -113,6 +118,11 @@ type connection struct {
 	// TODO(gvisor.dev/issue/3185): abort all queued requests.
 	aborted bool
 
+	// atomicOTrunc is true when FUSE does not send a separate SETATTR request
+	// before open with O_TRUNC flag.
+	// Negotiated and only set in INIT.
+	atomicOTrunc bool
+
 	// connInitError if FUSE_INIT encountered error (major version mismatch).
 	// Only set in INIT.
 	connInitError bool
@@ -189,6 +199,10 @@ type connection struct {
 	// dontMask if filestestem does not apply umask to creation modes.
 	// Negotiated in INIT.
 	dontMask bool
+
+	// noOpen is true if the FUSE server doesn't support the open operation.
+	// This flag only influences performance, not correctness of the program.
+	noOpen bool
 }
 
 // newFUSEConnection creates a FUSE connection to fd.
diff --git a/pkg/sentry/fsimpl/fuse/file.go b/pkg/sentry/fsimpl/fuse/file.go
new file mode 100644
index 0000000000..ab60ab7144
--- /dev/null
+++ b/pkg/sentry/fsimpl/fuse/file.go
@@ -0,0 +1,96 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fuse
+
+import (
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+	"gvisor.dev/gvisor/pkg/usermem"
+)
+
+// fileDescription implements vfs.FileDescriptionImpl for fuse.
+type fileDescription struct {
+	vfsfd vfs.FileDescription
+	vfs.FileDescriptionDefaultImpl
+	vfs.DentryMetadataFileDescriptionImpl
+	vfs.NoLockFD
+
+	// Fh is the file handle used in userspace, as returned in the open reply.
+	Fh uint64
+
+	// Nonseekable indicates that seek cannot be performed on the file (FOPEN_NONSEEKABLE).
+	Nonseekable bool
+
+	// DirectIO suggests that fuse use direct I/O operations (FOPEN_DIRECT_IO).
+	DirectIO bool
+
+	// OpenFlag holds the flags returned by open.
+	OpenFlag uint32
+}
+
+func (fd *fileDescription) dentry() *kernfs.Dentry {
+	return fd.vfsfd.Dentry().Impl().(*kernfs.Dentry)
+}
+
+func (fd *fileDescription) inode() *inode {
+	return fd.dentry().Inode().(*inode)
+}
+
+func (fd *fileDescription) filesystem() *vfs.Filesystem {
+	return fd.vfsfd.VirtualDentry().Mount().Filesystem()
+}
+
+// Release implements vfs.FileDescriptionImpl.Release.
+func (fd *fileDescription) Release(ctx context.Context) {}
+
+// PRead implements vfs.FileDescriptionImpl.PRead.
+func (fd *fileDescription) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
+	return 0, nil
+}
+
+// Read implements vfs.FileDescriptionImpl.Read.
+func (fd *fileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
+	return 0, nil
+}
+
+// PWrite implements vfs.FileDescriptionImpl.PWrite.
+func (fd *fileDescription) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
+	return 0, nil
+}
+
+// Write implements vfs.FileDescriptionImpl.Write.
+func (fd *fileDescription) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
+	return 0, nil
+}
+
+// Seek implements vfs.FileDescriptionImpl.Seek.
+func (fd *fileDescription) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
+	return 0, nil
+}
+
+// Stat implements FileDescriptionImpl.Stat.
+// Stat implements vfs.FileDescriptionImpl.Stat.
+func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
+	fs := fd.filesystem()
+	inode := fd.inode()
+	return inode.Stat(ctx, fs, opts)
+}
+
+// SetStat implements FileDescriptionImpl.SetStat.
+func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
+	return nil
+}
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index cee5acb3f2..b5f05b80b3 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -17,6 +17,7 @@ package fuse
 
 import (
 	"strconv"
+	"sync/atomic"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
@@ -212,6 +213,18 @@ type inode struct {
 
 	// the owning filesystem. fs is immutable.
 	fs *filesystem
+
+	// size of the file.
+	size uint64
+
+	// attributeVersion is the version of inode's attributes.
+	attributeVersion uint64
+
+	// attributeTime is the remaining valid time of attributes.
+	attributeTime uint64
+
+	// version of the inode.
+	version uint64
 }
 
 func (fs *filesystem) newRootInode(creds *auth.Credentials, mode linux.FileMode) *kernfs.Dentry {
@@ -237,13 +250,87 @@ func (fs *filesystem) newInode(nodeID uint64, attr linux.FUSEAttr) *kernfs.Dentr
 
 // Open implements kernfs.Inode.Open.
 func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
-	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts, kernfs.GenericDirectoryFDOptions{
-		SeekEnd: kernfs.SeekEndStaticEntries,
-	})
-	if err != nil {
+	isDir := i.InodeAttrs.Mode().IsDir()
+	// return error if specified to open directory but inode is not a directory.
+	if !isDir && opts.Mode.IsDir() {
+		return nil, syserror.ENOTDIR
+	}
+	if opts.Flags&linux.O_LARGEFILE == 0 && atomic.LoadUint64(&i.size) > linux.MAX_NON_LFS {
+		return nil, syserror.EOVERFLOW
+	}
+
+	// FOPEN_KEEP_CACHE is the default flag for noOpen.
+	fd := fileDescription{OpenFlag: linux.FOPEN_KEEP_CACHE}
+	// Only send an open request when the FUSE server supports open or when opening a directory.
+	if !i.fs.conn.noOpen || isDir {
+		kernelTask := kernel.TaskFromContext(ctx)
+		if kernelTask == nil {
+			log.Warningf("fusefs.Inode.Open: couldn't get kernel task from context")
+			return nil, syserror.EINVAL
+		}
+
+		var opcode linux.FUSEOpcode
+		if isDir {
+			opcode = linux.FUSE_OPENDIR
+		} else {
+			opcode = linux.FUSE_OPEN
+		}
+		in := linux.FUSEOpenIn{Flags: opts.Flags & ^uint32(linux.O_CREAT|linux.O_EXCL|linux.O_NOCTTY)}
+		if !i.fs.conn.atomicOTrunc {
+			in.Flags &= ^uint32(linux.O_TRUNC)
+		}
+		req, err := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.NodeID, opcode, &in)
+		if err != nil {
+			return nil, err
+		}
+
+		res, err := i.fs.conn.Call(kernelTask, req)
+		if err != nil {
+			return nil, err
+		}
+		if err := res.Error(); err == syserror.ENOSYS && !isDir {
+			i.fs.conn.noOpen = true
+		} else if err != nil {
+			return nil, err
+		} else {
+			out := linux.FUSEOpenOut{}
+			if err := res.UnmarshalPayload(&out); err != nil {
+				return nil, err
+			}
+			fd.OpenFlag = out.OpenFlag
+			fd.Fh = out.Fh
+		}
+	}
+
+	if isDir {
+		fd.OpenFlag &= ^uint32(linux.FOPEN_DIRECT_IO)
+	}
+
+	// TODO(gvisor.dev/issue/3234): invalidate mmap after implemented it for FUSE Inode
+	fd.DirectIO = fd.OpenFlag&linux.FOPEN_DIRECT_IO != 0
+	fdOptions := &vfs.FileDescriptionOptions{}
+	if fd.OpenFlag&linux.FOPEN_NONSEEKABLE != 0 {
+		fdOptions.DenyPRead = true
+		fdOptions.DenyPWrite = true
+		fd.Nonseekable = true
+	}
+
+	// If we don't send SETATTR before open (which is indicated by atomicOTrunc)
+	// and O_TRUNC is set, update the inode's version number and clean existing data
+	// by setting the file size to 0.
+	if i.fs.conn.atomicOTrunc && opts.Flags&linux.O_TRUNC != 0 {
+		i.fs.conn.mu.Lock()
+		i.fs.conn.attributeVersion++
+		i.attributeVersion = i.fs.conn.attributeVersion
+		atomic.StoreUint64(&i.size, 0)
+		i.fs.conn.mu.Unlock()
+		i.attributeTime = 0
+	}
+
+	if err := fd.vfsfd.Init(&fd, opts.Flags, rp.Mount(), vfsd, fdOptions); err != nil {
 		return nil, err
 	}
-	return fd.VFSFileDescription(), nil
+	return &fd.vfsfd, nil
 }
 
 // Lookup implements kernfs.Inode.Lookup.
diff --git a/test/fuse/BUILD b/test/fuse/BUILD
index 385920e171..209030ff10 100644
--- a/test/fuse/BUILD
+++ b/test/fuse/BUILD
@@ -6,3 +6,8 @@ syscall_test(
     fuse = "True",
     test = "//test/fuse/linux:stat_test",
 )
+
+syscall_test(
+    fuse = "True",
+    test = "//test/fuse/linux:open_test",
+)
diff --git a/test/fuse/linux/BUILD b/test/fuse/linux/BUILD
index e4a614e112..1d989a2f4f 100644
--- a/test/fuse/linux/BUILD
+++ b/test/fuse/linux/BUILD
@@ -18,6 +18,19 @@ cc_binary(
     ],
 )
 
+cc_binary(
+    name = "open_test",
+    testonly = 1,
+    srcs = ["open_test.cc"],
+    deps = [
+        gtest,
+        ":fuse_base",
+        "//test/util:fuse_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
 cc_library(
     name = "fuse_base",
     testonly = 1,
diff --git a/test/fuse/linux/fuse_base.cc b/test/fuse/linux/fuse_base.cc
index a9fe1044ee..e734100b19 100644
--- a/test/fuse/linux/fuse_base.cc
+++ b/test/fuse/linux/fuse_base.cc
@@ -117,6 +117,16 @@ uint32_t FuseTest::GetServerTotalReceivedBytes() {
       static_cast<uint32_t>(FuseTestCmd::kGetTotalReceivedBytes));
 }
 
+// Sends the `kSkipRequest` command to the FUSE server, which would skip
+// current stored request data.
+void FuseTest::SkipServerActualRequest() {
+  uint32_t cmd = static_cast<uint32_t>(FuseTestCmd::kSkipRequest);
+  EXPECT_THAT(RetryEINTR(write)(sock_[0], &cmd, sizeof(cmd)),
+              SyscallSucceedsWithValue(sizeof(cmd)));
+
+  WaitServerComplete();
+}
+
 // Sends the `kSetInodeLookup` command, expected mode, and the path of the
 // inode to create under the mount point.
 void FuseTest::SetServerInodeLookup(const std::string& path, mode_t mode) {
@@ -284,6 +294,9 @@ void FuseTest::ServerHandleCommand() {
     case FuseTestCmd::kGetNumUnsentResponses:
       ServerSendData(static_cast<uint32_t>(responses_.RemainingBlocks()));
       break;
+    case FuseTestCmd::kSkipRequest:
+      ServerSkipReceivedRequest();
+      break;
     default:
       FAIL() << "Unknown FuseTestCmd " << cmd;
       break;
@@ -314,32 +327,7 @@ void FuseTest::ServerReceiveInodeLookup() {
       .len = out_len,
       .error = 0,
   };
-  struct fuse_entry_out out_payload = {
-      .nodeid = nodeid_,
-      .generation = 0,
-      .entry_valid = 0,
-      .attr_valid = 0,
-      .entry_valid_nsec = 0,
-      .attr_valid_nsec = 0,
-      .attr =
-          (struct fuse_attr){
-              .ino = nodeid_,
-              .size = 512,
-              .blocks = 4,
-              .atime = 0,
-              .mtime = 0,
-              .ctime = 0,
-              .atimensec = 0,
-              .mtimensec = 0,
-              .ctimensec = 0,
-              .mode = mode,
-              .nlink = 2,
-              .uid = 1234,
-              .gid = 4321,
-              .rdev = 12,
-              .blksize = 4096,
-          },
-  };
+  struct fuse_entry_out out_payload = DefaultEntryOut(mode, nodeid_);
   // Since this is only used in test, nodeid_ is simply increased by 1 to
   // comply with the unqiueness of different path.
   ++nodeid_;
@@ -363,6 +351,15 @@ void FuseTest::ServerSendReceivedRequest() {
       SyscallSucceedsWithValue(mem_block.len));
 }
 
+// Skip the request pointed by current cursor.
+void FuseTest::ServerSkipReceivedRequest() {
+  if (requests_.End()) {
+    FAIL() << "No more received request.";
+    return;
+  }
+  requests_.Next();
+}
+
 // Handles FUSE request. Reads request from /dev/fuse, checks if it has the
 // same opcode as expected, and responds with the saved fake FUSE response.
 // The FUSE request is copied to the serial buffer and can be retrieved one-
@@ -390,6 +387,7 @@ void FuseTest::ServerProcessFuseRequest() {
 
   requests_.AddMemBlock(in_header->opcode, buf.data(), len);
 
+  if (in_header->opcode == FUSE_RELEASE) return;
   // Check if there is a corresponding response.
   if (responses_.End()) {
     GTEST_NONFATAL_FAILURE_("No more FUSE response is expected");
diff --git a/test/fuse/linux/fuse_base.h b/test/fuse/linux/fuse_base.h
index a21b4bb8da..2474b763f8 100644
--- a/test/fuse/linux/fuse_base.h
+++ b/test/fuse/linux/fuse_base.h
@@ -42,6 +42,7 @@ enum class FuseTestCmd {
   kGetNumUnconsumedRequests,
   kGetNumUnsentResponses,
   kGetTotalReceivedBytes,
+  kSkipRequest,
 };
 
 // Holds the information of a memory block in a serial buffer.
@@ -158,6 +159,10 @@ class FuseTest : public ::testing::Test {
   // bytes from /dev/fuse.
   uint32_t GetServerTotalReceivedBytes();
 
+  // Called by the testing thread to ask the FUSE server to skip stored
+  // request data.
+  void SkipServerActualRequest();
+
  protected:
   TempPath mount_point_;
 
@@ -211,6 +216,10 @@ class FuseTest : public ::testing::Test {
   // Sends a uint32_t data via socket.
   inline void ServerSendData(uint32_t data);
 
+  // The FUSE server side's corresponding code of `SkipServerActualRequest()`.
+  // Handles `kSkipRequest` command. Skip the request pointed by current cursor.
+  void ServerSkipReceivedRequest();
+
   // Handles FUSE request sent to /dev/fuse by its saved responses.
   void ServerProcessFuseRequest();
 
diff --git a/test/fuse/linux/open_test.cc b/test/fuse/linux/open_test.cc
new file mode 100644
index 0000000000..ed0641587e
--- /dev/null
+++ b/test/fuse/linux/open_test.cc
@@ -0,0 +1,124 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fuse.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "test/fuse/linux/fuse_base.h"
+#include "test/util/fuse_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class OpenTest : public FuseTest {
+ protected:
+  const std::string test_file_ = "test_file";
+  const mode_t regular_file_ = S_IFREG | S_IRWXU | S_IRWXG | S_IRWXO;
+
+  struct fuse_open_out out_payload_ = {
+      .fh = 1,
+      .open_flags = O_RDWR,
+  };
+};
+
+TEST_F(OpenTest, RegularFile) {
+  const std::string test_file_path =
+      JoinPath(mount_point_.path().c_str(), test_file_);
+  SetServerInodeLookup(test_file_, regular_file_);
+
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_open_out),
+  };
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload_);
+  SetServerResponse(FUSE_OPEN, iov_out);
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_path.c_str(), O_RDWR));
+
+  struct fuse_in_header in_header;
+  struct fuse_open_in in_payload;
+  auto iov_in = FuseGenerateIovecs(in_header, in_payload);
+  GetServerActualRequest(iov_in);
+
+  EXPECT_EQ(in_header.len, sizeof(in_header) + sizeof(in_payload));
+  EXPECT_EQ(in_header.opcode, FUSE_OPEN);
+  EXPECT_EQ(in_payload.flags, O_RDWR);
+  EXPECT_THAT(fcntl(fd.get(), F_GETFL), SyscallSucceedsWithValue(O_RDWR));
+}
+
+TEST_F(OpenTest, SetNoOpen) {
+  const std::string test_file_path =
+      JoinPath(mount_point_.path().c_str(), test_file_);
+  SetServerInodeLookup(test_file_, regular_file_);
+
+  // ENOSYS indicates open is not implemented.
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_open_out),
+      .error = -ENOSYS,
+  };
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload_);
+  SetServerResponse(FUSE_OPEN, iov_out);
+  ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_path.c_str(), O_RDWR));
+  SkipServerActualRequest();
+
+  // check open doesn't send new request.
+  uint32_t recieved_before = GetServerTotalReceivedBytes();
+  ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_path.c_str(), O_RDWR));
+  EXPECT_EQ(GetServerTotalReceivedBytes(), recieved_before);
+}
+
+TEST_F(OpenTest, OpenFail) {
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_open_out),
+      .error = -ENOENT,
+  };
+
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload_);
+  SetServerResponse(FUSE_OPENDIR, iov_out);
+  ASSERT_THAT(open(mount_point_.path().c_str(), O_RDWR),
+              SyscallFailsWithErrno(ENOENT));
+
+  struct fuse_in_header in_header;
+  struct fuse_open_in in_payload;
+  auto iov_in = FuseGenerateIovecs(in_header, in_payload);
+  GetServerActualRequest(iov_in);
+
+  EXPECT_EQ(in_header.len, sizeof(in_header) + sizeof(in_payload));
+  EXPECT_EQ(in_header.opcode, FUSE_OPENDIR);
+  EXPECT_EQ(in_payload.flags, O_RDWR);
+}
+
+TEST_F(OpenTest, DirectoryFlagOnRegularFile) {
+  const std::string test_file_path =
+      JoinPath(mount_point_.path().c_str(), test_file_);
+
+  SetServerInodeLookup(test_file_, regular_file_);
+  ASSERT_THAT(open(test_file_path.c_str(), O_RDWR | O_DIRECTORY),
+              SyscallFailsWithErrno(ENOTDIR));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/util/BUILD b/test/util/BUILD
index b0c2c2a5a9..fc5fb3a8de 100644
--- a/test/util/BUILD
+++ b/test/util/BUILD
@@ -48,6 +48,7 @@ cc_library(
 cc_library(
     name = "fuse_util",
     testonly = 1,
+    srcs = ["fuse_util.cc"],
     hdrs = ["fuse_util.h"],
 )
 
diff --git a/test/util/fuse_util.cc b/test/util/fuse_util.cc
new file mode 100644
index 0000000000..5b10a9e451
--- /dev/null
+++ b/test/util/fuse_util.cc
@@ -0,0 +1,59 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/util/fuse_util.h"
+
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <string>
+
+namespace gvisor {
+namespace testing {
+
+// Create response body with specified mode and nodeID.
+fuse_entry_out DefaultEntryOut(mode_t mode, uint64_t node_id) {
+  const int time_sec = 1595436289;
+  const int time_nsec = 134150844;
+  struct fuse_entry_out default_entry_out = {
+      .nodeid = node_id,
+      .generation = 0,
+      .entry_valid = time_sec,
+      .attr_valid = time_sec,
+      .entry_valid_nsec = time_nsec,
+      .attr_valid_nsec = time_nsec,
+      .attr =
+          (struct fuse_attr){
+              .ino = node_id,
+              .size = 512,
+              .blocks = 4,
+              .atime = time_sec,
+              .mtime = time_sec,
+              .ctime = time_sec,
+              .atimensec = time_nsec,
+              .mtimensec = time_nsec,
+              .ctimensec = time_nsec,
+              .mode = mode,
+              .nlink = 2,
+              .uid = 1234,
+              .gid = 4321,
+              .rdev = 12,
+              .blksize = 4096,
+          },
+  };
+  return default_entry_out;
+};
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/util/fuse_util.h b/test/util/fuse_util.h
index 5f5182b962..1f1bf64a40 100644
--- a/test/util/fuse_util.h
+++ b/test/util/fuse_util.h
@@ -15,6 +15,7 @@
 #ifndef GVISOR_TEST_UTIL_FUSE_UTIL_H_
 #define GVISOR_TEST_UTIL_FUSE_UTIL_H_
 
+#include <linux/fuse.h>
 #include <sys/uio.h>
 
 #include <string>
@@ -62,6 +63,9 @@ std::vector<struct iovec> FuseGenerateIovecs(T &first, Types &...args) {
   return first_iovec;
 }
 
+// Return a fuse_entry_out FUSE server response body.
+fuse_entry_out DefaultEntryOut(mode_t mode, uint64_t nodeId);
+
 }  // namespace testing
 }  // namespace gvisor
 #endif  // GVISOR_TEST_UTIL_FUSE_UTIL_H_

From ccd1a64049df263355d1401da46c78ec2236455c Mon Sep 17 00:00:00 2001
From: Boyuan He <heboyuan@google.com>
Date: Tue, 18 Aug 2020 20:59:28 +0000
Subject: [PATCH 174/211] Implement FUSE_RELEASE/RELEASEDIR

Fixes #3314
---
 pkg/abi/linux/fuse.go           | 18 ++++++++
 pkg/sentry/fsimpl/fuse/dev.go   |  6 +++
 pkg/sentry/fsimpl/fuse/file.go  | 31 +++++++++++++-
 test/fuse/BUILD                 |  5 +++
 test/fuse/linux/BUILD           | 13 ++++++
 test/fuse/linux/fuse_base.cc    |  3 +-
 test/fuse/linux/fuse_base.h     |  6 +--
 test/fuse/linux/open_test.cc    |  4 ++
 test/fuse/linux/release_test.cc | 74 +++++++++++++++++++++++++++++++++
 9 files changed, 154 insertions(+), 6 deletions(-)
 create mode 100644 test/fuse/linux/release_test.cc

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index e09715ecd2..a1b2a2abf8 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -395,3 +395,21 @@ type FUSEOpenOut struct {
 
 	_ uint32
 }
+
+// FUSEReleaseIn is the request sent by the kernel to the daemon
+// when there is no more reference to a file.
+//
+// +marshal
+type FUSEReleaseIn struct {
+	// Fh is the file handle for the file to be released.
+	Fh uint64
+
+	// Flags of the file.
+	Flags uint32
+
+	// ReleaseFlags of this release request.
+	ReleaseFlags uint32
+
+	// LockOwner is the id of the lock owner if there is one.
+	LockOwner uint64
+}
diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go
index 0efd2d90d9..e2de8e0973 100644
--- a/pkg/sentry/fsimpl/fuse/dev.go
+++ b/pkg/sentry/fsimpl/fuse/dev.go
@@ -168,6 +168,9 @@ func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts
 
 			// We're done with this request.
 			fd.queue.Remove(req)
+			if req.hdr.Opcode == linux.FUSE_RELEASE {
+				fd.numActiveRequests -= 1
+			}
 
 			// Restart the read as this request was invalid.
 			log.Warningf("fuse.DeviceFD.Read: request found was too large. Restarting read.")
@@ -184,6 +187,9 @@ func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts
 		if readCursor >= req.hdr.Len {
 			// Fully done with this req, remove it from the queue.
 			fd.queue.Remove(req)
+			if req.hdr.Opcode == linux.FUSE_RELEASE {
+				fd.numActiveRequests -= 1
+			}
 			break
 		}
 	}
diff --git a/pkg/sentry/fsimpl/fuse/file.go b/pkg/sentry/fsimpl/fuse/file.go
index ab60ab7144..01d20caf62 100644
--- a/pkg/sentry/fsimpl/fuse/file.go
+++ b/pkg/sentry/fsimpl/fuse/file.go
@@ -18,6 +18,8 @@ import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/usermem"
 )
@@ -54,8 +56,34 @@ func (fd *fileDescription) filesystem() *vfs.Filesystem {
 	return fd.vfsfd.VirtualDentry().Mount().Filesystem()
 }
 
+func (fd *fileDescription) statusFlags() uint32 {
+	return fd.vfsfd.StatusFlags()
+}
+
 // Release implements vfs.FileDescriptionImpl.Release.
-func (fd *fileDescription) Release(ctx context.Context) {}
+func (fd *fileDescription) Release(ctx context.Context) {
+	// no need to release if FUSE server doesn't implement Open.
+	conn := fd.inode().fs.conn
+	if conn.noOpen {
+		return
+	}
+
+	in := linux.FUSEReleaseIn{
+		Fh:    fd.Fh,
+		Flags: fd.statusFlags(),
+	}
+	// TODO(gvisor.dev/issue/3245): add logic when we support file lock owner.
+	var opcode linux.FUSEOpcode
+	if fd.inode().Mode().IsDir() {
+		opcode = linux.FUSE_RELEASEDIR
+	} else {
+		opcode = linux.FUSE_RELEASE
+	}
+	kernelTask := kernel.TaskFromContext(ctx)
+	// Errors and the FUSE server reply are ignored, analogous to Linux's behavior. NOTE(review): kernelTask is used below without a nil check, unlike inode.Open; confirm ctx always carries a task here.
+	req, _ := conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), fd.inode().NodeID, opcode, &in)
+	conn.CallAsync(kernelTask, req)
+}
 
 // PRead implements vfs.FileDescriptionImpl.PRead.
 func (fd *fileDescription) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
@@ -82,7 +110,6 @@ func (fd *fileDescription) Seek(ctx context.Context, offset int64, whence int32)
 	return 0, nil
 }
 
-// Stat implements FileDescriptionImpl.Stat.
 // Stat implements vfs.FileDescriptionImpl.Stat.
 func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
 	fs := fd.filesystem()
diff --git a/test/fuse/BUILD b/test/fuse/BUILD
index 209030ff10..53cbadb3c2 100644
--- a/test/fuse/BUILD
+++ b/test/fuse/BUILD
@@ -11,3 +11,8 @@ syscall_test(
     fuse = "True",
     test = "//test/fuse/linux:open_test",
 )
+
+syscall_test(
+    fuse = "True",
+    test = "//test/fuse/linux:release_test",
+)
diff --git a/test/fuse/linux/BUILD b/test/fuse/linux/BUILD
index 1d989a2f4f..abee1a33cc 100644
--- a/test/fuse/linux/BUILD
+++ b/test/fuse/linux/BUILD
@@ -31,6 +31,19 @@ cc_binary(
     ],
 )
 
+cc_binary(
+    name = "release_test",
+    testonly = 1,
+    srcs = ["release_test.cc"],
+    deps = [
+        gtest,
+        ":fuse_base",
+        "//test/util:fuse_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
 cc_library(
     name = "fuse_base",
     testonly = 1,
diff --git a/test/fuse/linux/fuse_base.cc b/test/fuse/linux/fuse_base.cc
index e734100b19..98b4e1466b 100644
--- a/test/fuse/linux/fuse_base.cc
+++ b/test/fuse/linux/fuse_base.cc
@@ -387,7 +387,8 @@ void FuseTest::ServerProcessFuseRequest() {
 
   requests_.AddMemBlock(in_header->opcode, buf.data(), len);
 
-  if (in_header->opcode == FUSE_RELEASE) return;
+  if (in_header->opcode == FUSE_RELEASE || in_header->opcode == FUSE_RELEASEDIR)
+    return;
   // Check if there is a corresponding response.
   if (responses_.End()) {
     GTEST_NONFATAL_FAILURE_("No more FUSE response is expected");
diff --git a/test/fuse/linux/fuse_base.h b/test/fuse/linux/fuse_base.h
index 2474b763f8..ff4c4499df 100644
--- a/test/fuse/linux/fuse_base.h
+++ b/test/fuse/linux/fuse_base.h
@@ -166,13 +166,13 @@ class FuseTest : public ::testing::Test {
  protected:
   TempPath mount_point_;
 
+  // Unmounts the mountpoint of the FUSE server.
+  void UnmountFuse();
+
  private:
   // Opens /dev/fuse and inherit the file descriptor for the FUSE server.
   void MountFuse();
 
-  // Unmounts the mountpoint of the FUSE server.
-  void UnmountFuse();
-
   // Creates a socketpair for communication and forks FUSE server.
   void SetUpFuseServer();
 
diff --git a/test/fuse/linux/open_test.cc b/test/fuse/linux/open_test.cc
index ed0641587e..4b0c4a8051 100644
--- a/test/fuse/linux/open_test.cc
+++ b/test/fuse/linux/open_test.cc
@@ -33,6 +33,10 @@ namespace testing {
 namespace {
 
 class OpenTest : public FuseTest {
+  // OpenTest doesn't care about the release request sent when an fd is
+  // closed, so it doesn't check for leftover requests when tearing down.
+  void TearDown() { UnmountFuse(); }
+
  protected:
   const std::string test_file_ = "test_file";
   const mode_t regular_file_ = S_IFREG | S_IRWXU | S_IRWXG | S_IRWXO;
diff --git a/test/fuse/linux/release_test.cc b/test/fuse/linux/release_test.cc
new file mode 100644
index 0000000000..b5adb0870c
--- /dev/null
+++ b/test/fuse/linux/release_test.cc
@@ -0,0 +1,74 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fuse.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/fuse/linux/fuse_base.h"
+#include "test/util/fuse_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class ReleaseTest : public FuseTest {
+ protected:
+  const std::string test_file_ = "test_file";
+};
+
+TEST_F(ReleaseTest, RegularFile) {
+  const std::string test_file_path =
+      JoinPath(mount_point_.path().c_str(), test_file_);
+  SetServerInodeLookup(test_file_, S_IFREG | S_IRWXU | S_IRWXG | S_IRWXO);
+
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_open_out),
+  };
+  struct fuse_open_out out_payload = {
+      .fh = 1,
+      .open_flags = O_RDWR,
+  };
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
+  SetServerResponse(FUSE_OPEN, iov_out);
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_path, O_RDWR));
+  SkipServerActualRequest();
+  ASSERT_THAT(close(fd.release()), SyscallSucceeds());
+
+  struct fuse_in_header in_header;
+  struct fuse_release_in in_payload;
+  auto iov_in = FuseGenerateIovecs(in_header, in_payload);
+  GetServerActualRequest(iov_in);
+
+  EXPECT_EQ(in_header.len, sizeof(in_header) + sizeof(in_payload));
+  EXPECT_EQ(in_header.opcode, FUSE_RELEASE);
+  EXPECT_EQ(in_payload.flags, O_RDWR);
+  EXPECT_EQ(in_payload.fh, 1);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor

From e6c69537b2c73920ec7cbf28cffbdedee1651792 Mon Sep 17 00:00:00 2001
From: Boyuan He <heboyuan@google.com>
Date: Tue, 18 Aug 2020 21:51:06 +0000
Subject: [PATCH 175/211] Implement FUSE_MKNOD

Fixes #3492
---
 pkg/abi/linux/fuse.go            |  43 +++++++++++++
 pkg/sentry/fsimpl/fuse/fusefs.go |  13 ++++
 test/fuse/BUILD                  |   5 ++
 test/fuse/linux/BUILD            |  14 ++++
 test/fuse/linux/mknod_test.cc    | 107 +++++++++++++++++++++++++++++++
 5 files changed, 182 insertions(+)
 create mode 100644 test/fuse/linux/mknod_test.cc

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index a1b2a2abf8..97d9600963 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -413,3 +413,46 @@ type FUSEReleaseIn struct {
 	// LockOwner is the id of the lock owner if there is one.
 	LockOwner uint64
 }
+
+// FUSEMknodMeta contains all the static fields of FUSEMknodIn,
+// which is used for FUSE_MKNOD.
+//
+// +marshal
+type FUSEMknodMeta struct {
+	// Mode of the inode to create.
+	Mode uint32
+
+	// Rdev encodes device major and minor information.
+	Rdev uint32
+
+	// Umask is the current file mode creation mask.
+	Umask uint32
+
+	_ uint32
+}
+
+// FUSEMknodIn contains all the arguments sent by the kernel
+// to the daemon, to create a new file node.
+//
+// Dynamically-sized objects cannot be marshalled.
+type FUSEMknodIn struct {
+	marshal.StubMarshallable
+
+	// MknodMeta contains the mode, rdev and umask fields for FUSE_MKNOD.
+	MknodMeta FUSEMknodMeta
+
+	// Name is the name of the node to create.
+	Name string
+}
+
+// MarshalUnsafe serializes r.MknodMeta and r.Name to the dst buffer.
+func (r *FUSEMknodIn) MarshalUnsafe(buf []byte) {
+	r.MknodMeta.MarshalUnsafe(buf[:r.MknodMeta.SizeBytes()])
+	copy(buf[r.MknodMeta.SizeBytes():], r.Name)
+}
+
+// SizeBytes is the size of the memory representation of FUSEMknodIn.
+// 1 extra byte for null-terminated string.
+func (r *FUSEMknodIn) SizeBytes() int {
+	return r.MknodMeta.SizeBytes() + len(r.Name) + 1
+}
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index b5f05b80b3..5cef0b94f2 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -349,6 +349,19 @@ func (inode) Valid(ctx context.Context) bool {
 	return true
 }
 
+// NewNode implements kernfs.Inode.NewNode.
+func (i *inode) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (*vfs.Dentry, error) {
+	in := linux.FUSEMknodIn{
+		MknodMeta: linux.FUSEMknodMeta{
+			Mode:  uint32(opts.Mode),
+			Rdev:  linux.MakeDeviceID(uint16(opts.DevMajor), opts.DevMinor),
+			Umask: uint32(kernel.TaskFromContext(ctx).FSContext().Umask()),
+		},
+		Name: name,
+	}
+	return i.newEntry(ctx, name, opts.Mode.FileType(), linux.FUSE_MKNOD, &in)
+}
+
 // newEntry calls FUSE server for entry creation and allocates corresponding entry according to response.
 // Shared by FUSE_MKNOD, FUSE_MKDIR, FUSE_SYMLINK, FUSE_LINK and FUSE_LOOKUP.
 func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMode, opcode linux.FUSEOpcode, payload marshal.Marshallable) (*vfs.Dentry, error) {
diff --git a/test/fuse/BUILD b/test/fuse/BUILD
index 53cbadb3c2..ff2948eb3c 100644
--- a/test/fuse/BUILD
+++ b/test/fuse/BUILD
@@ -16,3 +16,8 @@ syscall_test(
     fuse = "True",
     test = "//test/fuse/linux:release_test",
 )
+
+syscall_test(
+    fuse = "True",
+    test = "//test/fuse/linux:mknod_test",
+)
diff --git a/test/fuse/linux/BUILD b/test/fuse/linux/BUILD
index abee1a33cc..c0f9176063 100644
--- a/test/fuse/linux/BUILD
+++ b/test/fuse/linux/BUILD
@@ -44,6 +44,20 @@ cc_binary(
     ],
 )
 
+cc_binary(
+    name = "mknod_test",
+    testonly = 1,
+    srcs = ["mknod_test.cc"],
+    deps = [
+        gtest,
+        ":fuse_base",
+        "//test/util:fuse_util",
+        "//test/util:temp_umask",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
 cc_library(
     name = "fuse_base",
     testonly = 1,
diff --git a/test/fuse/linux/mknod_test.cc b/test/fuse/linux/mknod_test.cc
new file mode 100644
index 0000000000..74c74d76ba
--- /dev/null
+++ b/test/fuse/linux/mknod_test.cc
@@ -0,0 +1,107 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fuse.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/fuse/linux/fuse_base.h"
+#include "test/util/fuse_util.h"
+#include "test/util/temp_umask.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class MknodTest : public FuseTest {
+ protected:
+  const std::string test_file_ = "test_file";
+  const mode_t perms_ = S_IRWXU | S_IRWXG | S_IRWXO;
+};
+
+TEST_F(MknodTest, RegularFile) {
+  const std::string test_file_path =
+      JoinPath(mount_point_.path().c_str(), test_file_);
+  const mode_t new_umask = 0077;
+
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_entry_out),
+  };
+  struct fuse_entry_out out_payload = DefaultEntryOut(S_IFREG | perms_, 5);
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
+  SetServerResponse(FUSE_MKNOD, iov_out);
+  TempUmask mask(new_umask);
+  ASSERT_THAT(mknod(test_file_path.c_str(), perms_, 0), SyscallSucceeds());
+
+  struct fuse_in_header in_header;
+  struct fuse_mknod_in in_payload;
+  std::vector<char> actual_file(test_file_.length() + 1);
+  auto iov_in = FuseGenerateIovecs(in_header, in_payload, actual_file);
+  GetServerActualRequest(iov_in);
+
+  EXPECT_EQ(in_header.len,
+            sizeof(in_header) + sizeof(in_payload) + test_file_.length() + 1);
+  EXPECT_EQ(in_header.opcode, FUSE_MKNOD);
+  EXPECT_EQ(in_payload.mode & 0777, perms_ & ~new_umask);
+  EXPECT_EQ(in_payload.umask, new_umask);
+  EXPECT_EQ(in_payload.rdev, 0);
+  EXPECT_EQ(std::string(actual_file.data()), test_file_);
+}
+
+TEST_F(MknodTest, FileTypeError) {
+  const std::string test_file_path =
+      JoinPath(mount_point_.path().c_str(), test_file_);
+
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_entry_out),
+  };
+  // The server returning a directory instead of a regular file is an error.
+  struct fuse_entry_out out_payload = DefaultEntryOut(S_IFDIR | perms_, 5);
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
+  SetServerResponse(FUSE_MKNOD, iov_out);
+  ASSERT_THAT(mknod(test_file_path.c_str(), perms_, 0),
+              SyscallFailsWithErrno(EIO));
+  SkipServerActualRequest();
+}
+
+TEST_F(MknodTest, NodeIDError) {
+  const std::string test_file_path =
+      JoinPath(mount_point_.path().c_str(), test_file_);
+
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_entry_out),
+  };
+  struct fuse_entry_out out_payload =
+      DefaultEntryOut(S_IFREG | perms_, FUSE_ROOT_ID);
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
+  SetServerResponse(FUSE_MKNOD, iov_out);
+  ASSERT_THAT(mknod(test_file_path.c_str(), perms_, 0),
+              SyscallFailsWithErrno(EIO));
+  SkipServerActualRequest();
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor

From 3d7c9f41ca4ea43339f0bbef3e619ac2f50284c1 Mon Sep 17 00:00:00 2001
From: Boyuan He <heboyuan@google.com>
Date: Tue, 18 Aug 2020 23:09:34 +0000
Subject: [PATCH 176/211] Implement FUSE_SYMLINK

Fixes #3452
---
 pkg/abi/linux/fuse.go            | 27 ++++++++++
 pkg/sentry/fsimpl/fuse/fusefs.go |  9 ++++
 test/fuse/BUILD                  |  5 ++
 test/fuse/linux/BUILD            | 13 +++++
 test/fuse/linux/symlink_test.cc  | 88 ++++++++++++++++++++++++++++++++
 5 files changed, 142 insertions(+)
 create mode 100644 test/fuse/linux/symlink_test.cc

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index 97d9600963..ea5a7fd435 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -456,3 +456,30 @@ func (r *FUSEMknodIn) MarshalUnsafe(buf []byte) {
 func (r *FUSEMknodIn) SizeBytes() int {
 	return r.MknodMeta.SizeBytes() + len(r.Name) + 1
 }
+
+// FUSESymLinkIn is the request sent by the kernel to the daemon,
+// to create a symbolic link.
+//
+// Dynamically-sized objects cannot be marshalled.
+type FUSESymLinkIn struct {
+	marshal.StubMarshallable
+
+	// Name of symlink to create.
+	Name string
+
+	// Target of the symlink.
+	Target string
+}
+
+// MarshalUnsafe serializes r.Name and r.Target to buf, leaving the byte after
+// each string unwritten as its NUL terminator (assumes buf is zero-filled).
+func (r *FUSESymLinkIn) MarshalUnsafe(buf []byte) {
+	copy(buf, r.Name)
+	copy(buf[len(r.Name)+1:], r.Target)
+}
+
+// SizeBytes is the size of the memory representation of FUSESymLinkIn.
+// 2 extra bytes for null-terminated string.
+func (r *FUSESymLinkIn) SizeBytes() int {
+	return len(r.Name) + len(r.Target) + 2
+}
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index 5cef0b94f2..0021e2933d 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -362,6 +362,15 @@ func (i *inode) NewNode(ctx context.Context, name string, opts vfs.MknodOptions)
 	return i.newEntry(ctx, name, opts.Mode.FileType(), linux.FUSE_MKNOD, &in)
 }
 
+// NewSymlink implements kernfs.Inode.NewSymlink.
+func (i *inode) NewSymlink(ctx context.Context, name, target string) (*vfs.Dentry, error) {
+	in := linux.FUSESymLinkIn{
+		Name:   name,
+		Target: target,
+	}
+	return i.newEntry(ctx, name, linux.S_IFLNK, linux.FUSE_SYMLINK, &in)
+}
+
 // newEntry calls FUSE server for entry creation and allocates corresponding entry according to response.
 // Shared by FUSE_MKNOD, FUSE_MKDIR, FUSE_SYMLINK, FUSE_LINK and FUSE_LOOKUP.
 func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMode, opcode linux.FUSEOpcode, payload marshal.Marshallable) (*vfs.Dentry, error) {
diff --git a/test/fuse/BUILD b/test/fuse/BUILD
index ff2948eb3c..2f91fe2c78 100644
--- a/test/fuse/BUILD
+++ b/test/fuse/BUILD
@@ -21,3 +21,8 @@ syscall_test(
     fuse = "True",
     test = "//test/fuse/linux:mknod_test",
 )
+
+syscall_test(
+    fuse = "True",
+    test = "//test/fuse/linux:symlink_test",
+)
diff --git a/test/fuse/linux/BUILD b/test/fuse/linux/BUILD
index c0f9176063..df42857f6e 100644
--- a/test/fuse/linux/BUILD
+++ b/test/fuse/linux/BUILD
@@ -58,6 +58,19 @@ cc_binary(
     ],
 )
 
+cc_binary(
+    name = "symlink_test",
+    testonly = 1,
+    srcs = ["symlink_test.cc"],
+    deps = [
+        gtest,
+        ":fuse_base",
+        "//test/util:fuse_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
 cc_library(
     name = "fuse_base",
     testonly = 1,
diff --git a/test/fuse/linux/symlink_test.cc b/test/fuse/linux/symlink_test.cc
new file mode 100644
index 0000000000..2c3a529872
--- /dev/null
+++ b/test/fuse/linux/symlink_test.cc
@@ -0,0 +1,88 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fuse.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/fuse/linux/fuse_base.h"
+#include "test/util/fuse_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class SymlinkTest : public FuseTest {
+ protected:
+  const std::string target_file_ = "target_file_";
+  const std::string symlink_ = "symlink_";
+  const mode_t perms_ = S_IRWXU | S_IRWXG | S_IRWXO;
+};
+
+TEST_F(SymlinkTest, CreateSymLink) {
+  const std::string symlink_path =
+      JoinPath(mount_point_.path().c_str(), symlink_);
+
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_entry_out),
+  };
+  struct fuse_entry_out out_payload = DefaultEntryOut(S_IFLNK | perms_, 5);
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
+  SetServerResponse(FUSE_SYMLINK, iov_out);
+  ASSERT_THAT(symlink(target_file_.c_str(), symlink_path.c_str()),
+              SyscallSucceeds());
+
+  struct fuse_in_header in_header;
+  std::vector<char> actual_target_file(target_file_.length() + 1);
+  std::vector<char> actual_symlink(symlink_.length() + 1);
+  auto iov_in =
+      FuseGenerateIovecs(in_header, actual_symlink, actual_target_file);
+  GetServerActualRequest(iov_in);
+
+  EXPECT_EQ(in_header.len,
+            sizeof(in_header) + symlink_.length() + target_file_.length() + 2);
+  EXPECT_EQ(in_header.opcode, FUSE_SYMLINK);
+  EXPECT_EQ(std::string(actual_target_file.data()), target_file_);
+  EXPECT_EQ(std::string(actual_symlink.data()), symlink_);
+}
+
+TEST_F(SymlinkTest, FileTypeError) {
+  const std::string symlink_path =
+      JoinPath(mount_point_.path().c_str(), symlink_);
+
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_entry_out),
+  };
+  struct fuse_entry_out out_payload = DefaultEntryOut(S_IFREG | perms_, 5);
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
+  SetServerResponse(FUSE_SYMLINK, iov_out);
+  ASSERT_THAT(symlink(target_file_.c_str(), symlink_path.c_str()),
+              SyscallFailsWithErrno(EIO));
+  SkipServerActualRequest();
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor

From 405aac54eee6c88cc9429aa26e14a52cbf435f42 Mon Sep 17 00:00:00 2001
From: Boyuan He <heboyuan@google.com>
Date: Tue, 18 Aug 2020 23:50:22 +0000
Subject: [PATCH 177/211] Implement FUSE_READLINK

Fixes #3316
---
 pkg/abi/linux/fuse.go                       | 11 +++
 pkg/sentry/fsimpl/fuse/fusefs.go            | 30 ++++++++
 pkg/sentry/fsimpl/kernfs/filesystem.go      |  2 +-
 pkg/sentry/fsimpl/kernfs/inode_impl_util.go |  2 +-
 pkg/sentry/fsimpl/kernfs/kernfs.go          |  2 +-
 pkg/sentry/fsimpl/kernfs/symlink.go         |  2 +-
 pkg/sentry/fsimpl/proc/task_fds.go          |  2 +-
 pkg/sentry/fsimpl/proc/task_files.go        |  6 +-
 pkg/sentry/fsimpl/proc/tasks_files.go       | 12 +--
 test/fuse/BUILD                             |  5 ++
 test/fuse/linux/BUILD                       | 13 ++++
 test/fuse/linux/readlink_test.cc            | 85 +++++++++++++++++++++
 12 files changed, 158 insertions(+), 14 deletions(-)
 create mode 100644 test/fuse/linux/readlink_test.cc

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index ea5a7fd435..5de1433d7a 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -483,3 +483,14 @@ func (r *FUSESymLinkIn) MarshalUnsafe(buf []byte) {
 func (r *FUSESymLinkIn) SizeBytes() int {
 	return len(r.Name) + len(r.Target) + 2
 }
+
+// FUSEEmptyIn is used by operations without request body.
+type FUSEEmptyIn struct{ marshal.StubMarshallable }
+
+// MarshalUnsafe does nothing, as an empty request has no body to marshal.
+func (r *FUSEEmptyIn) MarshalUnsafe(buf []byte) {}
+
+// SizeBytes is 0 for empty request.
+func (r *FUSEEmptyIn) SizeBytes() int {
+	return 0
+}
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index 0021e2933d..8db337a2e8 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -225,6 +225,9 @@ type inode struct {
 
 	// version of the inode.
 	version uint64
+
+	// link is the cached symlink target returned by FUSE_READLINK.
+	link string
 }
 
 func (fs *filesystem) newRootInode(creds *auth.Credentials, mode linux.FileMode) *kernfs.Dentry {
@@ -406,6 +409,33 @@ func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMo
 	return child.VFSDentry(), nil
 }
 
+// Readlink implements kernfs.Inode.Readlink. It returns EINVAL for non-symlinks and caches the target from the first FUSE_READLINK reply in i.link.
+func (i *inode) Readlink(ctx context.Context, mnt *vfs.Mount) (string, error) {
+	if i.Mode().FileType() != linux.S_IFLNK { // File types are enumerated values, not bit flags.
+		return "", syserror.EINVAL
+	}
+	if i.link == "" { // Not cached yet (an empty target is never cached and is re-requested).
+		kernelTask := kernel.TaskFromContext(ctx)
+		if kernelTask == nil {
+			log.Warningf("fusefs.Inode.Readlink: couldn't get kernel task from context")
+			return "", syserror.EINVAL
+		}
+		req, err := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.NodeID, linux.FUSE_READLINK, &linux.FUSEEmptyIn{})
+		if err != nil {
+			return "", err
+		}
+		res, err := i.fs.conn.Call(kernelTask, req)
+		if err != nil {
+			return "", err
+		}
+		i.link = string(res.data[res.hdr.SizeBytes():]) // Payload after the response header is the link target.
+		if !mnt.Options().ReadOnly {
+			i.attributeTime = 0
+		}
+	}
+	return i.link, nil
+}
+
 // statFromFUSEAttr makes attributes from linux.FUSEAttr to linux.Statx. The
 // opts.Sync attribute is ignored since the synchronization is handled by the
 // FUSE server.
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index 9042030706..7aaf1146d3 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -548,7 +548,7 @@ func (fs *Filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st
 	if !d.Impl().(*Dentry).isSymlink() {
 		return "", syserror.EINVAL
 	}
-	return inode.Readlink(ctx)
+	return inode.Readlink(ctx, rp.Mount())
 }
 
 // RenameAt implements vfs.FilesystemImpl.RenameAt.
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index c0b863ba4d..ef63a19473 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -172,7 +172,7 @@ func (InodeNoDynamicLookup) Valid(ctx context.Context) bool {
 type InodeNotSymlink struct{}
 
 // Readlink implements Inode.Readlink.
-func (InodeNotSymlink) Readlink(context.Context) (string, error) {
+func (InodeNotSymlink) Readlink(context.Context, *vfs.Mount) (string, error) {
 	return "", syserror.EINVAL
 }
 
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index 67a0347fe0..f656e2a8be 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -437,7 +437,7 @@ type inodeDynamicLookup interface {
 type inodeSymlink interface {
 	// Readlink returns the target of a symbolic link. If an inode is not a
 	// symlink, the implementation should return EINVAL.
-	Readlink(ctx context.Context) (string, error)
+	Readlink(ctx context.Context, mnt *vfs.Mount) (string, error)
 
 	// Getlink returns the target of a symbolic link, as used by path
 	// resolution:
diff --git a/pkg/sentry/fsimpl/kernfs/symlink.go b/pkg/sentry/fsimpl/kernfs/symlink.go
index 64731a3e4a..a9812fcef2 100644
--- a/pkg/sentry/fsimpl/kernfs/symlink.go
+++ b/pkg/sentry/fsimpl/kernfs/symlink.go
@@ -52,7 +52,7 @@ func (s *StaticSymlink) Init(creds *auth.Credentials, devMajor uint32, devMinor
 }
 
 // Readlink implements Inode.
-func (s *StaticSymlink) Readlink(_ context.Context) (string, error) {
+func (s *StaticSymlink) Readlink(_ context.Context, _ *vfs.Mount) (string, error) {
 	return s.target, nil
 }
 
diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go
index 3f0d784613..5374538c92 100644
--- a/pkg/sentry/fsimpl/proc/task_fds.go
+++ b/pkg/sentry/fsimpl/proc/task_fds.go
@@ -209,7 +209,7 @@ func (fs *filesystem) newFDSymlink(task *kernel.Task, fd int32, ino uint64) *ker
 	return d
 }
 
-func (s *fdSymlink) Readlink(ctx context.Context) (string, error) {
+func (s *fdSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) {
 	file, _ := getTaskFD(s.task, s.fd)
 	if file == nil {
 		return "", syserror.ENOENT
diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go
index 356036b9b9..4f7f9cb009 100644
--- a/pkg/sentry/fsimpl/proc/task_files.go
+++ b/pkg/sentry/fsimpl/proc/task_files.go
@@ -668,7 +668,7 @@ func (fs *filesystem) newExeSymlink(task *kernel.Task, ino uint64) *kernfs.Dentr
 }
 
 // Readlink implements kernfs.Inode.
-func (s *exeSymlink) Readlink(ctx context.Context) (string, error) {
+func (s *exeSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) {
 	if !kernel.ContextCanTrace(ctx, s.task, false) {
 		return "", syserror.EACCES
 	}
@@ -808,11 +808,11 @@ func (fs *filesystem) newNamespaceSymlink(task *kernel.Task, ino uint64, ns stri
 }
 
 // Readlink implements Inode.
-func (s *namespaceSymlink) Readlink(ctx context.Context) (string, error) {
+func (s *namespaceSymlink) Readlink(ctx context.Context, mnt *vfs.Mount) (string, error) {
 	if err := checkTaskState(s.task); err != nil {
 		return "", err
 	}
-	return s.StaticSymlink.Readlink(ctx)
+	return s.StaticSymlink.Readlink(ctx, mnt)
 }
 
 // Getlink implements Inode.Getlink.
diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go
index 8c41729e4b..68c5410462 100644
--- a/pkg/sentry/fsimpl/proc/tasks_files.go
+++ b/pkg/sentry/fsimpl/proc/tasks_files.go
@@ -51,7 +51,7 @@ func (fs *filesystem) newSelfSymlink(creds *auth.Credentials, ino uint64, pidns
 	return d
 }
 
-func (s *selfSymlink) Readlink(ctx context.Context) (string, error) {
+func (s *selfSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) {
 	t := kernel.TaskFromContext(ctx)
 	if t == nil {
 		// Who is reading this link?
@@ -64,8 +64,8 @@ func (s *selfSymlink) Readlink(ctx context.Context) (string, error) {
 	return strconv.FormatUint(uint64(tgid), 10), nil
 }
 
-func (s *selfSymlink) Getlink(ctx context.Context, _ *vfs.Mount) (vfs.VirtualDentry, string, error) {
-	target, err := s.Readlink(ctx)
+func (s *selfSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDentry, string, error) {
+	target, err := s.Readlink(ctx, mnt)
 	return vfs.VirtualDentry{}, target, err
 }
 
@@ -94,7 +94,7 @@ func (fs *filesystem) newThreadSelfSymlink(creds *auth.Credentials, ino uint64,
 	return d
 }
 
-func (s *threadSelfSymlink) Readlink(ctx context.Context) (string, error) {
+func (s *threadSelfSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) {
 	t := kernel.TaskFromContext(ctx)
 	if t == nil {
 		// Who is reading this link?
@@ -108,8 +108,8 @@ func (s *threadSelfSymlink) Readlink(ctx context.Context) (string, error) {
 	return fmt.Sprintf("%d/task/%d", tgid, tid), nil
 }
 
-func (s *threadSelfSymlink) Getlink(ctx context.Context, _ *vfs.Mount) (vfs.VirtualDentry, string, error) {
-	target, err := s.Readlink(ctx)
+func (s *threadSelfSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDentry, string, error) {
+	target, err := s.Readlink(ctx, mnt)
 	return vfs.VirtualDentry{}, target, err
 }
 
diff --git a/test/fuse/BUILD b/test/fuse/BUILD
index 2f91fe2c78..c2bdcf1baf 100644
--- a/test/fuse/BUILD
+++ b/test/fuse/BUILD
@@ -26,3 +26,8 @@ syscall_test(
     fuse = "True",
     test = "//test/fuse/linux:symlink_test",
 )
+
+syscall_test(
+    fuse = "True",
+    test = "//test/fuse/linux:readlink_test",
+)
diff --git a/test/fuse/linux/BUILD b/test/fuse/linux/BUILD
index df42857f6e..d3e8ca1481 100644
--- a/test/fuse/linux/BUILD
+++ b/test/fuse/linux/BUILD
@@ -71,6 +71,19 @@ cc_binary(
     ],
 )
 
+cc_binary(
+    name = "readlink_test",
+    testonly = 1,
+    srcs = ["readlink_test.cc"],
+    deps = [
+        gtest,
+        ":fuse_base",
+        "//test/util:fuse_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
 cc_library(
     name = "fuse_base",
     testonly = 1,
diff --git a/test/fuse/linux/readlink_test.cc b/test/fuse/linux/readlink_test.cc
new file mode 100644
index 0000000000..2cba8fc235
--- /dev/null
+++ b/test/fuse/linux/readlink_test.cc
@@ -0,0 +1,85 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fuse.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/fuse/linux/fuse_base.h"
+#include "test/util/fuse_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class ReadlinkTest : public FuseTest {
+ protected:
+  const std::string test_file_ = "test_file_";
+  const mode_t perms_ = S_IRWXU | S_IRWXG | S_IRWXO;
+};
+
+TEST_F(ReadlinkTest, ReadSymLink) {
+  const std::string symlink_path =
+      JoinPath(mount_point_.path().c_str(), test_file_);
+  SetServerInodeLookup(test_file_, S_IFLNK | perms_);
+
+  struct fuse_out_header out_header = {
+      .len = static_cast<uint32_t>(sizeof(struct fuse_out_header)) +
+             static_cast<uint32_t>(test_file_.length()) + 1,
+  };
+  std::string link = test_file_;
+  auto iov_out = FuseGenerateIovecs(out_header, link);
+  SetServerResponse(FUSE_READLINK, iov_out);
+  const std::string actual_link =
+      ASSERT_NO_ERRNO_AND_VALUE(ReadLink(symlink_path));
+
+  struct fuse_in_header in_header;
+  auto iov_in = FuseGenerateIovecs(in_header);
+  GetServerActualRequest(iov_in);
+
+  EXPECT_EQ(in_header.len, sizeof(in_header));
+  EXPECT_EQ(in_header.opcode, FUSE_READLINK);
+  EXPECT_EQ(0, memcmp(actual_link.c_str(), link.data(), link.size()));
+
+  // The next readlink should use the cached link, so no new request should
+  // reach the server.
+  uint32_t recieved_before = GetServerTotalReceivedBytes();
+  ASSERT_NO_ERRNO(ReadLink(symlink_path));
+  EXPECT_EQ(GetServerTotalReceivedBytes(), recieved_before);
+}
+
+TEST_F(ReadlinkTest, NotSymlink) {
+  const std::string test_file_path =
+      JoinPath(mount_point_.path().c_str(), test_file_);
+  SetServerInodeLookup(test_file_, S_IFREG | perms_);
+
+  std::vector<char> buf(PATH_MAX + 1);
+  ASSERT_THAT(readlink(test_file_path.c_str(), buf.data(), PATH_MAX),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor

From 2d73a7d3b83c0e85741742f72998b41a35072990 Mon Sep 17 00:00:00 2001
From: Boyuan He <heboyuan@google.com>
Date: Tue, 18 Aug 2020 22:45:47 +0000
Subject: [PATCH 178/211] Implement FUSE_MKDIR

Fixes #3392
---
 pkg/abi/linux/fuse.go               | 38 +++++++++++++
 pkg/sentry/fsimpl/fuse/BUILD        |  1 +
 pkg/sentry/fsimpl/fuse/directory.go | 51 +++++++++++++++++
 pkg/sentry/fsimpl/fuse/fusefs.go    | 27 ++++++++-
 test/fuse/BUILD                     |  5 ++
 test/fuse/linux/BUILD               | 14 +++++
 test/fuse/linux/mkdir_test.cc       | 88 +++++++++++++++++++++++++++++
 7 files changed, 221 insertions(+), 3 deletions(-)
 create mode 100644 pkg/sentry/fsimpl/fuse/directory.go
 create mode 100644 test/fuse/linux/mkdir_test.cc

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index 5de1433d7a..4ef0ab9a75 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -494,3 +494,41 @@ func (r *FUSEEmptyIn) MarshalUnsafe(buf []byte) {}
 func (r *FUSEEmptyIn) SizeBytes() int {
 	return 0
 }
+
+// FUSEMkdirMeta contains all the static fields of FUSEMkdirIn,
+// which is used for FUSE_MKDIR.
+//
+// +marshal
+type FUSEMkdirMeta struct {
+	// Mode of the directory to create.
+	Mode uint32
+
+	// Umask is the user file creation mask.
+	Umask uint32
+}
+
+// FUSEMkdirIn contains all the arguments sent by the kernel
+// to the daemon, to create a new directory.
+//
+// Dynamically-sized objects cannot be marshalled.
+type FUSEMkdirIn struct {
+	marshal.StubMarshallable
+
+	// MkdirMeta contains Mode and Umask of the directory to create.
+	MkdirMeta FUSEMkdirMeta
+
+	// Name of the directory to create.
+	Name string
+}
+
+// MarshalUnsafe serializes r.MkdirMeta followed by r.Name into buf.
+func (r *FUSEMkdirIn) MarshalUnsafe(buf []byte) {
+	r.MkdirMeta.MarshalUnsafe(buf[:r.MkdirMeta.SizeBytes()])
+	copy(buf[r.MkdirMeta.SizeBytes():], r.Name)
+}
+
+// SizeBytes is the size of the memory representation of FUSEMkdirIn.
+// 1 extra byte for null-terminated Name string.
+func (r *FUSEMkdirIn) SizeBytes() int {
+	return r.MkdirMeta.SizeBytes() + len(r.Name) + 1
+}
diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD
index d1671e576d..2d9350d57d 100644
--- a/pkg/sentry/fsimpl/fuse/BUILD
+++ b/pkg/sentry/fsimpl/fuse/BUILD
@@ -31,6 +31,7 @@ go_library(
     srcs = [
         "connection.go",
         "dev.go",
+        "directory.go",
         "file.go",
         "fusefs.go",
         "init.go",
diff --git a/pkg/sentry/fsimpl/fuse/directory.go b/pkg/sentry/fsimpl/fuse/directory.go
new file mode 100644
index 0000000000..44d41712a1
--- /dev/null
+++ b/pkg/sentry/fsimpl/fuse/directory.go
@@ -0,0 +1,51 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fuse
+
+import (
+	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+	"gvisor.dev/gvisor/pkg/syserror"
+	"gvisor.dev/gvisor/pkg/usermem"
+)
+
+type directoryFD struct {
+	fileDescription
+}
+
+// Allocate implements FileDescriptionImpl.Allocate.
+func (directoryFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
+	return syserror.EISDIR
+}
+
+// PRead implements FileDescriptionImpl.PRead.
+func (directoryFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
+	return 0, syserror.EISDIR
+}
+
+// Read implements FileDescriptionImpl.Read.
+func (directoryFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
+	return 0, syserror.EISDIR
+}
+
+// PWrite implements FileDescriptionImpl.PWrite.
+func (directoryFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
+	return 0, syserror.EISDIR
+}
+
+// Write implements FileDescriptionImpl.Write.
+func (directoryFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
+	return 0, syserror.EISDIR
+}
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index 8db337a2e8..4dc8ef9935 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -262,8 +262,17 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr
 		return nil, syserror.EOVERFLOW
 	}
 
-	// FOPEN_KEEP_CACHE is the defualt flag for noOpen.
-	fd := fileDescription{OpenFlag: linux.FOPEN_KEEP_CACHE}
+	var fd *fileDescription
+	var fdImpl vfs.FileDescriptionImpl
+	if isDir {
+		directoryFD := &directoryFD{}
+		fd = &(directoryFD.fileDescription)
+		fdImpl = directoryFD
+	} else {
+		// FOPEN_KEEP_CACHE is the defualt flag for noOpen.
+		fd = &fileDescription{OpenFlag: linux.FOPEN_KEEP_CACHE}
+		fdImpl = fd
+	}
 	// Only send open request when FUSE server support open or is opening a directory.
 	if !i.fs.conn.noOpen || isDir {
 		kernelTask := kernel.TaskFromContext(ctx)
@@ -330,7 +339,7 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr
 		i.attributeTime = 0
 	}
 
-	if err := fd.vfsfd.Init(&fd, opts.Flags, rp.Mount(), vfsd, fdOptions); err != nil {
+	if err := fd.vfsfd.Init(fdImpl, opts.Flags, rp.Mount(), vfsd, fdOptions); err != nil {
 		return nil, err
 	}
 	return &fd.vfsfd, nil
@@ -374,6 +383,18 @@ func (i *inode) NewSymlink(ctx context.Context, name, target string) (*vfs.Dentr
 	return i.newEntry(ctx, name, linux.S_IFLNK, linux.FUSE_SYMLINK, &in)
 }
 
+// NewDir implements kernfs.Inode.NewDir.
+func (i *inode) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*vfs.Dentry, error) {
+	in := linux.FUSEMkdirIn{
+		MkdirMeta: linux.FUSEMkdirMeta{
+			Mode:  uint32(opts.Mode),
+			Umask: uint32(kernel.TaskFromContext(ctx).FSContext().Umask()),
+		},
+		Name: name,
+	}
+	return i.newEntry(ctx, name, linux.S_IFDIR, linux.FUSE_MKDIR, &in)
+}
+
 // newEntry calls FUSE server for entry creation and allocates corresponding entry according to response.
 // Shared by FUSE_MKNOD, FUSE_MKDIR, FUSE_SYMLINK, FUSE_LINK and FUSE_LOOKUP.
 func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMode, opcode linux.FUSEOpcode, payload marshal.Marshallable) (*vfs.Dentry, error) {
diff --git a/test/fuse/BUILD b/test/fuse/BUILD
index c2bdcf1baf..8bde81e3ca 100644
--- a/test/fuse/BUILD
+++ b/test/fuse/BUILD
@@ -31,3 +31,8 @@ syscall_test(
     fuse = "True",
     test = "//test/fuse/linux:readlink_test",
 )
+
+syscall_test(
+    fuse = "True",
+    test = "//test/fuse/linux:mkdir_test",
+)
diff --git a/test/fuse/linux/BUILD b/test/fuse/linux/BUILD
index d3e8ca1481..298ea11f8e 100644
--- a/test/fuse/linux/BUILD
+++ b/test/fuse/linux/BUILD
@@ -84,6 +84,20 @@ cc_binary(
     ],
 )
 
+cc_binary(
+    name = "mkdir_test",
+    testonly = 1,
+    srcs = ["mkdir_test.cc"],
+    deps = [
+        gtest,
+        ":fuse_base",
+        "//test/util:fuse_util",
+        "//test/util:temp_umask",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
 cc_library(
     name = "fuse_base",
     testonly = 1,
diff --git a/test/fuse/linux/mkdir_test.cc b/test/fuse/linux/mkdir_test.cc
new file mode 100644
index 0000000000..9647cb93f1
--- /dev/null
+++ b/test/fuse/linux/mkdir_test.cc
@@ -0,0 +1,88 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fuse.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/fuse/linux/fuse_base.h"
+#include "test/util/fuse_util.h"
+#include "test/util/temp_umask.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class MkdirTest : public FuseTest {
+ protected:
+  const std::string test_dir_ = "test_dir";
+  const mode_t perms_ = S_IRWXU | S_IRWXG | S_IRWXO;
+};
+
+TEST_F(MkdirTest, CreateDir) {
+  const std::string test_dir_path_ =
+      JoinPath(mount_point_.path().c_str(), test_dir_);
+  const mode_t new_umask = 0077;
+
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_entry_out),
+  };
+  struct fuse_entry_out out_payload = DefaultEntryOut(S_IFDIR | perms_, 5);
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
+  SetServerResponse(FUSE_MKDIR, iov_out);
+  TempUmask mask(new_umask);
+  ASSERT_THAT(mkdir(test_dir_path_.c_str(), 0777), SyscallSucceeds());
+
+  struct fuse_in_header in_header;
+  struct fuse_mkdir_in in_payload;
+  std::vector<char> actual_dir(test_dir_.length() + 1);
+  auto iov_in = FuseGenerateIovecs(in_header, in_payload, actual_dir);
+  GetServerActualRequest(iov_in);
+
+  EXPECT_EQ(in_header.len,
+            sizeof(in_header) + sizeof(in_payload) + test_dir_.length() + 1);
+  EXPECT_EQ(in_header.opcode, FUSE_MKDIR);
+  EXPECT_EQ(in_payload.mode & 0777, perms_ & ~new_umask);
+  EXPECT_EQ(in_payload.umask, new_umask);
+  EXPECT_EQ(std::string(actual_dir.data()), test_dir_);
+}
+
+TEST_F(MkdirTest, FileTypeError) {
+  const std::string test_dir_path_ =
+      JoinPath(mount_point_.path().c_str(), test_dir_);
+
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_entry_out),
+  };
+  struct fuse_entry_out out_payload = DefaultEntryOut(S_IFREG | perms_, 5);
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
+  SetServerResponse(FUSE_MKDIR, iov_out);
+  ASSERT_THAT(mkdir(test_dir_path_.c_str(), 0777), SyscallFailsWithErrno(EIO));
+  SkipServerActualRequest();
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor

From 74e229c56ceb488a61a1b42d8f7da2d58c3c5418 Mon Sep 17 00:00:00 2001
From: Jinmou Li <jinmli@google.com>
Date: Wed, 15 Jul 2020 18:32:51 +0000
Subject: [PATCH 179/211] Implement FUSE_READ

Fixes #3206
---
 pkg/abi/linux/fuse.go                  |  37 +++
 pkg/sentry/fsimpl/fuse/BUILD           |   3 +
 pkg/sentry/fsimpl/fuse/connection.go   |   6 +-
 pkg/sentry/fsimpl/fuse/dev.go          |   4 +-
 pkg/sentry/fsimpl/fuse/fusefs.go       | 130 +++++++--
 pkg/sentry/fsimpl/fuse/init.go         |   3 +-
 pkg/sentry/fsimpl/fuse/read_write.go   | 152 ++++++++++
 pkg/sentry/fsimpl/fuse/regular_file.go | 125 ++++++++
 test/fuse/BUILD                        |   5 +
 test/fuse/linux/BUILD                  |  13 +
 test/fuse/linux/fuse_base.cc           |  17 +-
 test/fuse/linux/fuse_base.h            |  13 +-
 test/fuse/linux/read_test.cc           | 390 +++++++++++++++++++++++++
 13 files changed, 862 insertions(+), 36 deletions(-)
 create mode 100644 pkg/sentry/fsimpl/fuse/read_write.go
 create mode 100644 pkg/sentry/fsimpl/fuse/regular_file.go
 create mode 100644 test/fuse/linux/read_test.cc

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index 4ef0ab9a75..0ece7b7566 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -184,6 +184,13 @@ const (
 	FUSE_KERNEL_MINOR_VERSION = 31
 )
 
+// Constants relevant to FUSE operations.
+const (
+	FUSE_NAME_MAX     = 1024
+	FUSE_PAGE_SIZE    = 4096
+	FUSE_DIRENT_ALIGN = 8
+)
+
 // FUSEInitIn is the request sent by the kernel to the daemon,
 // to negotiate the version and flags.
 //
@@ -392,6 +399,36 @@ type FUSEOpenOut struct {
 
 	// OpenFlag for the opened file.
 	OpenFlag uint32
+}
+
+// FUSE_READ flags, consistent with the ones in include/uapi/linux/fuse.h.
+const (
+	FUSE_READ_LOCKOWNER = 1 << 1
+)
+
+// FUSEReadIn is the request sent by the kernel to the daemon
+// for FUSE_READ.
+//
+// +marshal
+type FUSEReadIn struct {
+	// Fh is the file handle in userspace.
+	Fh uint64
+
+	// Offset is the read offset.
+	Offset uint64
+
+	// Size is the number of bytes to read.
+	Size uint32
+
+	// ReadFlags for this FUSE_READ request.
+	// Currently only contains FUSE_READ_LOCKOWNER.
+	ReadFlags uint32
+
+	// LockOwner is the id of the lock owner if there is one.
+	LockOwner uint64
+
+	// Flags for the underlying file.
+	Flags uint32
 
 	_ uint32
 }
diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD
index 2d9350d57d..a6ee6100d5 100644
--- a/pkg/sentry/fsimpl/fuse/BUILD
+++ b/pkg/sentry/fsimpl/fuse/BUILD
@@ -36,7 +36,9 @@ go_library(
         "fusefs.go",
         "init.go",
         "inode_refs.go",
+        "read_write.go",
         "register.go",
+        "regular_file.go",
         "request_list.go",
     ],
     visibility = ["//pkg/sentry:internal"],
@@ -45,6 +47,7 @@ go_library(
         "//pkg/context",
         "//pkg/log",
         "//pkg/refs",
+        "//pkg/safemem",
         "//pkg/sentry/fsimpl/devtmpfs",
         "//pkg/sentry/fsimpl/kernfs",
         "//pkg/sentry/kernel",
diff --git a/pkg/sentry/fsimpl/fuse/connection.go b/pkg/sentry/fsimpl/fuse/connection.go
index 7d3c30116a..a6525249d5 100644
--- a/pkg/sentry/fsimpl/fuse/connection.go
+++ b/pkg/sentry/fsimpl/fuse/connection.go
@@ -161,6 +161,7 @@ type connection struct {
 	bgLock sync.Mutex
 
 	// maxRead is the maximum size of a read buffer in in bytes.
+	// Initialized from a fuse fs parameter.
 	maxRead uint32
 
 	// maxWrite is the maximum size of a write buffer in bytes.
@@ -206,7 +207,7 @@ type connection struct {
 }
 
 // newFUSEConnection creates a FUSE connection to fd.
-func newFUSEConnection(_ context.Context, fd *vfs.FileDescription, maxInFlightRequests uint64) (*connection, error) {
+func newFUSEConnection(_ context.Context, fd *vfs.FileDescription, opts *filesystemOptions) (*connection, error) {
 	// Mark the device as ready so it can be used. /dev/fuse can only be used if the FD was used to
 	// mount a FUSE filesystem.
 	fuseFD := fd.Impl().(*DeviceFD)
@@ -216,13 +217,14 @@ func newFUSEConnection(_ context.Context, fd *vfs.FileDescription, maxInFlightRe
 	hdrLen := uint32((*linux.FUSEHeaderOut)(nil).SizeBytes())
 	fuseFD.writeBuf = make([]byte, hdrLen)
 	fuseFD.completions = make(map[linux.FUSEOpID]*futureResponse)
-	fuseFD.fullQueueCh = make(chan struct{}, maxInFlightRequests)
+	fuseFD.fullQueueCh = make(chan struct{}, opts.maxActiveRequests)
 	fuseFD.writeCursor = 0
 
 	return &connection{
 		fd:                  fuseFD,
 		maxBackground:       fuseDefaultMaxBackground,
 		congestionThreshold: fuseDefaultCongestionThreshold,
+		maxRead:             opts.maxRead,
 		maxPages:            fuseDefaultMaxPagesPerReq,
 		initializedChan:     make(chan struct{}),
 		connected:           true,
diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go
index e2de8e0973..fd3592e327 100644
--- a/pkg/sentry/fsimpl/fuse/dev.go
+++ b/pkg/sentry/fsimpl/fuse/dev.go
@@ -401,10 +401,12 @@ func (fd *DeviceFD) sendError(ctx context.Context, errno int32, req *Request) er
 // receiver is going to be waiting on the future channel. This is to be used by:
 // FUSE_INIT.
 func (fd *DeviceFD) noReceiverAction(ctx context.Context, r *Response) error {
-	if r.opcode == linux.FUSE_INIT {
+	switch r.opcode {
+	case linux.FUSE_INIT:
 		creds := auth.CredentialsFromContext(ctx)
 		rootUserNs := kernel.KernelFromContext(ctx).RootUserNamespace()
 		return fd.fs.conn.InitRecv(r, creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, rootUserNs))
+		// TODO(gvisor.dev/issue/3247): support async read: correctly process the response using information from r.options.
 	}
 
 	return nil
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index 4dc8ef9935..65e22ba4d3 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -16,6 +16,7 @@
 package fuse
 
 import (
+	"math"
 	"strconv"
 	"sync/atomic"
 
@@ -58,6 +59,11 @@ type filesystemOptions struct {
 	// exist at any time. Any further requests will block when trying to
 	// Call the server.
 	maxActiveRequests uint64
+
+	// maxRead is the max number of bytes to read,
+	// specified as "max_read" in fs parameters.
+	// If not specified by user, use math.MaxUint32 as default value.
+	maxRead uint32
 }
 
 // filesystem implements vfs.FilesystemImpl.
@@ -144,6 +150,21 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
 	// Set the maxInFlightRequests option.
 	fsopts.maxActiveRequests = maxActiveRequestsDefault
 
+	if maxReadStr, ok := mopts["max_read"]; ok {
+		delete(mopts, "max_read")
+		maxRead, err := strconv.ParseUint(maxReadStr, 10, 32)
+		if err != nil {
+			log.Warningf("%s.GetFilesystem: invalid max_read: max_read=%s", fsType.Name(), maxReadStr)
+			return nil, nil, syserror.EINVAL
+		}
+		if maxRead < fuseMinMaxRead {
+			maxRead = fuseMinMaxRead
+		}
+		fsopts.maxRead = uint32(maxRead)
+	} else {
+		fsopts.maxRead = math.MaxUint32
+	}
+
 	// Check for unparsed options.
 	if len(mopts) != 0 {
 		log.Warningf("%s.GetFilesystem: unknown options: %v", fsType.Name(), mopts)
@@ -179,7 +200,7 @@ func NewFUSEFilesystem(ctx context.Context, devMinor uint32, opts *filesystemOpt
 		opts:     opts,
 	}
 
-	conn, err := newFUSEConnection(ctx, device, opts.maxActiveRequests)
+	conn, err := newFUSEConnection(ctx, device, opts)
 	if err != nil {
 		log.Warningf("fuse.NewFUSEFilesystem: NewFUSEConnection failed with error: %v", err)
 		return nil, syserror.EINVAL
@@ -244,6 +265,7 @@ func (fs *filesystem) newInode(nodeID uint64, attr linux.FUSEAttr) *kernfs.Dentr
 	i := &inode{fs: fs, NodeID: nodeID}
 	creds := auth.Credentials{EffectiveKGID: auth.KGID(attr.UID), EffectiveKUID: auth.KUID(attr.UID)}
 	i.InodeAttrs.Init(&creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.FileMode(attr.Mode))
+	atomic.StoreUint64(&i.size, attr.Size)
 	i.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
 	i.EnableLeakCheck()
 	i.dentry.Init(i)
@@ -269,10 +291,13 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr
 		fd = &(directoryFD.fileDescription)
 		fdImpl = directoryFD
 	} else {
-		// FOPEN_KEEP_CACHE is the defualt flag for noOpen.
-		fd = &fileDescription{OpenFlag: linux.FOPEN_KEEP_CACHE}
-		fdImpl = fd
+		regularFD := &regularFileFD{}
+		fd = &(regularFD.fileDescription)
+		fdImpl = regularFD
 	}
+	// FOPEN_KEEP_CACHE is the default flag for noOpen.
+	fd.OpenFlag = linux.FOPEN_KEEP_CACHE
+
 	// Only send open request when FUSE server support open or is opening a directory.
 	if !i.fs.conn.noOpen || isDir {
 		kernelTask := kernel.TaskFromContext(ctx)
@@ -281,21 +306,25 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr
 			return nil, syserror.EINVAL
 		}
 
+		// Build the request.
 		var opcode linux.FUSEOpcode
 		if isDir {
 			opcode = linux.FUSE_OPENDIR
 		} else {
 			opcode = linux.FUSE_OPEN
 		}
+
 		in := linux.FUSEOpenIn{Flags: opts.Flags & ^uint32(linux.O_CREAT|linux.O_EXCL|linux.O_NOCTTY)}
 		if !i.fs.conn.atomicOTrunc {
 			in.Flags &= ^uint32(linux.O_TRUNC)
 		}
+
 		req, err := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.NodeID, opcode, &in)
 		if err != nil {
 			return nil, err
 		}
 
+		// Send the request and receive the reply.
 		res, err := i.fs.conn.Call(kernelTask, req)
 		if err != nil {
 			return nil, err
@@ -309,15 +338,17 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr
 			if err := res.UnmarshalPayload(&out); err != nil {
 				return nil, err
 			}
+
+			// Process the reply.
 			fd.OpenFlag = out.OpenFlag
+			if isDir {
+				fd.OpenFlag &= ^uint32(linux.FOPEN_DIRECT_IO)
+			}
+
 			fd.Fh = out.Fh
 		}
 	}
 
-	if isDir {
-		fd.OpenFlag &= ^uint32(linux.FOPEN_DIRECT_IO)
-	}
-
 	// TODO(gvisor.dev/issue/3234): invalidate mmap after implemented it for FUSE Inode
 	fd.DirectIO = fd.OpenFlag&linux.FOPEN_DIRECT_IO != 0
 	fdOptions := &vfs.FileDescriptionOptions{}
@@ -457,6 +488,16 @@ func (i *inode) Readlink(ctx context.Context, mnt *vfs.Mount) (string, error) {
 	return i.link, nil
 }
 
+// getFUSEAttr returns a linux.FUSEAttr of this inode stored in local cache.
+// TODO(gvisor.dev/issue/3679): Add support for other fields.
+func (i *inode) getFUSEAttr() linux.FUSEAttr {
+	return linux.FUSEAttr{
+		Ino:  i.Ino(),
+		Size: atomic.LoadUint64(&i.size),
+		Mode: uint32(i.Mode()),
+	}
+}
+
 // statFromFUSEAttr makes attributes from linux.FUSEAttr to linux.Statx. The
 // opts.Sync attribute is ignored since the synchronization is handled by the
 // FUSE server.
@@ -510,47 +551,90 @@ func statFromFUSEAttr(attr linux.FUSEAttr, mask, devMinor uint32) linux.Statx {
 	return stat
 }
 
-// Stat implements kernfs.Inode.Stat.
-func (i *inode) Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
-	fusefs := fs.Impl().(*filesystem)
-	conn := fusefs.conn
-	task, creds := kernel.TaskFromContext(ctx), auth.CredentialsFromContext(ctx)
+// getAttr gets the attributes of this inode by issuing a FUSE_GETATTR request
+// or by reading them from the local cache.
+// It updates the corresponding attributes if necessary.
+func (i *inode) getAttr(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions) (linux.FUSEAttr, error) {
+	attributeVersion := atomic.LoadUint64(&i.fs.conn.attributeVersion)
+
+	// TODO(gvisor.dev/issue/3679): send the request only if
+	// - invalid local cache for fields specified in the opts.Mask
+	// - forced update
+	// - i.attributeTime expired
+	// If local cache is still valid, return local cache.
+	// Currently we always send a request,
+	// and we always set the metadata with the new result,
+	// unless attributeVersion has changed.
+
+	task := kernel.TaskFromContext(ctx)
 	if task == nil {
 		log.Warningf("couldn't get kernel task from context")
-		return linux.Statx{}, syserror.EINVAL
+		return linux.FUSEAttr{}, syserror.EINVAL
 	}
 
+	creds := auth.CredentialsFromContext(ctx)
+
 	var in linux.FUSEGetAttrIn
 	// We don't set any attribute in the request, because in VFS2 fstat(2) will
 	// finally be translated into vfs.FilesystemImpl.StatAt() (see
 	// pkg/sentry/syscalls/linux/vfs2/stat.go), resulting in the same flow
 	// as stat(2). Thus GetAttrFlags and Fh variable will never be used in VFS2.
-	req, err := conn.NewRequest(creds, uint32(task.ThreadID()), i.NodeID, linux.FUSE_GETATTR, &in)
+	req, err := i.fs.conn.NewRequest(creds, uint32(task.ThreadID()), i.NodeID, linux.FUSE_GETATTR, &in)
 	if err != nil {
-		return linux.Statx{}, err
+		return linux.FUSEAttr{}, err
 	}
 
-	res, err := conn.Call(task, req)
+	res, err := i.fs.conn.Call(task, req)
 	if err != nil {
-		return linux.Statx{}, err
+		return linux.FUSEAttr{}, err
 	}
 	if err := res.Error(); err != nil {
-		return linux.Statx{}, err
+		return linux.FUSEAttr{}, err
 	}
 
 	var out linux.FUSEGetAttrOut
 	if err := res.UnmarshalPayload(&out); err != nil {
-		return linux.Statx{}, err
+		return linux.FUSEAttr{}, err
 	}
 
-	// Set all metadata into kernfs.InodeAttrs.
+	// Local version is newer, return the local one.
+	// Skip the update.
+	if attributeVersion != 0 && atomic.LoadUint64(&i.attributeVersion) > attributeVersion {
+		return i.getFUSEAttr(), nil
+	}
+
+	// Set the metadata of kernfs.InodeAttrs.
 	if err := i.SetStat(ctx, fs, creds, vfs.SetStatOptions{
-		Stat: statFromFUSEAttr(out.Attr, linux.STATX_ALL, fusefs.devMinor),
+		Stat: statFromFUSEAttr(out.Attr, linux.STATX_ALL, i.fs.devMinor),
 	}); err != nil {
+		return linux.FUSEAttr{}, err
+	}
+
+	// Set the size if no error (after SetStat() check).
+	atomic.StoreUint64(&i.size, out.Attr.Size)
+
+	return out.Attr, nil
+}
+
+// reviseAttr attempts to update the attributes for internal purposes
+// by calling getAttr with a pre-specified mask.
+// Used by read, write, lseek.
+func (i *inode) reviseAttr(ctx context.Context) error {
+	// Never need atime for internal purposes.
+	_, err := i.getAttr(ctx, i.fs.VFSFilesystem(), vfs.StatOptions{
+		Mask: linux.STATX_BASIC_STATS &^ linux.STATX_ATIME,
+	})
+	return err
+}
+
+// Stat implements kernfs.Inode.Stat.
+func (i *inode) Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
+	attr, err := i.getAttr(ctx, fs, opts)
+	if err != nil {
 		return linux.Statx{}, err
 	}
 
-	return statFromFUSEAttr(out.Attr, opts.Mask, fusefs.devMinor), nil
+	return statFromFUSEAttr(attr, opts.Mask, i.fs.devMinor), nil
 }
 
 // DecRef implements kernfs.Inode.
diff --git a/pkg/sentry/fsimpl/fuse/init.go b/pkg/sentry/fsimpl/fuse/init.go
index 779c2bd3f1..2ff2542b62 100644
--- a/pkg/sentry/fsimpl/fuse/init.go
+++ b/pkg/sentry/fsimpl/fuse/init.go
@@ -29,9 +29,10 @@ const (
 	// Follow the same behavior as unix fuse implementation.
 	fuseMaxTimeGranNs = 1000000000
 
-	// Minimum value for MaxWrite.
+	// Minimum value for MaxWrite and MaxRead.
 	// Follow the same behavior as unix fuse implementation.
 	fuseMinMaxWrite = 4096
+	fuseMinMaxRead  = 4096
 
 	// Temporary default value for max readahead, 128kb.
 	fuseDefaultMaxReadahead = 131072
diff --git a/pkg/sentry/fsimpl/fuse/read_write.go b/pkg/sentry/fsimpl/fuse/read_write.go
new file mode 100644
index 0000000000..4ef8531dc6
--- /dev/null
+++ b/pkg/sentry/fsimpl/fuse/read_write.go
@@ -0,0 +1,152 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fuse
+
+import (
+	"io"
+	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/log"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/syserror"
+	"gvisor.dev/gvisor/pkg/usermem"
+)
+
+// ReadInPages sends FUSE_READ requests for the size after rounding it up to
+// a multiple of the page size, blocks on each for a reply, processes the
+// replies and returns the payloads as a slice of byte slices, together
+// with the total number of bytes read. This is used for general-purpose
+// reading. We do not support direct IO (which reads the exact number of
+// bytes) at this moment.
+func (fs *filesystem) ReadInPages(ctx context.Context, fd *regularFileFD, off uint64, size uint32) ([][]byte, uint32, error) {
+	attributeVersion := atomic.LoadUint64(&fs.conn.attributeVersion)
+
+	t := kernel.TaskFromContext(ctx)
+	if t == nil {
+		log.Warningf("fusefs.Read: couldn't get kernel task from context")
+		return nil, 0, syserror.EINVAL
+	}
+
+	// Round up to a multiple of page size.
+	readSize, _ := usermem.PageRoundUp(uint64(size))
+
+	// One request cannot exceed either maxRead or maxPages.
+	maxPages := fs.conn.maxRead >> usermem.PageShift
+	if maxPages > uint32(fs.conn.maxPages) {
+		maxPages = uint32(fs.conn.maxPages)
+	}
+
+	var outs [][]byte
+	var sizeRead uint32
+
+	// readSize is a multiple of usermem.PageSize.
+	// Always request bytes as a multiple of pages.
+	pagesRead, pagesToRead := uint32(0), uint32(readSize>>usermem.PageShift)
+
+	// Reuse the same struct for marshalling each request to avoid unnecessary memory allocation.
+	in := linux.FUSEReadIn{
+		Fh:        fd.Fh,
+		LockOwner: 0, // TODO(gvisor.dev/issue/3245): file lock
+		ReadFlags: 0, // TODO(gvisor.dev/issue/3245): |= linux.FUSE_READ_LOCKOWNER
+		Flags:     fd.statusFlags(),
+	}
+
+	// This loop is intended for fragmented read where the bytes to read is
+	// larger than either the maxPages or maxRead.
+	// For the majority of reads with normal size, this loop should only
+	// execute once.
+	for pagesRead < pagesToRead {
+		pagesCanRead := pagesToRead - pagesRead
+		if pagesCanRead > maxPages {
+			pagesCanRead = maxPages
+		}
+
+		in.Offset = off + (uint64(pagesRead) << usermem.PageShift)
+		in.Size = pagesCanRead << usermem.PageShift
+
+		req, err := fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(t.ThreadID()), fd.inode().NodeID, linux.FUSE_READ, &in)
+		if err != nil {
+			return nil, 0, err
+		}
+
+		// TODO(gvisor.dev/issue/3247): support async read.
+
+		res, err := fs.conn.Call(t, req)
+		if err != nil {
+			return nil, 0, err
+		}
+		if err := res.Error(); err != nil {
+			return nil, 0, err
+		}
+
+		// Not enough bytes in response,
+		// either we reached EOF,
+		// or the FUSE server sends back a response
+		// that cannot even fit the hdr.
+		if len(res.data) <= res.hdr.SizeBytes() {
+			// We treat both cases as EOF here for now
+			// since there is no reliable way to detect
+			// the over-short hdr case.
+			break
+		}
+
+		// Directly using the slice to avoid extra copy.
+		out := res.data[res.hdr.SizeBytes():]
+
+		outs = append(outs, out)
+		sizeRead += uint32(len(out))
+
+		pagesRead += pagesCanRead
+	}
+
+	defer fs.ReadCallback(ctx, fd, off, size, sizeRead, attributeVersion)
+
+	// No bytes returned: offset >= EOF.
+	if len(outs) == 0 {
+		return nil, 0, io.EOF
+	}
+
+	return outs, sizeRead, nil
+}
+
+// ReadCallback updates several pieces of information after receiving a read response.
+// Due to readahead, sizeRead can be larger than size.
+func (fs *filesystem) ReadCallback(ctx context.Context, fd *regularFileFD, off uint64, size uint32, sizeRead uint32, attributeVersion uint64) {
+	// TODO(gvisor.dev/issue/3247): support async read.
+	// If this is called by an async read, correctly process it.
+	// May need to update the signature.
+
+	i := fd.inode()
+	// TODO(gvisor.dev/issue/1193): Invalidate or update atime.
+
+	// Reached EOF.
+	if sizeRead < size {
+		// TODO(gvisor.dev/issue/3630): If we have writeback cache, then we need to fill this hole.
+		// Might need to update the buf to be returned from the Read().
+
+		// Update existing size.
+		newSize := off + uint64(sizeRead)
+		fs.conn.mu.Lock()
+		if attributeVersion == i.attributeVersion && newSize < atomic.LoadUint64(&i.size) {
+			fs.conn.attributeVersion++
+			i.attributeVersion = i.fs.conn.attributeVersion
+			atomic.StoreUint64(&i.size, newSize)
+		}
+		fs.conn.mu.Unlock()
+	}
+}
diff --git a/pkg/sentry/fsimpl/fuse/regular_file.go b/pkg/sentry/fsimpl/fuse/regular_file.go
new file mode 100644
index 0000000000..37ce4e2687
--- /dev/null
+++ b/pkg/sentry/fsimpl/fuse/regular_file.go
@@ -0,0 +1,125 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fuse
+
+import (
+	"io"
+	"math"
+	"sync"
+	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+	"gvisor.dev/gvisor/pkg/syserror"
+	"gvisor.dev/gvisor/pkg/usermem"
+)
+
+type regularFileFD struct {
+	fileDescription
+
+	// off is the file offset.
+	off int64
+	// offMu protects off.
+	offMu sync.Mutex
+}
+
+// PRead implements vfs.FileDescriptionImpl.PRead.
+func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
+	if offset < 0 {
+		return 0, syserror.EINVAL
+	}
+
+	// Check that flags are supported.
+	//
+	// TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
+	if opts.Flags&^linux.RWF_HIPRI != 0 {
+		return 0, syserror.EOPNOTSUPP
+	}
+
+	size := dst.NumBytes()
+	if size == 0 {
+		// Early return if count is 0.
+		return 0, nil
+	} else if size > math.MaxUint32 {
+		// FUSE only supports uint32 for size.
+		// Overflow.
+		return 0, syserror.EINVAL
+	}
+
+	// TODO(gvisor.dev/issue/3678): Add direct IO support.
+
+	inode := fd.inode()
+
+	// Reading beyond EOF, update file size if outdated.
+	if uint64(offset+size) > atomic.LoadUint64(&inode.size) {
+		if err := inode.reviseAttr(ctx); err != nil {
+			return 0, err
+		}
+		// If the offset after update is still too large, return error.
+		if uint64(offset) >= atomic.LoadUint64(&inode.size) {
+			return 0, io.EOF
+		}
+	}
+
+	// Truncate the read with updated file size.
+	fileSize := atomic.LoadUint64(&inode.size)
+	if uint64(offset+size) > fileSize {
+		size = int64(fileSize) - offset
+	}
+
+	buffers, n, err := inode.fs.ReadInPages(ctx, fd, uint64(offset), uint32(size))
+	if err != nil {
+		return 0, err
+	}
+
+	// TODO(gvisor.dev/issue/3237): support indirect IO (e.g. caching),
+	// store the bytes that were read ahead.
+
+	// Update the number of bytes to copy for short read.
+	if n < uint32(size) {
+		size = int64(n)
+	}
+
+	// Copy the bytes read to the dst.
+	// This loop is intended for fragmented reads.
+	// For the majority of reads, this loop executes only once.
+	var copied int64
+	for _, buffer := range buffers {
+		toCopy := int64(len(buffer))
+		if copied+toCopy > size {
+			toCopy = size - copied
+		}
+		cp, err := dst.DropFirst64(copied).CopyOut(ctx, buffer[:toCopy])
+		if err != nil {
+			return 0, err
+		}
+		if int64(cp) != toCopy {
+			return 0, syserror.EIO
+		}
+		copied += toCopy
+	}
+
+	return copied, nil
+}
+
+// Read implements vfs.FileDescriptionImpl.Read.
+func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
+	fd.offMu.Lock()
+	n, err := fd.PRead(ctx, dst, fd.off, opts)
+	fd.off += n
+	fd.offMu.Unlock()
+	return n, err
+}
diff --git a/test/fuse/BUILD b/test/fuse/BUILD
index 8bde81e3ca..cae51ce490 100644
--- a/test/fuse/BUILD
+++ b/test/fuse/BUILD
@@ -36,3 +36,8 @@ syscall_test(
     fuse = "True",
     test = "//test/fuse/linux:mkdir_test",
 )
+
+syscall_test(
+    fuse = "True",
+    test = "//test/fuse/linux:read_test",
+)
diff --git a/test/fuse/linux/BUILD b/test/fuse/linux/BUILD
index 298ea11f8e..8afd28f16c 100644
--- a/test/fuse/linux/BUILD
+++ b/test/fuse/linux/BUILD
@@ -112,3 +112,16 @@ cc_library(
         "@com_google_absl//absl/strings:str_format",
     ],
 )
+
+cc_binary(
+    name = "read_test",
+    testonly = 1,
+    srcs = ["read_test.cc"],
+    deps = [
+        gtest,
+        ":fuse_base",
+        "//test/util:fuse_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
\ No newline at end of file
diff --git a/test/fuse/linux/fuse_base.cc b/test/fuse/linux/fuse_base.cc
index 98b4e1466b..e3c6b585cc 100644
--- a/test/fuse/linux/fuse_base.cc
+++ b/test/fuse/linux/fuse_base.cc
@@ -129,7 +129,8 @@ void FuseTest::SkipServerActualRequest() {
 
 // Sends the `kSetInodeLookup` command, expected mode, and the path of the
 // inode to create under the mount point.
-void FuseTest::SetServerInodeLookup(const std::string& path, mode_t mode) {
+void FuseTest::SetServerInodeLookup(const std::string& path, mode_t mode,
+                                    uint64_t size) {
   uint32_t cmd = static_cast<uint32_t>(FuseTestCmd::kSetInodeLookup);
   EXPECT_THAT(RetryEINTR(write)(sock_[0], &cmd, sizeof(cmd)),
               SyscallSucceedsWithValue(sizeof(cmd)));
@@ -137,6 +138,9 @@ void FuseTest::SetServerInodeLookup(const std::string& path, mode_t mode) {
   EXPECT_THAT(RetryEINTR(write)(sock_[0], &mode, sizeof(mode)),
               SyscallSucceedsWithValue(sizeof(mode)));
 
+  EXPECT_THAT(RetryEINTR(write)(sock_[0], &size, sizeof(size)),
+              SyscallSucceedsWithValue(sizeof(size)));
+
   // Pad 1 byte for null-terminate c-string.
   EXPECT_THAT(RetryEINTR(write)(sock_[0], path.c_str(), path.size() + 1),
               SyscallSucceedsWithValue(path.size() + 1));
@@ -144,10 +148,10 @@ void FuseTest::SetServerInodeLookup(const std::string& path, mode_t mode) {
   WaitServerComplete();
 }
 
-void FuseTest::MountFuse() {
+void FuseTest::MountFuse(const char* mountOpts) {
   EXPECT_THAT(dev_fd_ = open("/dev/fuse", O_RDWR), SyscallSucceeds());
 
-  std::string mount_opts = absl::StrFormat("fd=%d,%s", dev_fd_, kMountOpts);
+  std::string mount_opts = absl::StrFormat("fd=%d,%s", dev_fd_, mountOpts);
   mount_point_ = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
   EXPECT_THAT(mount("fuse", mount_point_.path().c_str(), "fuse",
                     MS_NODEV | MS_NOSUID, mount_opts.c_str()),
@@ -311,11 +315,15 @@ void FuseTest::ServerHandleCommand() {
 // request with this specific path comes in.
 void FuseTest::ServerReceiveInodeLookup() {
   mode_t mode;
+  uint64_t size;
   std::vector<char> buf(FUSE_MIN_READ_BUFFER);
 
   EXPECT_THAT(RetryEINTR(read)(sock_[1], &mode, sizeof(mode)),
               SyscallSucceedsWithValue(sizeof(mode)));
 
+  EXPECT_THAT(RetryEINTR(read)(sock_[1], &size, sizeof(size)),
+              SyscallSucceedsWithValue(sizeof(size)));
+
   EXPECT_THAT(RetryEINTR(read)(sock_[1], buf.data(), buf.size()),
               SyscallSucceeds());
 
@@ -332,6 +340,9 @@ void FuseTest::ServerReceiveInodeLookup() {
   // comply with the unqiueness of different path.
   ++nodeid_;
 
+  // Set the size.
+  out_payload.attr.size = size;
+
   memcpy(buf.data(), &out_header, sizeof(out_header));
   memcpy(buf.data() + sizeof(out_header), &out_payload, sizeof(out_payload));
   lookups_.AddMemBlock(FUSE_LOOKUP, buf.data(), out_len);
diff --git a/test/fuse/linux/fuse_base.h b/test/fuse/linux/fuse_base.h
index ff4c4499df..452748d6de 100644
--- a/test/fuse/linux/fuse_base.h
+++ b/test/fuse/linux/fuse_base.h
@@ -137,7 +137,8 @@ class FuseTest : public ::testing::Test {
   // path, pretending there is an inode and avoid ENOENT when testing. If mode
   // is not given, it creates a regular file with mode 0600.
   void SetServerInodeLookup(const std::string& path,
-                            mode_t mode = S_IFREG | S_IRUSR | S_IWUSR);
+                            mode_t mode = S_IFREG | S_IRUSR | S_IWUSR,
+                            uint64_t size = 512);
 
   // Called by the testing thread to ask the FUSE server for its next received
   // FUSE request. Be sure to use the corresponding struct of iovec to receive
@@ -166,16 +167,16 @@ class FuseTest : public ::testing::Test {
  protected:
   TempPath mount_point_;
 
-  // Unmounts the mountpoint of the FUSE server.
-  void UnmountFuse();
-
- private:
   // Opens /dev/fuse and inherit the file descriptor for the FUSE server.
-  void MountFuse();
+  void MountFuse(const char* mountOpts = kMountOpts);
 
   // Creates a socketpair for communication and forks FUSE server.
   void SetUpFuseServer();
 
+  // Unmounts the mountpoint of the FUSE server.
+  void UnmountFuse();
+
+ private:
   // Sends a FuseTestCmd and gets a uint32_t data from the FUSE server.
   inline uint32_t GetServerData(uint32_t cmd);
 
diff --git a/test/fuse/linux/read_test.cc b/test/fuse/linux/read_test.cc
new file mode 100644
index 0000000000..c702651bda
--- /dev/null
+++ b/test/fuse/linux/read_test.cc
@@ -0,0 +1,390 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fuse.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/fuse/linux/fuse_base.h"
+#include "test/util/fuse_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class ReadTest : public FuseTest {
+  void SetUp() override {
+    FuseTest::SetUp();
+    test_file_path_ = JoinPath(mount_point_.path().c_str(), test_file_);
+  }
+
+  // TearDown overrides the parent's function
+  // to skip checking the unconsumed release request at the end.
+  void TearDown() override { UnmountFuse(); }
+
+ protected:
+  const std::string test_file_ = "test_file";
+  const mode_t test_file_mode_ = S_IFREG | S_IRWXU | S_IRWXG | S_IRWXO;
+  const uint64_t test_fh_ = 1;
+  const uint32_t open_flag_ = O_RDWR;
+
+  std::string test_file_path_;
+
+  PosixErrorOr<FileDescriptor> OpenTestFile(const std::string &path,
+                                            uint64_t size = 512) {
+    SetServerInodeLookup(test_file_, test_file_mode_, size);
+
+    struct fuse_out_header out_header_open = {
+        .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_open_out),
+    };
+    struct fuse_open_out out_payload_open = {
+        .fh = test_fh_,
+        .open_flags = open_flag_,
+    };
+    auto iov_out_open = FuseGenerateIovecs(out_header_open, out_payload_open);
+    SetServerResponse(FUSE_OPEN, iov_out_open);
+
+    auto res = Open(path.c_str(), open_flag_);
+    if (res.ok()) {
+      SkipServerActualRequest();
+    }
+    return res;
+  }
+};
+
+class ReadTestSmallMaxRead : public ReadTest {
+  void SetUp() override {
+    MountFuse(mountOpts);
+    SetUpFuseServer();
+    test_file_path_ = JoinPath(mount_point_.path().c_str(), test_file_);
+  }
+
+ protected:
+  constexpr static char mountOpts[] =
+      "rootmode=755,user_id=0,group_id=0,max_read=4096";
+  // 4096 is hard-coded as the max_read in mount options.
+  const int size_fragment = 4096;
+};
+
+TEST_F(ReadTest, ReadWhole) {
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenTestFile(test_file_path_));
+
+  // Prepare for the read.
+  const int n_read = 5;
+  std::vector<char> data(n_read);
+  RandomizeBuffer(data.data(), data.size());
+  struct fuse_out_header out_header_read = {
+      .len =
+          static_cast<uint32_t>(sizeof(struct fuse_out_header) + data.size()),
+  };
+  auto iov_out_read = FuseGenerateIovecs(out_header_read, data);
+  SetServerResponse(FUSE_READ, iov_out_read);
+
+  // Read the whole "file".
+  std::vector<char> buf(n_read);
+  EXPECT_THAT(read(fd.get(), buf.data(), n_read),
+              SyscallSucceedsWithValue(n_read));
+
+  // Check the read request.
+  struct fuse_in_header in_header_read;
+  struct fuse_read_in in_payload_read;
+  auto iov_in = FuseGenerateIovecs(in_header_read, in_payload_read);
+  GetServerActualRequest(iov_in);
+
+  EXPECT_EQ(in_payload_read.fh, test_fh_);
+  EXPECT_EQ(in_header_read.len,
+            sizeof(in_header_read) + sizeof(in_payload_read));
+  EXPECT_EQ(in_header_read.opcode, FUSE_READ);
+  EXPECT_EQ(in_payload_read.offset, 0);
+  EXPECT_EQ(buf, data);
+}
+
+TEST_F(ReadTest, ReadPartial) {
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenTestFile(test_file_path_));
+
+  // Prepare for the read.
+  const int n_data = 10;
+  std::vector<char> data(n_data);
+  RandomizeBuffer(data.data(), data.size());
+  // Note: due to readahead, the current read implementation treats any
+  // response that is longer than requested as correct (i.e. EOF not reached).
+  // Therefore, the test below must make sure the size to read does not exceed
+  // n_data.
+  struct fuse_out_header out_header_read = {
+      .len =
+          static_cast<uint32_t>(sizeof(struct fuse_out_header) + data.size()),
+  };
+  auto iov_out_read = FuseGenerateIovecs(out_header_read, data);
+  struct fuse_in_header in_header_read;
+  struct fuse_read_in in_payload_read;
+  auto iov_in = FuseGenerateIovecs(in_header_read, in_payload_read);
+
+  std::vector<char> buf(n_data);
+
+  // Read 1 byte.
+  SetServerResponse(FUSE_READ, iov_out_read);
+  EXPECT_THAT(read(fd.get(), buf.data(), 1), SyscallSucceedsWithValue(1));
+
+  // Check the 1-byte read request.
+  GetServerActualRequest(iov_in);
+  EXPECT_EQ(in_payload_read.fh, test_fh_);
+  EXPECT_EQ(in_header_read.len,
+            sizeof(in_header_read) + sizeof(in_payload_read));
+  EXPECT_EQ(in_header_read.opcode, FUSE_READ);
+  EXPECT_EQ(in_payload_read.offset, 0);
+
+  // Read 3 bytes.
+  SetServerResponse(FUSE_READ, iov_out_read);
+  EXPECT_THAT(read(fd.get(), buf.data(), 3), SyscallSucceedsWithValue(3));
+
+  // Check the 3-byte read request.
+  GetServerActualRequest(iov_in);
+  EXPECT_EQ(in_payload_read.fh, test_fh_);
+  EXPECT_EQ(in_payload_read.offset, 1);
+
+  // Read 5 bytes.
+  SetServerResponse(FUSE_READ, iov_out_read);
+  EXPECT_THAT(read(fd.get(), buf.data(), 5), SyscallSucceedsWithValue(5));
+
+  // Check the 5-byte read request.
+  GetServerActualRequest(iov_in);
+  EXPECT_EQ(in_payload_read.fh, test_fh_);
+  EXPECT_EQ(in_payload_read.offset, 4);
+}
+
+TEST_F(ReadTest, PRead) {
+  const int file_size = 512;
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenTestFile(test_file_path_, file_size));
+
+  // Prepare for the read.
+  const int n_read = 5;
+  std::vector<char> data(n_read);
+  RandomizeBuffer(data.data(), data.size());
+  struct fuse_out_header out_header_read = {
+      .len =
+          static_cast<uint32_t>(sizeof(struct fuse_out_header) + data.size()),
+  };
+  auto iov_out_read = FuseGenerateIovecs(out_header_read, data);
+  SetServerResponse(FUSE_READ, iov_out_read);
+
+  // Read some bytes.
+  std::vector<char> buf(n_read);
+  const int offset_read = file_size >> 1;
+  EXPECT_THAT(pread(fd.get(), buf.data(), n_read, offset_read),
+              SyscallSucceedsWithValue(n_read));
+
+  // Check the read request.
+  struct fuse_in_header in_header_read;
+  struct fuse_read_in in_payload_read;
+  auto iov_in = FuseGenerateIovecs(in_header_read, in_payload_read);
+  GetServerActualRequest(iov_in);
+
+  EXPECT_EQ(in_payload_read.fh, test_fh_);
+  EXPECT_EQ(in_header_read.len,
+            sizeof(in_header_read) + sizeof(in_payload_read));
+  EXPECT_EQ(in_header_read.opcode, FUSE_READ);
+  EXPECT_EQ(in_payload_read.offset, offset_read);
+  EXPECT_EQ(buf, data);
+}
+
+TEST_F(ReadTest, ReadZero) {
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenTestFile(test_file_path_));
+
+  // Issue the read.
+  std::vector<char> buf;
+  EXPECT_THAT(read(fd.get(), buf.data(), 0), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(ReadTest, ReadShort) {
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenTestFile(test_file_path_));
+
+  // Prepare for the short read.
+  const int n_read = 5;
+  std::vector<char> data(n_read >> 1);
+  RandomizeBuffer(data.data(), data.size());
+  struct fuse_out_header out_header_read = {
+      .len =
+          static_cast<uint32_t>(sizeof(struct fuse_out_header) + data.size()),
+  };
+  auto iov_out_read = FuseGenerateIovecs(out_header_read, data);
+  SetServerResponse(FUSE_READ, iov_out_read);
+
+  // Read the whole "file".
+  std::vector<char> buf(n_read);
+  EXPECT_THAT(read(fd.get(), buf.data(), n_read),
+              SyscallSucceedsWithValue(data.size()));
+
+  // Check the read request.
+  struct fuse_in_header in_header_read;
+  struct fuse_read_in in_payload_read;
+  auto iov_in = FuseGenerateIovecs(in_header_read, in_payload_read);
+  GetServerActualRequest(iov_in);
+
+  EXPECT_EQ(in_payload_read.fh, test_fh_);
+  EXPECT_EQ(in_header_read.len,
+            sizeof(in_header_read) + sizeof(in_payload_read));
+  EXPECT_EQ(in_header_read.opcode, FUSE_READ);
+  EXPECT_EQ(in_payload_read.offset, 0);
+  std::vector<char> short_buf(buf.begin(), buf.begin() + data.size());
+  EXPECT_EQ(short_buf, data);
+}
+
+TEST_F(ReadTest, ReadShortEOF) {
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenTestFile(test_file_path_));
+
+  // Prepare for the short read.
+  struct fuse_out_header out_header_read = {
+      .len = static_cast<uint32_t>(sizeof(struct fuse_out_header)),
+  };
+  auto iov_out_read = FuseGenerateIovecs(out_header_read);
+  SetServerResponse(FUSE_READ, iov_out_read);
+
+  // Read the whole "file".
+  const int n_read = 10;
+  std::vector<char> buf(n_read);
+  EXPECT_THAT(read(fd.get(), buf.data(), n_read), SyscallSucceedsWithValue(0));
+
+  // Check the read request.
+  struct fuse_in_header in_header_read;
+  struct fuse_read_in in_payload_read;
+  auto iov_in = FuseGenerateIovecs(in_header_read, in_payload_read);
+  GetServerActualRequest(iov_in);
+
+  EXPECT_EQ(in_payload_read.fh, test_fh_);
+  EXPECT_EQ(in_header_read.len,
+            sizeof(in_header_read) + sizeof(in_payload_read));
+  EXPECT_EQ(in_header_read.opcode, FUSE_READ);
+  EXPECT_EQ(in_payload_read.offset, 0);
+}
+
+TEST_F(ReadTestSmallMaxRead, ReadSmallMaxRead) {
+  const int n_fragment = 10;
+  const int n_read = size_fragment * n_fragment;
+
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenTestFile(test_file_path_, n_read));
+
+  // Prepare for the read.
+  std::vector<char> data(size_fragment);
+  RandomizeBuffer(data.data(), data.size());
+  struct fuse_out_header out_header_read = {
+      .len =
+          static_cast<uint32_t>(sizeof(struct fuse_out_header) + data.size()),
+  };
+  auto iov_out_read = FuseGenerateIovecs(out_header_read, data);
+
+  for (int i = 0; i < n_fragment; ++i) {
+    SetServerResponse(FUSE_READ, iov_out_read);
+  }
+
+  // Read the whole "file".
+  std::vector<char> buf(n_read);
+  EXPECT_THAT(read(fd.get(), buf.data(), n_read),
+              SyscallSucceedsWithValue(n_read));
+
+  ASSERT_EQ(GetServerNumUnsentResponses(), 0);
+  ASSERT_EQ(GetServerNumUnconsumedRequests(), n_fragment);
+
+  // Check each read segment.
+  struct fuse_in_header in_header_read;
+  struct fuse_read_in in_payload_read;
+  auto iov_in = FuseGenerateIovecs(in_header_read, in_payload_read);
+
+  for (int i = 0; i < n_fragment; ++i) {
+    GetServerActualRequest(iov_in);
+    EXPECT_EQ(in_payload_read.fh, test_fh_);
+    EXPECT_EQ(in_header_read.len,
+              sizeof(in_header_read) + sizeof(in_payload_read));
+    EXPECT_EQ(in_header_read.opcode, FUSE_READ);
+    EXPECT_EQ(in_payload_read.offset, i * size_fragment);
+    EXPECT_EQ(in_payload_read.size, size_fragment);
+
+    auto it = buf.begin() + i * size_fragment;
+    EXPECT_EQ(std::vector<char>(it, it + size_fragment), data);
+  }
+}
+
+TEST_F(ReadTestSmallMaxRead, ReadSmallMaxReadShort) {
+  const int n_fragment = 10;
+  const int n_read = size_fragment * n_fragment;
+
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenTestFile(test_file_path_, n_read));
+
+  // Prepare for the read.
+  std::vector<char> data(size_fragment);
+  RandomizeBuffer(data.data(), data.size());
+  struct fuse_out_header out_header_read = {
+      .len =
+          static_cast<uint32_t>(sizeof(struct fuse_out_header) + data.size()),
+  };
+  auto iov_out_read = FuseGenerateIovecs(out_header_read, data);
+
+  for (int i = 0; i < n_fragment - 1; ++i) {
+    SetServerResponse(FUSE_READ, iov_out_read);
+  }
+
+  // The last fragment is a short read.
+  std::vector<char> half_data(data.begin(), data.begin() + (data.size() >> 1));
+  struct fuse_out_header out_header_read_short = {
+      .len = static_cast<uint32_t>(sizeof(struct fuse_out_header) +
+                                   half_data.size()),
+  };
+  auto iov_out_read_short =
+      FuseGenerateIovecs(out_header_read_short, half_data);
+  SetServerResponse(FUSE_READ, iov_out_read_short);
+
+  // Read the whole "file".
+  std::vector<char> buf(n_read);
+  EXPECT_THAT(read(fd.get(), buf.data(), n_read),
+              SyscallSucceedsWithValue(n_read - (data.size() >> 1)));
+
+  ASSERT_EQ(GetServerNumUnsentResponses(), 0);
+  ASSERT_EQ(GetServerNumUnconsumedRequests(), n_fragment);
+
+  // Check each read segment.
+  struct fuse_in_header in_header_read;
+  struct fuse_read_in in_payload_read;
+  auto iov_in = FuseGenerateIovecs(in_header_read, in_payload_read);
+
+  for (int i = 0; i < n_fragment; ++i) {
+    GetServerActualRequest(iov_in);
+    EXPECT_EQ(in_payload_read.fh, test_fh_);
+    EXPECT_EQ(in_header_read.len,
+              sizeof(in_header_read) + sizeof(in_payload_read));
+    EXPECT_EQ(in_header_read.opcode, FUSE_READ);
+    EXPECT_EQ(in_payload_read.offset, i * size_fragment);
+    EXPECT_EQ(in_payload_read.size, size_fragment);
+
+    auto it = buf.begin() + i * size_fragment;
+    if (i != n_fragment - 1) {
+      EXPECT_EQ(std::vector<char>(it, it + data.size()), data);
+    } else {
+      EXPECT_EQ(std::vector<char>(it, it + half_data.size()), half_data);
+    }
+  }
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
\ No newline at end of file

From a94377620401aee2b3e37d16f90054f7ddc756da Mon Sep 17 00:00:00 2001
From: Ridwan Sharif <ridwanmsharif@google.com>
Date: Tue, 11 Aug 2020 12:13:01 -0400
Subject: [PATCH 180/211] Implement FUSE_RMDIR

Fixes #3587

Co-authored-by: Craig Chi <craigchi@google.com>
---
 pkg/abi/linux/fuse.go                  | 26 +++++++++
 pkg/sentry/fsimpl/fuse/fusefs.go       | 27 ++++++++++
 pkg/sentry/fsimpl/kernfs/filesystem.go |  7 ++-
 test/fuse/BUILD                        |  6 +++
 test/fuse/linux/BUILD                  | 14 +++++
 test/fuse/linux/rmdir_test.cc          | 73 ++++++++++++++++++++++++++
 6 files changed, 152 insertions(+), 1 deletion(-)
 create mode 100644 test/fuse/linux/rmdir_test.cc

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index 0ece7b7566..c75debb8cb 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -569,3 +569,29 @@ func (r *FUSEMkdirIn) MarshalUnsafe(buf []byte) {
 func (r *FUSEMkdirIn) SizeBytes() int {
 	return r.MkdirMeta.SizeBytes() + len(r.Name) + 1
 }
+
+// FUSERmDirIn is the request sent by the kernel to the daemon
+// when trying to remove a directory.
+//
+// Dynamically-sized objects cannot be marshalled.
+type FUSERmDirIn struct {
+	marshal.StubMarshallable
+
+	// Name is the name of the directory to be removed.
+	Name string
+}
+
+// MarshalUnsafe serializes r.Name into buf.
+func (r *FUSERmDirIn) MarshalUnsafe(buf []byte) {
+	copy(buf, r.Name)
+}
+
+// SizeBytes is the size of the memory representation of FUSERmDirIn.
+func (r *FUSERmDirIn) SizeBytes() int {
+	return len(r.Name) + 1
+}
+
+// UnmarshalUnsafe deserializes r.Name from the src buffer.
+func (r *FUSERmDirIn) UnmarshalUnsafe(src []byte) {
+	r.Name = string(src)
+}
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index 65e22ba4d3..c55ea927a9 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -426,6 +426,33 @@ func (i *inode) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions)
 	return i.newEntry(ctx, name, linux.S_IFDIR, linux.FUSE_MKDIR, &in)
 }
 
+// RmDir implements kernfs.Inode.RmDir.
+func (i *inode) RmDir(ctx context.Context, name string, child *vfs.Dentry) error {
+	fusefs := i.fs
+	task, creds := kernel.TaskFromContext(ctx), auth.CredentialsFromContext(ctx)
+
+	in := linux.FUSERmDirIn{Name: name}
+	req, err := fusefs.conn.NewRequest(creds, uint32(task.ThreadID()), i.NodeID, linux.FUSE_RMDIR, &in)
+	if err != nil {
+		return err
+	}
+
+	res, err := i.fs.conn.Call(task, req)
+	if err != nil {
+		return err
+	}
+	if err := res.Error(); err != nil {
+		return err
+	}
+
+	// TODO(Before merging): When creating new nodes, should we add nodes to the ordered children?
+	// If so we'll probably need to call this. We will also need to add them with the writable flag when
+	// appropriate.
+	// return i.OrderedChildren.RmDir(ctx, name, child)
+
+	return nil
+}
+
 // newEntry calls FUSE server for entry creation and allocates corresponding entry according to response.
 // Shared by FUSE_MKNOD, FUSE_MKDIR, FUSE_SYMLINK, FUSE_LINK and FUSE_LOOKUP.
 func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMode, opcode linux.FUSEOpcode, payload marshal.Marshallable) (*vfs.Dentry, error) {
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index 7aaf1146d3..2823c3b1a3 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -657,6 +657,10 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
 func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
 	fs.mu.Lock()
 	defer fs.mu.Unlock()
+
+	// Store the name before walkExistingLocked as rp will be advanced past the
+	// name in the following call.
+	name := rp.Component()
 	vfsd, inode, err := fs.walkExistingLocked(ctx, rp)
 	fs.processDeferredDecRefsLocked(ctx)
 	if err != nil {
@@ -686,7 +690,8 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
 	if err := virtfs.PrepareDeleteDentry(mntns, vfsd); err != nil {
 		return err
 	}
-	if err := parentDentry.inode.RmDir(ctx, rp.Component(), vfsd); err != nil {
+
+	if err := parentDentry.inode.RmDir(ctx, name, vfsd); err != nil {
 		virtfs.AbortDeleteDentry(vfsd)
 		return err
 	}
diff --git a/test/fuse/BUILD b/test/fuse/BUILD
index cae51ce490..30d2a871f3 100644
--- a/test/fuse/BUILD
+++ b/test/fuse/BUILD
@@ -41,3 +41,9 @@ syscall_test(
     fuse = "True",
     test = "//test/fuse/linux:read_test",
 )
+
+syscall_test(
+    test = "//test/fuse/linux:rmdir_test",
+    vfs2 = "True",
+    fuse = "True",
+)
diff --git a/test/fuse/linux/BUILD b/test/fuse/linux/BUILD
index 8afd28f16c..159428fce9 100644
--- a/test/fuse/linux/BUILD
+++ b/test/fuse/linux/BUILD
@@ -98,6 +98,20 @@ cc_binary(
     ],
 )
 
+cc_binary(
+    name = "rmdir_test",
+    testonly = 1,
+    srcs = ["rmdir_test.cc"],
+    deps = [
+        gtest,
+        ":fuse_base",
+        "//test/util:fs_util",
+        "//test/util:fuse_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
 cc_library(
     name = "fuse_base",
     testonly = 1,
diff --git a/test/fuse/linux/rmdir_test.cc b/test/fuse/linux/rmdir_test.cc
new file mode 100644
index 0000000000..913d3f9101
--- /dev/null
+++ b/test/fuse/linux/rmdir_test.cc
@@ -0,0 +1,73 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fuse.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/fuse/linux/fuse_base.h"
+#include "test/util/fs_util.h"
+#include "test/util/fuse_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class RmDirTest : public FuseTest {
+ protected:
+  const std::string test_dir_name_ = "test_dir";
+  const mode_t test_dir_mode_ = S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO;
+};
+
+TEST_F(RmDirTest, NormalRmDir) {
+  const std::string test_dir_path_ =
+      JoinPath(mount_point_.path().c_str(), test_dir_name_);
+
+  SetServerInodeLookup(test_dir_name_, test_dir_mode_);
+
+  // RmDir code.
+  struct fuse_out_header rmdir_header = {
+      .len = sizeof(struct fuse_out_header),
+  };
+
+  auto iov_out = FuseGenerateIovecs(rmdir_header);
+  SetServerResponse(FUSE_RMDIR, iov_out);
+
+  ASSERT_THAT(rmdir(test_dir_path_.c_str()), SyscallSucceeds());
+
+  struct fuse_in_header in_header;
+  std::vector<char> actual_dirname(test_dir_name_.length() + 1);
+  auto iov_in = FuseGenerateIovecs(in_header, actual_dirname);
+  GetServerActualRequest(iov_in);
+
+  EXPECT_EQ(in_header.len, sizeof(in_header) + test_dir_name_.length() + 1);
+  EXPECT_EQ(in_header.opcode, FUSE_RMDIR);
+  EXPECT_EQ(std::string(actual_dirname.data()), test_dir_name_);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor

From eccdd440899113c229f4abea53c03364d7f9875c Mon Sep 17 00:00:00 2001
From: Ridwan Sharif <ridwanmsharif@google.com>
Date: Mon, 27 Jul 2020 14:42:31 -0400
Subject: [PATCH 181/211] fuse: Implement IterDirents for directory file
 description

Fixes #3255.

This change adds support for IterDirents. You can now use `ls` in
the FUSE sandbox.

Co-authored-by: Craig Chi <craigchi@google.com>
---
 pkg/abi/linux/fuse.go                        | 131 ++++++++++++-
 pkg/sentry/fsimpl/fuse/connection.go         |   9 +-
 pkg/sentry/fsimpl/fuse/directory.go          |  54 ++++++
 pkg/sentry/fsimpl/fuse/file.go               |   6 +-
 pkg/sentry/fsimpl/kernfs/kernfs.go           |   2 +-
 pkg/sentry/vfs/file_description_impl_util.go |   2 +-
 test/fuse/BUILD                              |   6 +-
 test/fuse/linux/BUILD                        |  14 ++
 test/fuse/linux/readdir_test.cc              | 188 +++++++++++++++++++
 9 files changed, 403 insertions(+), 9 deletions(-)
 create mode 100644 test/fuse/linux/readdir_test.cc

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index c75debb8cb..e7b5f45deb 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -14,7 +14,10 @@
 
 package linux
 
-import "gvisor.dev/gvisor/tools/go_marshal/marshal"
+import (
+	"gvisor.dev/gvisor/tools/go_marshal/marshal"
+	"gvisor.dev/gvisor/tools/go_marshal/primitive"
+)
 
 // +marshal
 type FUSEOpcode uint32
@@ -186,9 +189,9 @@ const (
 
 // Constants relevant to FUSE operations.
 const (
-	FUSE_NAME_MAX     = 1024
-	FUSE_PAGE_SIZE    = 4096
-	FUSE_DIRENT_ALIGN = 8
+	FUSE_NAME_MAX       = 1024
+	FUSE_PAGE_SIZE      = 4096
+	FUSE_DIRENT_ALIGN   = 8
 )
 
 // FUSEInitIn is the request sent by the kernel to the daemon,
@@ -595,3 +598,123 @@ func (r *FUSERmDirIn) SizeBytes() int {
 func (r *FUSERmDirIn) UnmarshalUnsafe(src []byte) {
 	r.Name = string(src)
 }
+
+// FUSEDirents is a list of Dirents received from the FUSE daemon server.
+// It is used for FUSE_READDIR.
+//
+// Dynamically-sized objects cannot be marshalled.
+type FUSEDirents struct {
+	marshal.StubMarshallable
+
+	Dirents []*FUSEDirent
+}
+
+// FUSEDirent is a Dirent received from the FUSE daemon server.
+// It is used for FUSE_READDIR.
+//
+// Dynamically-sized objects cannot be marshalled.
+type FUSEDirent struct {
+	marshal.StubMarshallable
+
+	// Meta contains all the static fields of FUSEDirent.
+	Meta FUSEDirentMeta
+
+	// Name is the filename of the dirent.
+	Name string
+}
+
+// FUSEDirentMeta contains all the static fields of FUSEDirent.
+// It is used for FUSE_READDIR.
+//
+// +marshal
+type FUSEDirentMeta struct {
+	// Inode of the dirent.
+	Ino uint64
+
+	// Offset of the dirent.
+	Off uint64
+
+	// NameLen is the length of the dirent name.
+	NameLen uint32
+
+	// Type of the dirent.
+	Type uint32
+}
+
+// MarshalUnsafe serializes FUSEDirents to the dst buffer.
+func (r *FUSEDirents) MarshalUnsafe(dst []byte) {
+	for _, dirent := range r.Dirents {
+		dirent.MarshalUnsafe(dst)
+		dst = dst[dirent.SizeBytes():]
+	}
+}
+
+// SizeBytes is the size of the memory representation of FUSEDirents.
+func (r *FUSEDirents) SizeBytes() int {
+	var sizeBytes int
+	for _, dirent := range r.Dirents {
+		sizeBytes += dirent.SizeBytes()
+	}
+
+	return sizeBytes
+}
+
+// UnmarshalUnsafe deserializes FUSEDirents from the src buffer.
+func (r *FUSEDirents) UnmarshalUnsafe(src []byte) {
+	for {
+		if len(src) <= (*FUSEDirentMeta)(nil).SizeBytes() {
+			break
+		}
+
+		// It's unclear how many dirents there are in src. Each dirent is dynamically
+		// sized and so we can't make assumptions about how many dirents we can allocate.
+		if r.Dirents == nil {
+			r.Dirents = make([]*FUSEDirent, 0)
+		}
+
+		// We have to allocate a struct for each dirent - there must be a better way
+		// to do this. Linux allocates 1 page to store all the dirents and then
+		// simply reads them from the page.
+		var dirent FUSEDirent
+		dirent.UnmarshalUnsafe(src)
+		r.Dirents = append(r.Dirents, &dirent)
+
+		src = src[dirent.SizeBytes():]
+	}
+}
+
+// MarshalUnsafe serializes FUSEDirent to the dst buffer.
+func (r *FUSEDirent) MarshalUnsafe(dst []byte) {
+	r.Meta.MarshalUnsafe(dst)
+	dst = dst[r.Meta.SizeBytes():]
+
+	name := primitive.ByteSlice(r.Name)
+	name.MarshalUnsafe(dst)
+}
+
+// SizeBytes is the size of the memory representation of FUSEDirent.
+func (r *FUSEDirent) SizeBytes() int {
+	dataSize := r.Meta.SizeBytes() + len(r.Name)
+
+	// Each Dirent must be padded such that its size is a multiple
+	// of FUSE_DIRENT_ALIGN. Similar to the fuse dirent alignment
+	// in linux/fuse.h.
+	return (dataSize + (FUSE_DIRENT_ALIGN - 1)) & ^(FUSE_DIRENT_ALIGN - 1)
+}
+
+// UnmarshalUnsafe deserializes FUSEDirent from the src buffer.
+func (r *FUSEDirent) UnmarshalUnsafe(src []byte) {
+	r.Meta.UnmarshalUnsafe(src)
+	src = src[r.Meta.SizeBytes():]
+
+	if r.Meta.NameLen > FUSE_NAME_MAX {
+		// The name is too long and therefore invalid. We don't
+		// need to unmarshal the name since it'll be thrown away.
+		return
+	}
+
+	buf := make([]byte, r.Meta.NameLen)
+	name := primitive.ByteSlice(buf)
+	name.UnmarshalUnsafe(src[:r.Meta.NameLen])
+	r.Name = string(name)
+}
diff --git a/pkg/sentry/fsimpl/fuse/connection.go b/pkg/sentry/fsimpl/fuse/connection.go
index a6525249d5..f1a5c2ecbf 100644
--- a/pkg/sentry/fsimpl/fuse/connection.go
+++ b/pkg/sentry/fsimpl/fuse/connection.go
@@ -21,6 +21,8 @@ import (
 	"sync/atomic"
 	"syscall"
 
+	"gvisor.dev/gvisor/tools/go_marshal/marshal"
+
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/log"
@@ -29,7 +31,6 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/waiter"
-	"gvisor.dev/gvisor/tools/go_marshal/marshal"
 )
 
 // maxActiveRequestsDefault is the default setting controlling the upper bound
@@ -352,6 +353,12 @@ func (r *Response) UnmarshalPayload(m marshal.Marshallable) error {
 		return fmt.Errorf("payload too small. Minimum data lenth required: %d,  but got data length %d", wantDataLen, haveDataLen)
 	}
 
+	// The response data is nil unless there is some payload, in which case
+	// there is nothing to unmarshal.
+	if r.data == nil {
+		return nil
+	}
+
 	m.UnmarshalUnsafe(r.data[hdrLen:])
 	return nil
 }
diff --git a/pkg/sentry/fsimpl/fuse/directory.go b/pkg/sentry/fsimpl/fuse/directory.go
index 44d41712a1..8c59680e88 100644
--- a/pkg/sentry/fsimpl/fuse/directory.go
+++ b/pkg/sentry/fsimpl/fuse/directory.go
@@ -15,7 +15,12 @@
 package fuse
 
 import (
+	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
@@ -49,3 +54,52 @@ func (directoryFD) PWrite(ctx context.Context, src usermem.IOSequence, offset in
 func (directoryFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
 	return 0, syserror.EISDIR
 }
+
+// IterDirents implements FileDescriptionImpl.IterDirents.
+func (dir *directoryFD) IterDirents(ctx context.Context, callback vfs.IterDirentsCallback) error {
+	fusefs := dir.inode().fs
+	task, creds := kernel.TaskFromContext(ctx), auth.CredentialsFromContext(ctx)
+
+	in := linux.FUSEReadIn{
+		Fh:     dir.Fh,
+		Offset: uint64(atomic.LoadInt64(&dir.off)),
+		Size:   linux.FUSE_PAGE_SIZE,
+		Flags:  dir.statusFlags(),
+	}
+
+	// TODO(gVisor.dev/issue/3404): Support FUSE_READDIRPLUS.
+	req, err := fusefs.conn.NewRequest(creds, uint32(task.ThreadID()), dir.inode().NodeID, linux.FUSE_READDIR, &in)
+	if err != nil {
+		return err
+	}
+
+	res, err := fusefs.conn.Call(task, req)
+	if err != nil {
+		return err
+	}
+	if err := res.Error(); err != nil {
+		return err
+	}
+
+	var out linux.FUSEDirents
+	if err := res.UnmarshalPayload(&out); err != nil {
+		return err
+	}
+
+	for _, fuseDirent := range out.Dirents {
+		nextOff := int64(fuseDirent.Meta.Off) + 1
+		dirent := vfs.Dirent{
+			Name:    fuseDirent.Name,
+			Type:    uint8(fuseDirent.Meta.Type),
+			Ino:     fuseDirent.Meta.Ino,
+			NextOff: nextOff,
+		}
+
+		if err := callback.Handle(dirent); err != nil {
+			return err
+		}
+		atomic.StoreInt64(&dir.off, nextOff)
+	}
+
+	return nil
+}
diff --git a/pkg/sentry/fsimpl/fuse/file.go b/pkg/sentry/fsimpl/fuse/file.go
index 01d20caf62..186ec23620 100644
--- a/pkg/sentry/fsimpl/fuse/file.go
+++ b/pkg/sentry/fsimpl/fuse/file.go
@@ -42,6 +42,9 @@ type fileDescription struct {
 
 	// OpenFlag is the flag returned by open.
 	OpenFlag uint32
+
+	// off is the file offset.
+	off int64
 }
 
 func (fd *fileDescription) dentry() *kernfs.Dentry {
@@ -119,5 +122,6 @@ func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linu
 
 // SetStat implements FileDescriptionImpl.SetStat.
 func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
-	return nil
+	creds := auth.CredentialsFromContext(ctx)
+	return fd.inode().SetStat(ctx, fd.inode().fs.VFSFilesystem(), creds, opts)
 }
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index f656e2a8be..61189af252 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -425,7 +425,7 @@ type inodeDynamicLookup interface {
 	Valid(ctx context.Context) bool
 
 	// IterDirents is used to iterate over dynamically created entries. It invokes
-	// cb on each entry in the directory represented by the FileDescription.
+	// cb on each entry in the directory represented by the Inode.
 	// 'offset' is the offset for the entire IterDirents call, which may include
 	// results from the caller (e.g. "." and ".."). 'relOffset' is the offset
 	// inside the entries returned by this IterDirents invocation. In other words,
diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go
index 68b80a951a..2b668fd89d 100644
--- a/pkg/sentry/vfs/file_description_impl_util.go
+++ b/pkg/sentry/vfs/file_description_impl_util.go
@@ -107,7 +107,7 @@ func (FileDescriptionDefaultImpl) Write(ctx context.Context, src usermem.IOSeque
 // file_operations::iterate == file_operations::iterate_shared == NULL in
 // Linux.
 func (FileDescriptionDefaultImpl) IterDirents(ctx context.Context, cb IterDirentsCallback) error {
-	return syserror.ENOTDIR
+	return syserror.ENOSYS
 }
 
 // Seek implements FileDescriptionImpl.Seek analogously to
diff --git a/test/fuse/BUILD b/test/fuse/BUILD
index 30d2a871f3..a1b29aa33a 100644
--- a/test/fuse/BUILD
+++ b/test/fuse/BUILD
@@ -43,7 +43,11 @@ syscall_test(
 )
 
 syscall_test(
+    fuse = "True",
     test = "//test/fuse/linux:rmdir_test",
-    vfs2 = "True",
+)
+
+syscall_test(
     fuse = "True",
+    test = "//test/fuse/linux:readdir_test",
 )
diff --git a/test/fuse/linux/BUILD b/test/fuse/linux/BUILD
index 159428fce9..23c9fba318 100644
--- a/test/fuse/linux/BUILD
+++ b/test/fuse/linux/BUILD
@@ -112,6 +112,20 @@ cc_binary(
     ],
 )
 
+cc_binary(
+    name = "readdir_test",
+    testonly = 1,
+    srcs = ["readdir_test.cc"],
+    deps = [
+        gtest,
+        ":fuse_base",
+        "//test/util:fs_util",
+        "//test/util:fuse_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
 cc_library(
     name = "fuse_base",
     testonly = 1,
diff --git a/test/fuse/linux/readdir_test.cc b/test/fuse/linux/readdir_test.cc
new file mode 100644
index 0000000000..17fb630ee5
--- /dev/null
+++ b/test/fuse/linux/readdir_test.cc
@@ -0,0 +1,188 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fuse.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "test/fuse/linux/fuse_base.h"
+#include "test/util/fuse_util.h"
+#include "test/util/test_util.h"
+
+#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
+#define FUSE_DIRENT_ALIGN(x) \
+  (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))
+#define FUSE_DIRENT_SIZE(d) FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class ReaddirTest : public FuseTest {
+ public:
+  void fill_fuse_dirent(char *buf, const char *name) {
+    size_t namelen = strlen(name);
+    size_t entlen = FUSE_NAME_OFFSET + namelen;
+    size_t entlen_padded = FUSE_DIRENT_ALIGN(entlen);
+    struct fuse_dirent *dirent;
+
+    dirent = reinterpret_cast<struct fuse_dirent *>(buf);
+    dirent->namelen = namelen;
+    memcpy(dirent->name, name, namelen);
+    memset(dirent->name + namelen, 0, entlen_padded - entlen);
+  }
+
+ protected:
+  const std::string test_dir_name_ = "test_dir";
+};
+
+TEST_F(ReaddirTest, SingleEntry) {
+  const std::string test_dir_path =
+      JoinPath(mount_point_.path().c_str(), test_dir_name_);
+
+  // We need to make sure the test dir is a directory that can be found.
+  mode_t expected_mode =
+      S_IFDIR | S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
+  struct fuse_attr dir_attr = {
+      .ino = 1,
+      .size = 512,
+      .blocks = 4,
+      .mode = expected_mode,
+      .blksize = 4096,
+  };
+  struct fuse_out_header stat_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_attr_out),
+  };
+
+  struct fuse_attr_out stat_payload = {
+      .attr_valid_nsec = 2,
+      .attr = dir_attr,
+  };
+
+  // We need to make sure the test dir is a directory that can be found.
+  struct fuse_out_header lookup_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_entry_out),
+  };
+  struct fuse_entry_out lookup_payload = {
+      .nodeid = 1,
+      .entry_valid = true,
+      .attr_valid = true,
+      .attr = dir_attr,
+  };
+
+  struct fuse_out_header open_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_open_out),
+  };
+  struct fuse_open_out open_payload = {
+      .fh = 1,
+  };
+  auto iov_out = FuseGenerateIovecs(lookup_header, lookup_payload);
+  SetServerResponse(FUSE_LOOKUP, iov_out);
+
+  iov_out = FuseGenerateIovecs(open_header, open_payload);
+  SetServerResponse(FUSE_OPENDIR, iov_out);
+
+  iov_out = FuseGenerateIovecs(stat_header, stat_payload);
+  SetServerResponse(FUSE_GETATTR, iov_out);
+
+  DIR *dir = opendir(test_dir_path.c_str());
+
+  // The opendir call triggers three FUSE requests: look up the directory, stat
+  // it, and open it.
+  // We don't need to inspect those headers in this test.
+  SkipServerActualRequest();  // LOOKUP.
+  SkipServerActualRequest();  // GETATTR.
+  SkipServerActualRequest();  // OPENDIR.
+
+  // Readdir test code.
+  std::string dot = ".";
+  std::string dot_dot = "..";
+  std::string test_file = "testFile";
+
+  // Figure out how many dirents to send over and allocate them appropriately.
+  // Each dirent has a dynamic name and a static metadata part. The dirent size
+  // is aligned to being a multiple of 8.
+  size_t dot_file_dirent_size =
+      FUSE_DIRENT_ALIGN(dot.length() + FUSE_NAME_OFFSET);
+  size_t dot_dot_file_dirent_size =
+      FUSE_DIRENT_ALIGN(dot_dot.length() + FUSE_NAME_OFFSET);
+  size_t test_file_dirent_size =
+      FUSE_DIRENT_ALIGN(test_file.length() + FUSE_NAME_OFFSET);
+
+  // Create an appropriately sized payload.
+  size_t readdir_payload_size =
+      test_file_dirent_size + dot_file_dirent_size + dot_dot_file_dirent_size;
+  char readdir_payload[readdir_payload_size];
+
+  fill_fuse_dirent(readdir_payload, dot.c_str());
+  fill_fuse_dirent(readdir_payload + dot_file_dirent_size, dot_dot.c_str());
+  fill_fuse_dirent(
+      readdir_payload + dot_file_dirent_size + dot_dot_file_dirent_size,
+      test_file.c_str());
+
+  std::vector<char> readdir_payload_vec(readdir_payload,
+                                        readdir_payload + readdir_payload_size);
+  struct fuse_out_header readdir_header = {
+      .len = uint32_t(sizeof(struct fuse_out_header) + sizeof(readdir_payload)),
+  };
+  struct fuse_out_header readdir_header_break = {
+      .len = uint32_t(sizeof(struct fuse_out_header)),
+  };
+
+  iov_out = FuseGenerateIovecs(readdir_header, readdir_payload_vec);
+  SetServerResponse(FUSE_READDIR, iov_out);
+
+  iov_out = FuseGenerateIovecs(readdir_header_break);
+  SetServerResponse(FUSE_READDIR, iov_out);
+
+  struct dirent *entry;
+  entry = readdir(dir);
+  EXPECT_EQ(std::string(entry->d_name), dot);
+
+  entry = readdir(dir);
+  EXPECT_EQ(std::string(entry->d_name), dot_dot);
+
+  entry = readdir(dir);
+  EXPECT_EQ(std::string(entry->d_name), test_file);
+
+  entry = readdir(dir);
+  EXPECT_TRUE((entry == NULL));
+
+  SkipServerActualRequest();  // READDIR.
+  SkipServerActualRequest();  // READDIR with no data.
+
+  // Clean up.
+  closedir(dir);
+
+  struct fuse_in_header in_header;
+  struct fuse_release_in in_payload;
+
+  auto iov_in = FuseGenerateIovecs(in_header, in_payload);
+  GetServerActualRequest(iov_in);
+  EXPECT_EQ(in_header.len, sizeof(in_header) + sizeof(in_payload));
+  EXPECT_EQ(in_header.opcode, FUSE_RELEASEDIR);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
\ No newline at end of file

From 0f2a8b08f13cc39d924c328bc7a9bab73c1c2328 Mon Sep 17 00:00:00 2001
From: Ridwan Sharif <ridwanmsharif@google.com>
Date: Wed, 19 Aug 2020 20:11:59 -0400
Subject: [PATCH 182/211] fuse: use safe go_marshal API for FUSE

Until #3698 is resolved, this change is needed to ensure we're not
corrupting memory anywhere.
---
 pkg/abi/linux/fuse.go                         | 97 ++++++++++++++++++-
 pkg/sentry/fsimpl/fuse/connection.go          |  9 +-
 tools/go_marshal/marshal/marshal_impl_util.go |  2 +-
 3 files changed, 103 insertions(+), 5 deletions(-)

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index e7b5f45deb..d105c51762 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -359,7 +359,12 @@ type FUSELookupIn struct {
 
 // MarshalUnsafe serializes r.name to the dst buffer.
 func (r *FUSELookupIn) MarshalUnsafe(buf []byte) {
-	copy(buf, []byte(r.Name))
+	copy(buf, r.Name)
+}
+
+// MarshalBytes serializes r.name to the dst buffer.
+func (r *FUSELookupIn) MarshalBytes(buf []byte) {
+	copy(buf, r.Name)
 }
 
 // SizeBytes is the size of the memory representation of FUSELookupIn.
@@ -491,6 +496,12 @@ func (r *FUSEMknodIn) MarshalUnsafe(buf []byte) {
 	copy(buf[r.MknodMeta.SizeBytes():], r.Name)
 }
 
+// MarshalBytes serializes r.MknodMeta and r.Name to the dst buffer.
+func (r *FUSEMknodIn) MarshalBytes(buf []byte) {
+	r.MknodMeta.MarshalBytes(buf[:r.MknodMeta.SizeBytes()])
+	copy(buf[r.MknodMeta.SizeBytes():], r.Name)
+}
+
 // SizeBytes is the size of the memory representation of FUSEMknodIn.
 // 1 extra byte for null-terminated string.
 func (r *FUSEMknodIn) SizeBytes() int {
@@ -518,6 +529,13 @@ func (r *FUSESymLinkIn) MarshalUnsafe(buf []byte) {
 	copy(buf[len(r.Name)+1:], r.Target)
 }
 
+// MarshalBytes serializes r.Name and r.Target to the dst buffer.
+// Left null-termination at end of r.Name and r.Target.
+func (r *FUSESymLinkIn) MarshalBytes(buf []byte) {
+	copy(buf, r.Name)
+	copy(buf[len(r.Name)+1:], r.Target)
+}
+
 // SizeBytes is the size of the memory representation of FUSESymLinkIn.
 // 2 extra bytes for null-terminated string.
 func (r *FUSESymLinkIn) SizeBytes() int {
@@ -530,6 +548,9 @@ type FUSEEmptyIn struct{ marshal.StubMarshallable }
 // MarshalUnsafe do nothing for marshal.
 func (r *FUSEEmptyIn) MarshalUnsafe(buf []byte) {}
 
+// MarshalBytes does nothing, as an empty request has no payload to marshal.
+func (r *FUSEEmptyIn) MarshalBytes(buf []byte) {}
+
 // SizeBytes is 0 for empty request.
 func (r *FUSEEmptyIn) SizeBytes() int {
 	return 0
@@ -567,6 +588,12 @@ func (r *FUSEMkdirIn) MarshalUnsafe(buf []byte) {
 	copy(buf[r.MkdirMeta.SizeBytes():], r.Name)
 }
 
+// MarshalBytes serializes r.MkdirMeta and r.Name to the dst buffer.
+func (r *FUSEMkdirIn) MarshalBytes(buf []byte) {
+	r.MkdirMeta.MarshalBytes(buf[:r.MkdirMeta.SizeBytes()])
+	copy(buf[r.MkdirMeta.SizeBytes():], r.Name)
+}
+
 // SizeBytes is the size of the memory representation of FUSEMkdirIn.
 // 1 extra byte for null-terminated Name string.
 func (r *FUSEMkdirIn) SizeBytes() int {
@@ -589,6 +616,11 @@ func (r *FUSERmDirIn) MarshalUnsafe(buf []byte) {
 	copy(buf, r.Name)
 }
 
+// MarshalBytes serializes r.name to the dst buffer.
+func (r *FUSERmDirIn) MarshalBytes(buf []byte) {
+	copy(buf, r.Name)
+}
+
 // SizeBytes is the size of the memory representation of FUSERmDirIn.
 func (r *FUSERmDirIn) SizeBytes() int {
 	return len(r.Name) + 1
@@ -599,6 +631,11 @@ func (r *FUSERmDirIn) UnmarshalUnsafe(src []byte) {
 	r.Name = string(src)
 }
 
+// UnmarshalBytes deserializes r.name from the src buffer.
+func (r *FUSERmDirIn) UnmarshalBytes(src []byte) {
+	r.Name = string(src)
+}
+
 // FUSEDirents is a list of Dirents received from the FUSE daemon server.
 // It is used for FUSE_READDIR.
 //
@@ -649,6 +686,14 @@ func (r *FUSEDirents) MarshalUnsafe(dst []byte) {
 	}
 }
 
+// MarshalBytes serializes FUSEDirents to the dst buffer.
+func (r *FUSEDirents) MarshalBytes(dst []byte) {
+	for _, dirent := range r.Dirents {
+		dirent.MarshalBytes(dst)
+		dst = dst[dirent.SizeBytes():]
+	}
+}
+
 // SizeBytes is the size of the memory representation of FUSEDirents.
 func (r *FUSEDirents) SizeBytes() int {
 	var sizeBytes int
@@ -683,6 +728,30 @@ func (r *FUSEDirents) UnmarshalUnsafe(src []byte) {
 	}
 }
 
+// UnmarshalBytes deserializes FUSEDirents from the src buffer.
+func (r *FUSEDirents) UnmarshalBytes(src []byte) {
+	for {
+		if len(src) <= (*FUSEDirentMeta)(nil).SizeBytes() {
+			break
+		}
+
+		// It's unclear how many dirents there are in src. Each dirent is dynamically
+		// sized and so we can't make assumptions about how many dirents we can allocate.
+		if r.Dirents == nil {
+			r.Dirents = make([]*FUSEDirent, 0)
+		}
+
+		// We have to allocate a struct for each dirent - there must be a better way
+		// to do this. Linux allocates 1 page to store all the dirents and then
+		// simply reads them from the page.
+		var dirent FUSEDirent
+		dirent.UnmarshalBytes(src)
+		r.Dirents = append(r.Dirents, &dirent)
+
+		src = src[dirent.SizeBytes():]
+	}
+}
+
 // MarshalUnsafe serializes FUSEDirent to the dst buffer.
 func (r *FUSEDirent) MarshalUnsafe(dst []byte) {
 	r.Meta.MarshalUnsafe(dst)
@@ -692,6 +761,15 @@ func (r *FUSEDirent) MarshalUnsafe(dst []byte) {
 	name.MarshalUnsafe(dst)
 }
 
+// MarshalBytes serializes FUSEDirent to the dst buffer.
+func (r *FUSEDirent) MarshalBytes(dst []byte) {
+	r.Meta.MarshalBytes(dst)
+	dst = dst[r.Meta.SizeBytes():]
+
+	name := primitive.ByteSlice(r.Name)
+	name.MarshalBytes(dst)
+}
+
 // SizeBytes is the size of the memory representation of FUSEDirent.
 func (r *FUSEDirent) SizeBytes() int {
 	dataSize := r.Meta.SizeBytes() + len(r.Name)
@@ -718,3 +796,20 @@ func (r *FUSEDirent) UnmarshalUnsafe(src []byte) {
 	name.UnmarshalUnsafe(src[:r.Meta.NameLen])
 	r.Name = string(name)
 }
+
+// UnmarshalBytes deserializes FUSEDirent from the src buffer.
+func (r *FUSEDirent) UnmarshalBytes(src []byte) {
+	r.Meta.UnmarshalBytes(src)
+	src = src[r.Meta.SizeBytes():]
+
+	if r.Meta.NameLen > FUSE_NAME_MAX {
+		// The name is too long and therefore invalid. We don't
+		// need to unmarshal the name since it'll be thrown away.
+		return
+	}
+
+	buf := make([]byte, r.Meta.NameLen)
+	name := primitive.ByteSlice(buf)
+	name.UnmarshalBytes(src[:r.Meta.NameLen])
+	r.Name = string(name)
+}
diff --git a/pkg/sentry/fsimpl/fuse/connection.go b/pkg/sentry/fsimpl/fuse/connection.go
index f1a5c2ecbf..f7d1a5c52d 100644
--- a/pkg/sentry/fsimpl/fuse/connection.go
+++ b/pkg/sentry/fsimpl/fuse/connection.go
@@ -270,8 +270,10 @@ func (conn *connection) NewRequest(creds *auth.Credentials, pid uint32, ino uint
 	}
 
 	buf := make([]byte, hdr.Len)
-	hdr.MarshalUnsafe(buf[:hdrLen])
-	payload.MarshalUnsafe(buf[hdrLen:])
+
+	// TODO(gVisor.dev/3698): Use the unsafe version once go_marshal is safe to use again.
+	hdr.MarshalBytes(buf[:hdrLen])
+	payload.MarshalBytes(buf[hdrLen:])
 
 	return &Request{
 		id:   hdr.Unique,
@@ -359,7 +361,8 @@ func (r *Response) UnmarshalPayload(m marshal.Marshallable) error {
 		return nil
 	}
 
-	m.UnmarshalUnsafe(r.data[hdrLen:])
+	// TODO(gVisor.dev/3698): Use the unsafe version once go_marshal is safe to use again.
+	m.UnmarshalBytes(r.data[hdrLen:])
 	return nil
 }
 
diff --git a/tools/go_marshal/marshal/marshal_impl_util.go b/tools/go_marshal/marshal/marshal_impl_util.go
index 89c7d35759..1724652f71 100644
--- a/tools/go_marshal/marshal/marshal_impl_util.go
+++ b/tools/go_marshal/marshal/marshal_impl_util.go
@@ -44,7 +44,7 @@ func (StubMarshallable) MarshalBytes(dst []byte) {
 
 // UnmarshalBytes implements Marshallable.UnmarshalBytes.
 func (StubMarshallable) UnmarshalBytes(src []byte) {
-	panic("Please implement your own UnMarshalBytes function")
+	panic("Please implement your own UnmarshalBytes function")
 }
 
 // Packed implements Marshallable.Packed.

From 3c7692e8c5bd21432e8ecab9556722d00f54384a Mon Sep 17 00:00:00 2001
From: Craig Chi <craigchi@google.com>
Date: Wed, 19 Aug 2020 22:12:15 -0700
Subject: [PATCH 183/211] Fix FUSE_READDIR offset issue

According to readdir(3), the offset attribute in struct dirent is the
offset to the next dirent instead of the offset of itself. Send the
successive FUSE_READDIR requests with the offset retrieved from the last
entry.

Updates #3255
---
 pkg/sentry/fsimpl/fuse/directory.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/sentry/fsimpl/fuse/directory.go b/pkg/sentry/fsimpl/fuse/directory.go
index 8c59680e88..a83357129f 100644
--- a/pkg/sentry/fsimpl/fuse/directory.go
+++ b/pkg/sentry/fsimpl/fuse/directory.go
@@ -87,7 +87,7 @@ func (dir *directoryFD) IterDirents(ctx context.Context, callback vfs.IterDirent
 	}
 
 	for _, fuseDirent := range out.Dirents {
-		nextOff := int64(fuseDirent.Meta.Off) + 1
+		nextOff := int64(fuseDirent.Meta.Off)
 		dirent := vfs.Dirent{
 			Name:    fuseDirent.Name,
 			Type:    uint8(fuseDirent.Meta.Type),

From aad7e25632ee972bd026c83b3881b2166175b4db Mon Sep 17 00:00:00 2001
From: Craig Chi <craigchi@google.com>
Date: Thu, 20 Aug 2020 10:40:41 -0700
Subject: [PATCH 184/211] Implementing inode.Getlink

kernfs uses inode.Getlink to resolve symlinks when looking up paths.

Updates #3452
---
 pkg/sentry/fsimpl/fuse/fusefs.go | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index c55ea927a9..e1bbb4b522 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -488,6 +488,12 @@ func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMo
 	return child.VFSDentry(), nil
 }
 
+// Getlink implements Inode.Getlink.
+func (i *inode) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDentry, string, error) {
+	path, err := i.Readlink(ctx, mnt)
+	return vfs.VirtualDentry{}, path, err
+}
+
 // Readlink implements kernfs.Inode.Readlink.
 func (i *inode) Readlink(ctx context.Context, mnt *vfs.Mount) (string, error) {
 	if i.Mode().FileType()&linux.S_IFLNK == 0 {

From 3bd85840c8f0364083c88d65c2bc1f968069b04e Mon Sep 17 00:00:00 2001
From: Craig Chi <craigchi@google.com>
Date: Thu, 20 Aug 2020 12:31:52 -0700
Subject: [PATCH 185/211] Support multiple FUSE kernel versions of FUSE_INIT
 response struct

The fuse_init_out struct changes in different FUSE kernel versions. A
FUSE server may implement older versions of fuse_init_out, but they
share common attributes from the beginning. Implement a variable-length
marshallable interface to support older versions of the ABI.

Fixes #3707
---
 pkg/abi/linux/fuse.go                | 58 ++++++++++++++++++++++++++++
 pkg/sentry/fsimpl/fuse/connection.go |  5 +++
 pkg/sentry/fsimpl/fuse/init.go       |  6 +--
 3 files changed, 66 insertions(+), 3 deletions(-)

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index d105c51762..ca4ee5e80f 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -15,6 +15,7 @@
 package linux
 
 import (
+	"gvisor.dev/gvisor/pkg/usermem"
 	"gvisor.dev/gvisor/tools/go_marshal/marshal"
 	"gvisor.dev/gvisor/tools/go_marshal/primitive"
 )
@@ -262,6 +263,63 @@ type FUSEInitOut struct {
 	_ [8]uint32
 }
 
+// FUSEInitRes is a variable-length wrapper of FUSEInitOut. The FUSE server may
+// implement an older version of the FUSE protocol, which contains a
+// FUSEInitOut with fewer attributes.
+//
+// Dynamically-sized objects cannot be marshalled.
+type FUSEInitRes struct {
+	marshal.StubMarshallable
+
+	// InitOut contains the response from the FUSE server.
+	InitOut FUSEInitOut
+
+	// Len is the total length of bytes of the response.
+	Len uint32
+}
+
+// UnmarshalBytes deserializes src to the InitOut attribute in a FUSEInitRes.
+func (r *FUSEInitRes) UnmarshalBytes(src []byte) {
+	out := &r.InitOut
+
+	// Introduced before FUSE kernel version 7.13.
+	out.Major = uint32(usermem.ByteOrder.Uint32(src[:4]))
+	src = src[4:]
+	out.Minor = uint32(usermem.ByteOrder.Uint32(src[:4]))
+	src = src[4:]
+	out.MaxReadahead = uint32(usermem.ByteOrder.Uint32(src[:4]))
+	src = src[4:]
+	out.Flags = uint32(usermem.ByteOrder.Uint32(src[:4]))
+	src = src[4:]
+	out.MaxBackground = uint16(usermem.ByteOrder.Uint16(src[:2]))
+	src = src[2:]
+	out.CongestionThreshold = uint16(usermem.ByteOrder.Uint16(src[:2]))
+	src = src[2:]
+	out.MaxWrite = uint32(usermem.ByteOrder.Uint32(src[:4]))
+	src = src[4:]
+
+	// Introduced in FUSE kernel version 7.23.
+	if len(src) >= 4 {
+		out.TimeGran = uint32(usermem.ByteOrder.Uint32(src[:4]))
+		src = src[4:]
+	}
+	// Introduced in FUSE kernel version 7.28.
+	if len(src) >= 2 {
+		out.MaxPages = uint16(usermem.ByteOrder.Uint16(src[:2]))
+		src = src[2:]
+	}
+	// Introduced in FUSE kernel version 7.31.
+	if len(src) >= 2 {
+		out.MapAlignment = uint16(usermem.ByteOrder.Uint16(src[:2]))
+		src = src[2:]
+	}
+}
+
+// SizeBytes is the size of the payload of the FUSE_INIT response.
+func (r *FUSEInitRes) SizeBytes() int {
+	return int(r.Len)
+}
+
 // FUSEGetAttrIn is the request sent by the kernel to the daemon,
 // to get the attribute of a inode.
 //
diff --git a/pkg/sentry/fsimpl/fuse/connection.go b/pkg/sentry/fsimpl/fuse/connection.go
index f7d1a5c52d..0e91bb18e3 100644
--- a/pkg/sentry/fsimpl/fuse/connection.go
+++ b/pkg/sentry/fsimpl/fuse/connection.go
@@ -345,6 +345,11 @@ func (r *Response) Error() error {
 	return error(sysErrNo)
 }
 
+// DataLen returns the size of the response without the header.
+func (r *Response) DataLen() uint32 {
+	return r.hdr.Len - uint32(r.hdr.SizeBytes())
+}
+
 // UnmarshalPayload unmarshals the response data into m.
 func (r *Response) UnmarshalPayload(m marshal.Marshallable) error {
 	hdrLen := r.hdr.SizeBytes()
diff --git a/pkg/sentry/fsimpl/fuse/init.go b/pkg/sentry/fsimpl/fuse/init.go
index 2ff2542b62..6384cbbdba 100644
--- a/pkg/sentry/fsimpl/fuse/init.go
+++ b/pkg/sentry/fsimpl/fuse/init.go
@@ -76,12 +76,12 @@ func (conn *connection) InitRecv(res *Response, hasSysAdminCap bool) error {
 		return err
 	}
 
-	var out linux.FUSEInitOut
-	if err := res.UnmarshalPayload(&out); err != nil {
+	initRes := linux.FUSEInitRes{Len: res.DataLen()}
+	if err := res.UnmarshalPayload(&initRes); err != nil {
 		return err
 	}
 
-	return conn.initProcessReply(&out, hasSysAdminCap)
+	return conn.initProcessReply(&initRes.InitOut, hasSysAdminCap)
 }
 
 // Process the FUSE_INIT reply from the FUSE server.

From 9cc683af1e5c003ccc4f5a72e6b5b207e8426e1a Mon Sep 17 00:00:00 2001
From: Boyuan He & Ridwan Sharif <heboyuan@google.com>
Date: Wed, 26 Aug 2020 15:26:46 -0400
Subject: [PATCH 186/211] fuse: add benchmarking support for FUSE

This change adds the following:
-  Add support for containerizing syscall tests for FUSE
-  Mount tmpfs in the container so we can run benchmarks against it
-  Run the server in a background process
-  Add benchmarks for FUSE syscalls

Co-authored-by: Ridwan Sharif <ridwanmsharif@google.com>
---
 Makefile                                 |   4 +
 images/basic/fuse/Dockerfile             |  15 +++
 pkg/sentry/fs/g3doc/fuse.md              |  47 ++++++++++
 pkg/test/dockerutil/container.go         |  10 ++
 pkg/test/dockerutil/dockerutil.go        |  20 ++++
 scripts/common_build.sh                  |   5 +-
 test/e2e/integration_test.go             |   6 +-
 test/fuse/BUILD                          |  46 ++++++++++
 test/fuse/benchmark/BUILD                |  91 ++++++++++++++++++
 test/fuse/benchmark/mkdir_benchmark.cc   |  51 +++++++++++
 test/fuse/benchmark/open_benchmark.cc    |  60 ++++++++++++
 test/fuse/benchmark/read_benchmark.cc    |  57 ++++++++++++
 test/fuse/benchmark/stat_benchmark.cc    |  65 +++++++++++++
 test/fuse/benchmark/symlink_benchmark.cc |  60 ++++++++++++
 test/image/BUILD                         |   1 +
 test/image/image_test.go                 |  52 +++++++++++
 test/runner/BUILD                        |   3 +
 test/runner/defs.bzl                     |  28 ++++++
 test/runner/runner.go                    | 112 ++++++++++++++++++++---
 19 files changed, 717 insertions(+), 16 deletions(-)
 create mode 100644 images/basic/fuse/Dockerfile
 create mode 100644 test/fuse/benchmark/BUILD
 create mode 100644 test/fuse/benchmark/mkdir_benchmark.cc
 create mode 100644 test/fuse/benchmark/open_benchmark.cc
 create mode 100644 test/fuse/benchmark/read_benchmark.cc
 create mode 100644 test/fuse/benchmark/stat_benchmark.cc
 create mode 100644 test/fuse/benchmark/symlink_benchmark.cc

diff --git a/Makefile b/Makefile
index d9e3206b44..c8fc6f1e04 100644
--- a/Makefile
+++ b/Makefile
@@ -336,6 +336,10 @@ RUNTIME_BIN     := $(RUNTIME_DIR)/runsc
 RUNTIME_LOG_DIR := $(RUNTIME_DIR)/logs
 RUNTIME_LOGS    := $(RUNTIME_LOG_DIR)/runsc.log.%TEST%.%TIMESTAMP%.%COMMAND%
 
+ifeq (,$(RUNTIME_NAME))
+RUNTIME_NAME := $(RUNTIME)
+endif
+
 dev: ## Installs a set of local runtimes. Requires sudo.
 	@$(call submake,refresh ARGS="--net-raw")
 	@$(call submake,configure RUNTIME_NAME="$(RUNTIME)" ARGS="--net-raw")
diff --git a/images/basic/fuse/Dockerfile b/images/basic/fuse/Dockerfile
new file mode 100644
index 0000000000..9e88aa2c5f
--- /dev/null
+++ b/images/basic/fuse/Dockerfile
@@ -0,0 +1,15 @@
+FROM ubuntu:20.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update
+RUN apt-get install -y build-essential git pkg-config fuse3 libfuse3-3 libfuse3-dev strace
+
+WORKDIR /fus
+
+RUN mkdir -pv mountpoint
+RUN git clone https://github.com/libfuse/libfuse
+
+RUN gcc -Wall ./libfuse/example/passthrough.c `pkg-config fuse3 --cflags --libs` -o server-bin
+
+CMD ["bash"]
diff --git a/pkg/sentry/fs/g3doc/fuse.md b/pkg/sentry/fs/g3doc/fuse.md
index 2ca84dd746..496e339ce3 100644
--- a/pkg/sentry/fs/g3doc/fuse.md
+++ b/pkg/sentry/fs/g3doc/fuse.md
@@ -254,6 +254,53 @@ I/O syscalls like `read(2)`, `write(2)` and `mmap(2)`.
 -   `FUSE_BMAP`: Old address space API for block defrag. Probably not needed.
 -   `FUSE_NOTIFY_REPLY`: [TODO: what does this do?]
 
+## Benchmark FUSE
+
+The FUSE benchmarks issue FUSE syscalls inside a Docker container to make sure
+the required environment conditions are met - such as having the right
+libraries to start a FUSE server.
+
+### Setup
+
+To run benchmark:
+
+1. Make sure you have `Docker` installed.
+2. Download all Docker images with `make load-all-images`.
+3. Configure the `runsc` Docker runtime to have VFS2 and FUSE supported.
+(e.g.  `make configure RUNTIME=runsc ARGS="--vfs2 --fuse ..." ...`)
+
+You should now have a runtime with the following options configured in
+`/etc/docker/daemon.json`
+```
+"runsc": {
+            "path": "path/to/your/runsc",
+            "runtimeArgs": [
+                "--vfs2",
+                "--fuse"
+                ... 
+            ]
+        }
+```
+
+### Running benchmarks
+With the above setup, a benchmark can be run with the following command:
+```
+bazel test --test_output=all --cache_test_results=no --test_arg=-test.bench= //path/to:target
+```
+For example, to run the open benchmark:
+```
+bazel test --test_output=all --cache_test_results=no --test_arg=-test.bench= //test/fuse:open_benchmark_runsc_ptrace_vfs2_fuse_container
+```
+
+Note:
+- the test target name needs to include `vfs2_fuse_container` to run in a container with `vfs2` and `fuse` enabled
+- `--test_output` is set to `all` to view the results in the terminal
+- `--cache_test_results` is set to `no` to avoid reusing cached benchmark results
+
+### Use your fuse server
+
+To use your own FUSE server, change the `images/basic/fuse/Dockerfile` to compile your FUSE server into the container and name it `server-bin`.
+
 # References
 
 -   [fuse(4) Linux manual page](https://www.man7.org/linux/man-pages/man4/fuse.4.html)
diff --git a/pkg/test/dockerutil/container.go b/pkg/test/dockerutil/container.go
index 64d17f661e..727be26b25 100644
--- a/pkg/test/dockerutil/container.go
+++ b/pkg/test/dockerutil/container.go
@@ -136,6 +136,11 @@ func MakeNativeContainer(ctx context.Context, logger testutil.Logger) *Container
 	}
 }
 
+// Runtime returns the runtime of the container.
+func (c *Container) Runtime() string {
+	return c.runtime
+}
+
 // AddProfile adds a profile to this container.
 func (c *Container) AddProfile(p Profile) {
 	c.profiles = append(c.profiles, p)
@@ -541,3 +546,8 @@ func (c *Container) CleanUp(ctx context.Context) {
 	// Forget all mounts.
 	c.mounts = nil
 }
+
+// CopyErr returns the error that happened during copy.
+func (c *Container) CopyErr() error {
+	return c.copyErr
+}
diff --git a/pkg/test/dockerutil/dockerutil.go b/pkg/test/dockerutil/dockerutil.go
index 7027df1a5d..a2d7e8c85e 100644
--- a/pkg/test/dockerutil/dockerutil.go
+++ b/pkg/test/dockerutil/dockerutil.go
@@ -121,6 +121,26 @@ func UsingVFS2() (bool, error) {
 	return false, nil
 }
 
+// UsingFUSE returns true if the 'runtime' has the fuse flag set.
+func UsingFUSE() (bool, error) {
+	rMap, err := runtimeMap()
+	if err != nil {
+		return false, err
+	}
+
+	list, ok := rMap["runtimeArgs"].([]interface{})
+	if !ok {
+		return false, fmt.Errorf("unexpected format: %v", rMap)
+	}
+
+	for _, element := range list {
+		if element == "--fuse" {
+			return true, nil
+		}
+	}
+	return false, nil
+}
+
 func runtimeMap() (map[string]interface{}, error) {
 	// Read the configuration data; the file must exist.
 	configBytes, err := ioutil.ReadFile(*config)
diff --git a/scripts/common_build.sh b/scripts/common_build.sh
index d4a6c49081..6874e56f9a 100755
--- a/scripts/common_build.sh
+++ b/scripts/common_build.sh
@@ -109,8 +109,9 @@ function collect_logs() {
 }
 
 function find_branch_name() {
-  git branch --show-current \
+  (git branch --show-current \
     || git rev-parse HEAD \
     || bazel info workspace \
-    | xargs basename
+    | xargs basename) \
+    | tr '/' '-'
 }
diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go
index 809244babd..0c82e98d4f 100644
--- a/test/e2e/integration_test.go
+++ b/test/e2e/integration_test.go
@@ -168,10 +168,10 @@ func TestCheckpointRestore(t *testing.T) {
 	}
 
 	// TODO(gvisor.dev/issue/3373): Remove after implementing.
-	if usingVFS2, err := dockerutil.UsingVFS2(); usingVFS2 {
-		t.Skip("CheckpointRestore not implemented in VFS2.")
-	} else if err != nil {
+	if usingVFS2, err := dockerutil.UsingVFS2(); err != nil {
 		t.Fatalf("failed to read config for runtime %s: %v", dockerutil.Runtime(), err)
+	} else if usingVFS2 {
+		t.Skip("CheckpointRestore not implemented in VFS2.")
 	}
 
 	ctx := context.Background()
diff --git a/test/fuse/BUILD b/test/fuse/BUILD
index a1b29aa33a..02498b3a1a 100644
--- a/test/fuse/BUILD
+++ b/test/fuse/BUILD
@@ -51,3 +51,49 @@ syscall_test(
     fuse = "True",
     test = "//test/fuse/linux:readdir_test",
 )
+
+
+syscall_test(
+    size = "large",
+    add_overlay = True,
+    debug = False,
+    setup_command = "'./server-bin mountpoint'",
+    test = "//test/fuse/benchmark:stat_benchmark",
+    use_image = "basic/fuse",
+)
+
+syscall_test(
+    size = "large",
+    add_overlay = True,
+    debug = False,
+    setup_command = "'./server-bin mountpoint'",
+    test = "//test/fuse/benchmark:open_benchmark",
+    use_image = "basic/fuse",
+)
+
+syscall_test(
+    size = "large",
+    add_overlay = True,
+    debug = False,
+    setup_command = "'./server-bin mountpoint'",
+    test = "//test/fuse/benchmark:read_benchmark",
+    use_image = "basic/fuse",
+)
+
+syscall_test(
+    size = "large",
+    add_overlay = True,
+    debug = False,
+    setup_command = "'./server-bin mountpoint'",
+    test = "//test/fuse/benchmark:symlink_benchmark",
+    use_image = "basic/fuse",
+)
+
+syscall_test(
+    size = "large",
+    add_overlay = True,
+    debug = False,
+    setup_command = "'./server-bin mountpoint'",
+    test = "//test/fuse/benchmark:mkdir_benchmark",
+    use_image = "basic/fuse",
+)
diff --git a/test/fuse/benchmark/BUILD b/test/fuse/benchmark/BUILD
new file mode 100644
index 0000000000..16369d99b9
--- /dev/null
+++ b/test/fuse/benchmark/BUILD
@@ -0,0 +1,91 @@
+load("//tools:defs.bzl", "cc_binary", "gbenchmark", "gtest")
+
+package(
+    default_visibility = ["//:sandbox"],
+    licenses = ["notice"],
+)
+
+cc_binary(
+    name = "stat_benchmark",
+    testonly = 1,
+    srcs = [
+        "stat_benchmark.cc",
+    ],
+    deps = [
+        gbenchmark,
+        gtest,
+        "//test/util:fs_util",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "@com_google_absl//absl/strings",
+    ],
+)
+
+cc_binary(
+    name = "open_benchmark",
+    testonly = 1,
+    srcs = [
+        "open_benchmark.cc",
+    ],
+    deps = [
+        gbenchmark,
+        gtest,
+        "//test/util:fs_util",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "@com_google_absl//absl/strings",
+    ],
+)
+
+cc_binary(
+    name = "read_benchmark",
+    testonly = 1,
+    srcs = [
+        "read_benchmark.cc",
+    ],
+    deps = [
+        gbenchmark,
+        gtest,
+        "//test/util:fs_util",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "@com_google_absl//absl/strings",
+    ],
+)
+
+cc_binary(
+    name = "symlink_benchmark",
+    testonly = 1,
+    srcs = [
+        "symlink_benchmark.cc",
+    ],
+    deps = [
+        gbenchmark,
+        gtest,
+        "//test/util:fs_util",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "@com_google_absl//absl/strings",
+    ],
+)
+
+cc_binary(
+    name = "mkdir_benchmark",
+    testonly = 1,
+    srcs = [
+        "mkdir_benchmark.cc",
+    ],
+    deps = [
+        gbenchmark,
+        gtest,
+        "//test/util:fs_util",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "@com_google_absl//absl/strings",
+    ],
+)
diff --git a/test/fuse/benchmark/mkdir_benchmark.cc b/test/fuse/benchmark/mkdir_benchmark.cc
new file mode 100644
index 0000000000..30759603e4
--- /dev/null
+++ b/test/fuse/benchmark/mkdir_benchmark.cc
@@ -0,0 +1,51 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "absl/strings/str_cat.h"
+#include "benchmark/benchmark.h"
+#include "gtest/gtest.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+void BM_Mkdir(benchmark::State& state) {
+  const char* fuse_prefix = getenv("TEST_FUSEPRE");
+  ASSERT_NE(fuse_prefix, nullptr);
+
+  const TempPath top_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  std::string dir_path = top_dir.path();
+
+  int index = 0;
+  for (auto t : state) {
+    const std::string new_dir_path = absl::StrCat(dir_path, index);
+    ASSERT_THAT(mkdir(new_dir_path.c_str(), 0777), SyscallSucceeds());
+    index++;
+  }
+}
+
+BENCHMARK(BM_Mkdir)->Range(1, 128)->UseRealTime();
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/fuse/benchmark/open_benchmark.cc b/test/fuse/benchmark/open_benchmark.cc
new file mode 100644
index 0000000000..11c1c1c804
--- /dev/null
+++ b/test/fuse/benchmark/open_benchmark.cc
@@ -0,0 +1,60 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+#include "gtest/gtest.h"
+#include "test/util/fs_util.h"
+#include "test/util/logging.h"
+#include "test/util/temp_path.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+void BM_Open(benchmark::State& state) {
+  const char* fuse_prefix = getenv("TEST_FUSEPRE");
+  ASSERT_NE(fuse_prefix, nullptr);
+
+  const int size = state.range(0);
+  std::vector<TempPath> cache;
+  for (int i = 0; i < size; i++) {
+    auto path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+    cache.emplace_back(std::move(path));
+  }
+
+  unsigned int seed = 1;
+  for (auto _ : state) {
+    const int chosen = rand_r(&seed) % size;
+    const std::string file_path = JoinPath(fuse_prefix, cache[chosen].path());
+    int fd = open(file_path.c_str(), O_RDONLY);
+    TEST_CHECK(fd != -1);
+    close(fd);
+  }
+}
+
+BENCHMARK(BM_Open)->Range(1, 128)->UseRealTime();
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/fuse/benchmark/read_benchmark.cc b/test/fuse/benchmark/read_benchmark.cc
new file mode 100644
index 0000000000..2106b7d5a3
--- /dev/null
+++ b/test/fuse/benchmark/read_benchmark.cc
@@ -0,0 +1,57 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "benchmark/benchmark.h"
+#include "gtest/gtest.h"
+#include "test/util/fs_util.h"
+#include "test/util/logging.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+void BM_Read(benchmark::State& state) {
+  const char* fuse_prefix = getenv("TEST_FUSEPRE");
+  ASSERT_NE(fuse_prefix, nullptr);
+
+  const int size = state.range(0);
+  const std::string contents(size, 0);
+  auto path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), contents, TempPath::kDefaultFileMode));
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Open(JoinPath(fuse_prefix, path.path()), O_RDONLY));
+
+  std::vector<char> buf(size);
+  for (auto _ : state) {
+    TEST_CHECK(PreadFd(fd.get(), buf.data(), buf.size(), 0) == size);
+  }
+
+  state.SetBytesProcessed(static_cast<int64_t>(size) *
+                          static_cast<int64_t>(state.iterations()));
+}
+
+BENCHMARK(BM_Read)->Range(1, 1 << 26)->UseRealTime();
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/fuse/benchmark/stat_benchmark.cc b/test/fuse/benchmark/stat_benchmark.cc
new file mode 100644
index 0000000000..d2ab6a7061
--- /dev/null
+++ b/test/fuse/benchmark/stat_benchmark.cc
@@ -0,0 +1,65 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "benchmark/benchmark.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Creates a file in a nested directory hierarchy at least `depth` directories
+// deep, and stats that file multiple times.
+void BM_Stat(benchmark::State& state) {
+  const char* fuse_prefix = getenv("TEST_FUSEPRE");
+  ASSERT_NE(fuse_prefix, nullptr);
+
+  // Create nested directories with given depth.
+  int depth = state.range(0);
+  const TempPath top_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  std::string dir_path = top_dir.path();
+
+  while (depth-- > 0) {
+    // Don't use TempPath because it will make paths too long to use.
+    //
+    // The top_dir destructor will clean up this whole tree.
+    dir_path = JoinPath(dir_path, absl::StrCat(depth));
+    ASSERT_NO_ERRNO(Mkdir(dir_path, 0755));
+  }
+
+  // Create the file that will be stat'd.
+  const TempPath file =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir_path));
+  std::string file_path = JoinPath(fuse_prefix, file.path());
+  struct stat st;
+  for (auto _ : state) {
+    ASSERT_THAT(stat(file_path.c_str(), &st), SyscallSucceeds());
+  }
+}
+
+BENCHMARK(BM_Stat)->Range(1, 100)->UseRealTime();
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/fuse/benchmark/symlink_benchmark.cc b/test/fuse/benchmark/symlink_benchmark.cc
new file mode 100644
index 0000000000..363b9a9760
--- /dev/null
+++ b/test/fuse/benchmark/symlink_benchmark.cc
@@ -0,0 +1,60 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "absl/strings/str_cat.h"
+#include "benchmark/benchmark.h"
+#include "gtest/gtest.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+void BM_Symlink(benchmark::State& state) {
+  char* fuse_prefix = getenv("TEST_FUSEPRE");
+  ASSERT_NE(fuse_prefix, nullptr);
+  const TempPath top_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  std::string dir_path = top_dir.path();
+
+  const int size = state.range(0);
+  std::vector<TempPath> cache;
+  for (int i = 0; i < size; i++) {
+    auto path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+    cache.emplace_back(std::move(path));
+  }
+
+  int index = 0;
+  unsigned int seed = 1;
+  for (auto t : state) {
+    const int chosen = rand_r(&seed) % size;
+    const std::string symlink_path = absl::StrCat(fuse_prefix, dir_path, index);
+    ASSERT_THAT(symlink(cache[chosen].path().c_str(), symlink_path.c_str()),
+                SyscallSucceeds());
+    index++;
+  }
+}
+
+BENCHMARK(BM_Symlink)->Range(1, 128)->UseRealTime();
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/image/BUILD b/test/image/BUILD
index e749e47d48..e270c52acb 100644
--- a/test/image/BUILD
+++ b/test/image/BUILD
@@ -24,6 +24,7 @@ go_test(
     deps = [
         "//pkg/test/dockerutil",
         "//pkg/test/testutil",
+        "@com_github_docker_docker//api/types/mount:go_default_library",
     ],
 )
 
diff --git a/test/image/image_test.go b/test/image/image_test.go
index ac6186688a..6b5928ef07 100644
--- a/test/image/image_test.go
+++ b/test/image/image_test.go
@@ -33,6 +33,7 @@ import (
 	"testing"
 	"time"
 
+	"github.com/docker/docker/api/types/mount"
 	"gvisor.dev/gvisor/pkg/test/dockerutil"
 	"gvisor.dev/gvisor/pkg/test/testutil"
 )
@@ -63,6 +64,57 @@ func TestHelloWorld(t *testing.T) {
 	}
 }
 
+// Test that the FUSE container is set up and being used properly.
+func TestFUSEInContainer(t *testing.T) {
+	if usingFUSE, err := dockerutil.UsingFUSE(); err != nil {
+		t.Fatalf("failed to read config for runtime %s: %v", dockerutil.Runtime(), err)
+	} else if !usingFUSE {
+		t.Skip("FUSE not being used.")
+	}
+
+	ctx := context.Background()
+	d := dockerutil.MakeContainer(ctx, t)
+	defer d.CleanUp(ctx)
+
+	tmpDir := "/tmpDir/"
+	// Run the basic container.
+	err := d.Spawn(ctx, dockerutil.RunOpts{
+		Image:      "basic/fuse",
+		Privileged: true,
+		CapAdd:     []string{"CAP_SYS_ADMIN"},
+
+		// Mount a tmpfs directory for benchmark.
+		Mounts: []mount.Mount{
+			{
+				Type:     mount.TypeTmpfs,
+				Target:   tmpDir,
+				ReadOnly: false,
+			},
+		},
+	}, "sleep", "1000")
+	if err != nil {
+		t.Fatalf("docker spawn failed: %v", err)
+	}
+
+	out, err := d.Exec(ctx, dockerutil.ExecOpts{
+		Privileged: true,
+	}, "/bin/sh", "-c", "ls")
+	if err != nil {
+		t.Fatalf("docker exec failed: %v, message %s", err, out)
+	}
+	if !strings.Contains(out, "server-bin") {
+		t.Fatalf("docker didn't find server binary: got %s", out)
+	}
+
+	// Run the server.
+	out, err = d.Exec(ctx, dockerutil.ExecOpts{
+		Privileged: true,
+	}, "/bin/sh", "-c", "./server-bin mountpoint")
+	if err != nil {
+		t.Fatalf("docker exec failed: %v, message %s", err, out)
+	}
+}
+
 func runHTTPRequest(port int) error {
 	url := fmt.Sprintf("http://localhost:%d/not-found", port)
 	resp, err := http.Get(url)
diff --git a/test/runner/BUILD b/test/runner/BUILD
index 582d2946d1..049c260816 100644
--- a/test/runner/BUILD
+++ b/test/runner/BUILD
@@ -11,11 +11,14 @@ go_binary(
     ],
     visibility = ["//:sandbox"],
     deps = [
+        "//pkg/context",
         "//pkg/log",
+        "//pkg/test/dockerutil",
         "//pkg/test/testutil",
         "//runsc/specutils",
         "//test/runner/gtest",
         "//test/uds",
+        "@com_github_docker_docker//api/types/mount:go_default_library",
         "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
         "@com_github_syndtr_gocapability//capability:go_default_library",
         "@org_golang_x_sys//unix:go_default_library",
diff --git a/test/runner/defs.bzl b/test/runner/defs.bzl
index 032ebd04e3..9dc955c77b 100644
--- a/test/runner/defs.bzl
+++ b/test/runner/defs.bzl
@@ -57,6 +57,8 @@ def _syscall_test(
         platform,
         use_tmpfs,
         tags,
+        use_image = "",
+        setup_command = "",
         network = "none",
         file_access = "exclusive",
         overlay = False,
@@ -79,6 +81,8 @@ def _syscall_test(
             name += "_fuse"
     if network != "none":
         name += "_" + network + "net"
+    if use_image != "":
+        name += "_container"
 
     # Apply all tags.
     if tags == None:
@@ -107,6 +111,8 @@ def _syscall_test(
         "--platform=" + platform,
         "--network=" + network,
         "--use-tmpfs=" + str(use_tmpfs),
+        "--use-image=" + use_image,
+        "--setup-command=" + setup_command,
         "--file-access=" + file_access,
         "--overlay=" + str(overlay),
         "--add-uds-tree=" + str(add_uds_tree),
@@ -132,6 +138,8 @@ def syscall_test(
         shard_count = 5,
         size = "small",
         use_tmpfs = False,
+        use_image = "",
+        setup_command = "",
         add_overlay = False,
         add_uds_tree = False,
         add_hostinet = False,
@@ -146,6 +154,8 @@ def syscall_test(
       shard_count: shards for defined tests.
       size: the defined test size.
       use_tmpfs: use tmpfs in the defined tests.
+      use_image: use specified docker image in the defined tests.
+      setup_command: command to set up the docker container. Should be used when use_image is set.
       add_overlay: add an overlay test.
       add_uds_tree: add a UDS test.
       add_hostinet: add a hostinet test.
@@ -178,8 +188,26 @@ def syscall_test(
         vfs2 = True,
         fuse = fuse,
     )
+
+    if use_image != "":
+        # Run the test in the container specified.
+        _syscall_test(
+            test = test,
+            shard_count = shard_count,
+            size = size,
+            platform = default_platform,
+            use_tmpfs = use_tmpfs,
+            use_image = use_image,
+            setup_command = setup_command,
+            add_uds_tree = add_uds_tree,
+            tags = platforms[default_platform] + vfs2_tags,
+            vfs2 = True,
+            fuse = True,
+        )
+
     if fuse:
         # Only generate *_vfs2_fuse target if fuse parameter is enabled.
+        # The rest of the targets don't support FUSE as of yet.
         return
 
     _syscall_test(
diff --git a/test/runner/runner.go b/test/runner/runner.go
index 22d535f8dc..fe501c4c78 100644
--- a/test/runner/runner.go
+++ b/test/runner/runner.go
@@ -23,16 +23,20 @@ import (
 	"os"
 	"os/exec"
 	"os/signal"
+	"path"
 	"path/filepath"
 	"strings"
 	"syscall"
 	"testing"
 	"time"
 
+	"github.com/docker/docker/api/types/mount"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/syndtr/gocapability/capability"
 	"golang.org/x/sys/unix"
+	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/log"
+	"gvisor.dev/gvisor/pkg/test/dockerutil"
 	"gvisor.dev/gvisor/pkg/test/testutil"
 	"gvisor.dev/gvisor/runsc/specutils"
 	"gvisor.dev/gvisor/test/runner/gtest"
@@ -40,17 +44,19 @@ import (
 )
 
 var (
-	debug      = flag.Bool("debug", false, "enable debug logs")
-	strace     = flag.Bool("strace", false, "enable strace logs")
-	platform   = flag.String("platform", "ptrace", "platform to run on")
-	network    = flag.String("network", "none", "network stack to run on (sandbox, host, none)")
-	useTmpfs   = flag.Bool("use-tmpfs", false, "mounts tmpfs for /tmp")
-	fileAccess = flag.String("file-access", "exclusive", "mounts root in exclusive or shared mode")
-	overlay    = flag.Bool("overlay", false, "wrap filesystem mounts with writable tmpfs overlay")
-	vfs2       = flag.Bool("vfs2", false, "enable VFS2")
-	fuse       = flag.Bool("fuse", false, "enable FUSE")
-	parallel   = flag.Bool("parallel", false, "run tests in parallel")
-	runscPath  = flag.String("runsc", "", "path to runsc binary")
+	debug        = flag.Bool("debug", false, "enable debug logs")
+	strace       = flag.Bool("strace", false, "enable strace logs")
+	platform     = flag.String("platform", "ptrace", "platform to run on")
+	network      = flag.String("network", "none", "network stack to run on (sandbox, host, none)")
+	useTmpfs     = flag.Bool("use-tmpfs", false, "mounts tmpfs for /tmp")
+	useImage     = flag.String("use-image", "", "container image to use for test. Path relative to //images")
+	setupCommand = flag.String("setup-command", "", "command to run before running the test to set up container environment")
+	fileAccess   = flag.String("file-access", "exclusive", "mounts root in exclusive or shared mode")
+	overlay      = flag.Bool("overlay", false, "wrap filesystem mounts with writable tmpfs overlay")
+	vfs2         = flag.Bool("vfs2", false, "enable VFS2")
+	fuse         = flag.Bool("fuse", false, "enable FUSE")
+	parallel     = flag.Bool("parallel", false, "run tests in parallel")
+	runscPath    = flag.String("runsc", "", "path to runsc binary")
 
 	addUDSTree = flag.Bool("add-uds-tree", false, "expose a tree of UDS utilities for use in tests")
 )
@@ -313,8 +319,92 @@ func setupUDSTree(spec *specs.Spec) (cleanup func(), err error) {
 	return cleanup, nil
 }
 
+func runTestCaseInContainer(testBin string, tc gtest.TestCase, image string, t *testing.T) {
+	if usingFUSE, err := dockerutil.UsingFUSE(); err != nil {
+		t.Fatalf("failed to read config for runtime %s: %v", dockerutil.Runtime(), err)
+	} else if !usingFUSE {
+		t.Skip("FUSE not being used.")
+	}
+
+	ctx := context.Background()
+	d := dockerutil.MakeContainer(ctx, t)
+	defer d.CleanUp(ctx)
+
+	// Run the basic container.
+	tmpDir := "/tmpDir"
+	testBinDir := "/testDir"
+	opts := dockerutil.RunOpts{
+		Image:      image,
+		Privileged: true,
+		CapAdd:     []string{"CAP_SYS_ADMIN"},
+
+		// Mount a tmpfs directory to use when benchmarking.
+		Mounts: []mount.Mount{
+			{
+				Type:     mount.TypeTmpfs,
+				Target:   tmpDir,
+				ReadOnly: false,
+			},
+		},
+		Env: []string{
+			fmt.Sprintf("TEST_TMPDIR=%s", tmpDir),
+			fmt.Sprintf("TEST_FUSEPRE=%s", "/fus/mountpoint"),
+		},
+	}
+	wd, err := os.Getwd()
+	if err != nil {
+		t.Fatalf("Getwd run failed: %v", err)
+	}
+
+	wdPathToTestBin := strings.TrimPrefix(testBin, wd)
+	containerTestBin := path.Join(testBinDir, path.Base(wdPathToTestBin))
+	d.CopyFiles(&opts, testBinDir, wdPathToTestBin)
+	if err := d.CopyErr(); err != nil {
+		t.Fatalf("Copy failed %v", err)
+	}
+
+	err = d.Spawn(ctx, opts, "sleep", "1000")
+	if err != nil {
+		t.Fatalf("docker run failed: %v", err)
+	}
+
+	// Run the server setup command.
+	if *setupCommand != "" {
+		out, err := d.Exec(ctx, dockerutil.ExecOpts{
+			Privileged: true,
+		}, "/bin/sh", "-c", *setupCommand)
+		if err != nil {
+			t.Fatalf("docker exec failed: %v with output %v", err, out)
+		}
+	}
+
+	cmd := "chmod +x " + containerTestBin
+	out, err := d.Exec(ctx, dockerutil.ExecOpts{
+		Privileged: true,
+	}, "/bin/sh", "-c", cmd)
+	if err != nil {
+		t.Fatalf("docker exec failed: %v with output %v", err, out)
+	}
+
+	cmd = containerTestBin + " " + strings.Join(tc.Args(), " ")
+	out, err = d.Exec(ctx, dockerutil.ExecOpts{
+		Privileged: true,
+	}, "/bin/sh", "-c", cmd)
+	if err != nil {
+		t.Fatalf("docker exec failed: %v with output %v", err, out)
+	}
+
+	fmt.Print(out)
+	return
+}
+
 // runsTestCaseRunsc runs the test case in runsc.
 func runTestCaseRunsc(testBin string, tc gtest.TestCase, t *testing.T) {
+	if *useImage != "" {
+		runTestCaseInContainer(testBin, tc, *useImage, t)
+		return
+	}
+
 	// Run a new container with the test executable and filter for the
 	// given test suite and name.
 	spec := testutil.NewSpecWithArgs(append([]string{testBin}, tc.Args()...)...)

From 97d51984586f2ea9a6a0eb718af1cc85dabf1fea Mon Sep 17 00:00:00 2001
From: Craig Chi <craig08@users.noreply.github.com>
Date: Wed, 2 Sep 2020 13:30:56 -0700
Subject: [PATCH 187/211] fuse: remove unused marshalling functions

This commit removes unused marshalling functions in linux abi package
and moves self-defined FUSEInitRes wrapper to fuse package.

Updates #3707
---
 pkg/abi/linux/fuse.go                      | 175 ---------------------
 pkg/sentry/fsimpl/fuse/BUILD               |   1 +
 pkg/sentry/fsimpl/fuse/init.go             |   4 +-
 pkg/sentry/fsimpl/fuse/request_response.go |  78 +++++++++
 4 files changed, 81 insertions(+), 177 deletions(-)
 create mode 100644 pkg/sentry/fsimpl/fuse/request_response.go

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index ca4ee5e80f..5616290a5c 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -15,7 +15,6 @@
 package linux
 
 import (
-	"gvisor.dev/gvisor/pkg/usermem"
 	"gvisor.dev/gvisor/tools/go_marshal/marshal"
 	"gvisor.dev/gvisor/tools/go_marshal/primitive"
 )
@@ -263,63 +262,6 @@ type FUSEInitOut struct {
 	_ [8]uint32
 }
 
-// FUSEInitRes is a variable-length wrapper of FUSEInitOut. The FUSE server may
-// implement older version of FUSE protocol, which contains a FUSEInitOut with
-// less attributes.
-//
-// Dynamically-sized objects cannot be marshalled.
-type FUSEInitRes struct {
-	marshal.StubMarshallable
-
-	// InitOut contains the response from the FUSE server.
-	InitOut FUSEInitOut
-
-	// Len is the total length of bytes of the response.
-	Len uint32
-}
-
-// UnMarshalBytes deserializes src to the InitOut attribute in a FUSEInitRes.
-func (r *FUSEInitRes) UnmarshalBytes(src []byte) {
-	out := &r.InitOut
-
-	// Introduced before FUSE kernel version 7.13.
-	out.Major = uint32(usermem.ByteOrder.Uint32(src[:4]))
-	src = src[4:]
-	out.Minor = uint32(usermem.ByteOrder.Uint32(src[:4]))
-	src = src[4:]
-	out.MaxReadahead = uint32(usermem.ByteOrder.Uint32(src[:4]))
-	src = src[4:]
-	out.Flags = uint32(usermem.ByteOrder.Uint32(src[:4]))
-	src = src[4:]
-	out.MaxBackground = uint16(usermem.ByteOrder.Uint16(src[:2]))
-	src = src[2:]
-	out.CongestionThreshold = uint16(usermem.ByteOrder.Uint16(src[:2]))
-	src = src[2:]
-	out.MaxWrite = uint32(usermem.ByteOrder.Uint32(src[:4]))
-	src = src[4:]
-
-	// Introduced in FUSE kernel version 7.23.
-	if len(src) >= 4 {
-		out.TimeGran = uint32(usermem.ByteOrder.Uint32(src[:4]))
-		src = src[4:]
-	}
-	// Introduced in FUSE kernel version 7.28.
-	if len(src) >= 2 {
-		out.MaxPages = uint16(usermem.ByteOrder.Uint16(src[:2]))
-		src = src[2:]
-	}
-	// Introduced in FUSE kernel version 7.31.
-	if len(src) >= 2 {
-		out.MapAlignment = uint16(usermem.ByteOrder.Uint16(src[:2]))
-		src = src[2:]
-	}
-}
-
-// SizeBytes is the size of the payload of the FUSE_INIT response.
-func (r *FUSEInitRes) SizeBytes() int {
-	return int(r.Len)
-}
-
 // FUSEGetAttrIn is the request sent by the kernel to the daemon,
 // to get the attribute of a inode.
 //
@@ -415,11 +357,6 @@ type FUSELookupIn struct {
 	Name string
 }
 
-// MarshalUnsafe serializes r.name to the dst buffer.
-func (r *FUSELookupIn) MarshalUnsafe(buf []byte) {
-	copy(buf, r.Name)
-}
-
 // MarshalBytes serializes r.name to the dst buffer.
 func (r *FUSELookupIn) MarshalBytes(buf []byte) {
 	copy(buf, r.Name)
@@ -548,12 +485,6 @@ type FUSEMknodIn struct {
 	Name string
 }
 
-// MarshalUnsafe serializes r.MknodMeta and r.Name to the dst buffer.
-func (r *FUSEMknodIn) MarshalUnsafe(buf []byte) {
-	r.MknodMeta.MarshalUnsafe(buf[:r.MknodMeta.SizeBytes()])
-	copy(buf[r.MknodMeta.SizeBytes():], r.Name)
-}
-
 // MarshalBytes serializes r.MknodMeta and r.Name to the dst buffer.
 func (r *FUSEMknodIn) MarshalBytes(buf []byte) {
 	r.MknodMeta.MarshalBytes(buf[:r.MknodMeta.SizeBytes()])
@@ -580,13 +511,6 @@ type FUSESymLinkIn struct {
 	Target string
 }
 
-// MarshalUnsafe serializes r.Name and r.Target to the dst buffer.
-// Left null-termination at end of r.Name and r.Target.
-func (r *FUSESymLinkIn) MarshalUnsafe(buf []byte) {
-	copy(buf, r.Name)
-	copy(buf[len(r.Name)+1:], r.Target)
-}
-
 // MarshalBytes serializes r.Name and r.Target to the dst buffer.
 // Left null-termination at end of r.Name and r.Target.
 func (r *FUSESymLinkIn) MarshalBytes(buf []byte) {
@@ -603,9 +527,6 @@ func (r *FUSESymLinkIn) SizeBytes() int {
 // FUSEEmptyIn is used by operations without request body.
 type FUSEEmptyIn struct{ marshal.StubMarshallable }
 
-// MarshalUnsafe do nothing for marshal.
-func (r *FUSEEmptyIn) MarshalUnsafe(buf []byte) {}
-
 // MarshalBytes do nothing for marshal.
 func (r *FUSEEmptyIn) MarshalBytes(buf []byte) {}
 
@@ -640,12 +561,6 @@ type FUSEMkdirIn struct {
 	Name string
 }
 
-// MarshalUnsafe serializes r.MkdirMeta and r.Name to the dst buffer.
-func (r *FUSEMkdirIn) MarshalUnsafe(buf []byte) {
-	r.MkdirMeta.MarshalUnsafe(buf[:r.MkdirMeta.SizeBytes()])
-	copy(buf[r.MkdirMeta.SizeBytes():], r.Name)
-}
-
 // MarshalBytes serializes r.MkdirMeta and r.Name to the dst buffer.
 func (r *FUSEMkdirIn) MarshalBytes(buf []byte) {
 	r.MkdirMeta.MarshalBytes(buf[:r.MkdirMeta.SizeBytes()])
@@ -669,11 +584,6 @@ type FUSERmDirIn struct {
 	Name string
 }
 
-// MarshalUnsafe serializes r.name to the dst buffer.
-func (r *FUSERmDirIn) MarshalUnsafe(buf []byte) {
-	copy(buf, r.Name)
-}
-
 // MarshalBytes serializes r.name to the dst buffer.
 func (r *FUSERmDirIn) MarshalBytes(buf []byte) {
 	copy(buf, r.Name)
@@ -684,16 +594,6 @@ func (r *FUSERmDirIn) SizeBytes() int {
 	return len(r.Name) + 1
 }
 
-// UnmarshalUnsafe deserializes r.name from the src buffer.
-func (r *FUSERmDirIn) UnmarshalUnsafe(src []byte) {
-	r.Name = string(src)
-}
-
-// UnmarshalBytes deserializes r.name from the src buffer.
-func (r *FUSERmDirIn) UnmarshalBytes(src []byte) {
-	r.Name = string(src)
-}
-
 // FUSEDirents is a list of Dirents received from the FUSE daemon server.
 // It is used for FUSE_READDIR.
 //
@@ -736,22 +636,6 @@ type FUSEDirentMeta struct {
 	Type uint32
 }
 
-// MarshalUnsafe serializes FUSEDirents to the dst buffer.
-func (r *FUSEDirents) MarshalUnsafe(dst []byte) {
-	for _, dirent := range r.Dirents {
-		dirent.MarshalUnsafe(dst)
-		dst = dst[dirent.SizeBytes():]
-	}
-}
-
-// MarshalBytes serializes FUSEDirents to the dst buffer.
-func (r *FUSEDirents) MarshalBytes(dst []byte) {
-	for _, dirent := range r.Dirents {
-		dirent.MarshalBytes(dst)
-		dst = dst[dirent.SizeBytes():]
-	}
-}
-
 // SizeBytes is the size of the memory representation of FUSEDirents.
 func (r *FUSEDirents) SizeBytes() int {
 	var sizeBytes int
@@ -762,30 +646,6 @@ func (r *FUSEDirents) SizeBytes() int {
 	return sizeBytes
 }
 
-// UnmarshalUnsafe deserializes FUSEDirents from the src buffer.
-func (r *FUSEDirents) UnmarshalUnsafe(src []byte) {
-	for {
-		if len(src) <= (*FUSEDirentMeta)(nil).SizeBytes() {
-			break
-		}
-
-		// Its unclear how many dirents there are in src. Each dirent is dynamically
-		// sized and so we can't make assumptions about how many dirents we can allocate.
-		if r.Dirents == nil {
-			r.Dirents = make([]*FUSEDirent, 0)
-		}
-
-		// We have to allocate a struct for each dirent - there must be a better way
-		// to do this. Linux allocates 1 page to store all the dirents and then
-		// simply reads them from the page.
-		var dirent FUSEDirent
-		dirent.UnmarshalUnsafe(src)
-		r.Dirents = append(r.Dirents, &dirent)
-
-		src = src[dirent.SizeBytes():]
-	}
-}
-
 // UnmarshalBytes deserializes FUSEDirents from the src buffer.
 func (r *FUSEDirents) UnmarshalBytes(src []byte) {
 	for {
@@ -810,24 +670,6 @@ func (r *FUSEDirents) UnmarshalBytes(src []byte) {
 	}
 }
 
-// MarshalUnsafe serializes FUSEDirent to the dst buffer.
-func (r *FUSEDirent) MarshalUnsafe(dst []byte) {
-	r.Meta.MarshalUnsafe(dst)
-	dst = dst[r.Meta.SizeBytes():]
-
-	name := primitive.ByteSlice(r.Name)
-	name.MarshalUnsafe(dst)
-}
-
-// MarshalBytes serializes FUSEDirent to the dst buffer.
-func (r *FUSEDirent) MarshalBytes(dst []byte) {
-	r.Meta.MarshalBytes(dst)
-	dst = dst[r.Meta.SizeBytes():]
-
-	name := primitive.ByteSlice(r.Name)
-	name.MarshalBytes(dst)
-}
-
 // SizeBytes is the size of the memory representation of FUSEDirent.
 func (r *FUSEDirent) SizeBytes() int {
 	dataSize := r.Meta.SizeBytes() + len(r.Name)
@@ -838,23 +680,6 @@ func (r *FUSEDirent) SizeBytes() int {
 	return (dataSize + (FUSE_DIRENT_ALIGN - 1)) & ^(FUSE_DIRENT_ALIGN - 1)
 }
 
-// UnmarshalUnsafe deserializes FUSEDirent from the src buffer.
-func (r *FUSEDirent) UnmarshalUnsafe(src []byte) {
-	r.Meta.UnmarshalUnsafe(src)
-	src = src[r.Meta.SizeBytes():]
-
-	if r.Meta.NameLen > FUSE_NAME_MAX {
-		// The name is too long and therefore invalid. We don't
-		// need to unmarshal the name since it'll be thrown away.
-		return
-	}
-
-	buf := make([]byte, r.Meta.NameLen)
-	name := primitive.ByteSlice(buf)
-	name.UnmarshalUnsafe(src[:r.Meta.NameLen])
-	r.Name = string(name)
-}
-
 // UnmarshalBytes deserializes FUSEDirent from the src buffer.
 func (r *FUSEDirent) UnmarshalBytes(src []byte) {
 	r.Meta.UnmarshalBytes(src)
diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD
index a6ee6100d5..86dc6a7a41 100644
--- a/pkg/sentry/fsimpl/fuse/BUILD
+++ b/pkg/sentry/fsimpl/fuse/BUILD
@@ -40,6 +40,7 @@ go_library(
         "register.go",
         "regular_file.go",
         "request_list.go",
+        "request_response.go",
     ],
     visibility = ["//pkg/sentry:internal"],
     deps = [
diff --git a/pkg/sentry/fsimpl/fuse/init.go b/pkg/sentry/fsimpl/fuse/init.go
index 6384cbbdba..256b6fb652 100644
--- a/pkg/sentry/fsimpl/fuse/init.go
+++ b/pkg/sentry/fsimpl/fuse/init.go
@@ -76,12 +76,12 @@ func (conn *connection) InitRecv(res *Response, hasSysAdminCap bool) error {
 		return err
 	}
 
-	initRes := linux.FUSEInitRes{Len: res.DataLen()}
+	initRes := fuseInitRes{initLen: res.DataLen()}
 	if err := res.UnmarshalPayload(&initRes); err != nil {
 		return err
 	}
 
-	return conn.initProcessReply(&initRes.InitOut, hasSysAdminCap)
+	return conn.initProcessReply(&initRes.initOut, hasSysAdminCap)
 }
 
 // Process the FUSE_INIT reply from the FUSE server.
diff --git a/pkg/sentry/fsimpl/fuse/request_response.go b/pkg/sentry/fsimpl/fuse/request_response.go
new file mode 100644
index 0000000000..ae71b5e28b
--- /dev/null
+++ b/pkg/sentry/fsimpl/fuse/request_response.go
@@ -0,0 +1,78 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fuse
+
+import (
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/usermem"
+	"gvisor.dev/gvisor/tools/go_marshal/marshal"
+)
+
+// fuseInitRes is a variable-length wrapper of linux.FUSEInitOut. The FUSE
+// server may implement an older version of the FUSE protocol, which contains a
+// linux.FUSEInitOut with fewer attributes.
+//
+// Dynamically-sized objects cannot be marshalled.
+type fuseInitRes struct {
+	marshal.StubMarshallable
+
+	// initOut contains the response from the FUSE server.
+	initOut linux.FUSEInitOut
+
+	// initLen is the total length of bytes of the response.
+	initLen uint32
+}
+
+// UnmarshalBytes deserializes src to the initOut attribute in a fuseInitRes.
+func (r *fuseInitRes) UnmarshalBytes(src []byte) {
+	out := &r.initOut
+
+	// Introduced before FUSE kernel version 7.13.
+	out.Major = uint32(usermem.ByteOrder.Uint32(src[:4]))
+	src = src[4:]
+	out.Minor = uint32(usermem.ByteOrder.Uint32(src[:4]))
+	src = src[4:]
+	out.MaxReadahead = uint32(usermem.ByteOrder.Uint32(src[:4]))
+	src = src[4:]
+	out.Flags = uint32(usermem.ByteOrder.Uint32(src[:4]))
+	src = src[4:]
+	out.MaxBackground = uint16(usermem.ByteOrder.Uint16(src[:2]))
+	src = src[2:]
+	out.CongestionThreshold = uint16(usermem.ByteOrder.Uint16(src[:2]))
+	src = src[2:]
+	out.MaxWrite = uint32(usermem.ByteOrder.Uint32(src[:4]))
+	src = src[4:]
+
+	// Introduced in FUSE kernel version 7.23.
+	if len(src) >= 4 {
+		out.TimeGran = uint32(usermem.ByteOrder.Uint32(src[:4]))
+		src = src[4:]
+	}
+	// Introduced in FUSE kernel version 7.28.
+	if len(src) >= 2 {
+		out.MaxPages = uint16(usermem.ByteOrder.Uint16(src[:2]))
+		src = src[2:]
+	}
+	// Introduced in FUSE kernel version 7.31.
+	if len(src) >= 2 {
+		out.MapAlignment = uint16(usermem.ByteOrder.Uint16(src[:2]))
+		src = src[2:]
+	}
+}
+
+// SizeBytes is the size of the payload of the FUSE_INIT response.
+func (r *fuseInitRes) SizeBytes() int {
+	return int(r.initLen)
+}

From 8554fda1b86e1ae430f155c41a820e6ce6632057 Mon Sep 17 00:00:00 2001
From: jinmouil <67118279+jinmouil@users.noreply.github.com>
Date: Wed, 2 Sep 2020 13:50:31 -0700
Subject: [PATCH 188/211] Downgrade FUSE minor version support and clarify
 comments

---
 pkg/abi/linux/fuse.go                      | 53 +++++++++-------------
 pkg/sentry/fsimpl/fuse/connection.go       | 20 ++++----
 pkg/sentry/fsimpl/fuse/init.go             |  2 -
 pkg/sentry/fsimpl/fuse/request_response.go |  5 --
 4 files changed, 31 insertions(+), 49 deletions(-)

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index 5616290a5c..ed40df564d 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -151,34 +151,27 @@ type FUSEWriteIn struct {
 }
 
 // FUSE_INIT flags, consistent with the ones in include/uapi/linux/fuse.h.
+// Our taget version is 7.23 but we have few implemented in advance.
 const (
-	FUSE_ASYNC_READ          = 1 << 0
-	FUSE_POSIX_LOCKS         = 1 << 1
-	FUSE_FILE_OPS            = 1 << 2
-	FUSE_ATOMIC_O_TRUNC      = 1 << 3
-	FUSE_EXPORT_SUPPORT      = 1 << 4
-	FUSE_BIG_WRITES          = 1 << 5
-	FUSE_DONT_MASK           = 1 << 6
-	FUSE_SPLICE_WRITE        = 1 << 7
-	FUSE_SPLICE_MOVE         = 1 << 8
-	FUSE_SPLICE_READ         = 1 << 9
-	FUSE_FLOCK_LOCKS         = 1 << 10
-	FUSE_HAS_IOCTL_DIR       = 1 << 11
-	FUSE_AUTO_INVAL_DATA     = 1 << 12
-	FUSE_DO_READDIRPLUS      = 1 << 13
-	FUSE_READDIRPLUS_AUTO    = 1 << 14
-	FUSE_ASYNC_DIO           = 1 << 15
-	FUSE_WRITEBACK_CACHE     = 1 << 16
-	FUSE_NO_OPEN_SUPPORT     = 1 << 17
-	FUSE_PARALLEL_DIROPS     = 1 << 18
-	FUSE_HANDLE_KILLPRIV     = 1 << 19
-	FUSE_POSIX_ACL           = 1 << 20
-	FUSE_ABORT_ERROR         = 1 << 21
-	FUSE_MAX_PAGES           = 1 << 22
-	FUSE_CACHE_SYMLINKS      = 1 << 23
-	FUSE_NO_OPENDIR_SUPPORT  = 1 << 24
-	FUSE_EXPLICIT_INVAL_DATA = 1 << 25
-	FUSE_MAP_ALIGNMENT       = 1 << 26
+	FUSE_ASYNC_READ       = 1 << 0
+	FUSE_POSIX_LOCKS      = 1 << 1
+	FUSE_FILE_OPS         = 1 << 2
+	FUSE_ATOMIC_O_TRUNC   = 1 << 3
+	FUSE_EXPORT_SUPPORT   = 1 << 4
+	FUSE_BIG_WRITES       = 1 << 5
+	FUSE_DONT_MASK        = 1 << 6
+	FUSE_SPLICE_WRITE     = 1 << 7
+	FUSE_SPLICE_MOVE      = 1 << 8
+	FUSE_SPLICE_READ      = 1 << 9
+	FUSE_FLOCK_LOCKS      = 1 << 10
+	FUSE_HAS_IOCTL_DIR    = 1 << 11
+	FUSE_AUTO_INVAL_DATA  = 1 << 12
+	FUSE_DO_READDIRPLUS   = 1 << 13
+	FUSE_READDIRPLUS_AUTO = 1 << 14
+	FUSE_ASYNC_DIO        = 1 << 15
+	FUSE_WRITEBACK_CACHE  = 1 << 16
+	FUSE_NO_OPEN_SUPPORT  = 1 << 17
+	FUSE_MAX_PAGES        = 1 << 22 // From FUSE 7.28
 )
 
 // currently supported FUSE protocol version numbers.
@@ -214,7 +207,7 @@ type FUSEInitIn struct {
 }
 
 // FUSEInitOut is the reply sent by the daemon to the kernel
-// for FUSEInitIn.
+// for FUSEInitIn. We target FUSE 7.23; this struct supports 7.28.
 //
 // +marshal
 type FUSEInitOut struct {
@@ -255,9 +248,7 @@ type FUSEInitOut struct {
 	// if the value from daemon is too large.
 	MaxPages uint16
 
-	// MapAlignment is an unknown field and not used by this package at this moment.
-	// Use as a placeholder to be consistent with the FUSE protocol.
-	MapAlignment uint16
+	_ uint16
 
 	_ [8]uint32
 }
diff --git a/pkg/sentry/fsimpl/fuse/connection.go b/pkg/sentry/fsimpl/fuse/connection.go
index 0e91bb18e3..c6e064f700 100644
--- a/pkg/sentry/fsimpl/fuse/connection.go
+++ b/pkg/sentry/fsimpl/fuse/connection.go
@@ -85,16 +85,22 @@ type connection struct {
 	// attributeVersion is the version of connection's attributes.
 	attributeVersion uint64
 
+	// We target FUSE 7.23.
 	// The following FUSE_INIT flags are currently unsupported by this implementation:
 	// - FUSE_EXPORT_SUPPORT
-	// - FUSE_HANDLE_KILLPRIV
 	// - FUSE_POSIX_LOCKS: requires POSIX locks
 	// - FUSE_FLOCK_LOCKS: requires POSIX locks
 	// - FUSE_AUTO_INVAL_DATA: requires page caching eviction
-	// - FUSE_EXPLICIT_INVAL_DATA: requires page caching eviction
 	// - FUSE_DO_READDIRPLUS/FUSE_READDIRPLUS_AUTO: requires FUSE_READDIRPLUS implementation
 	// - FUSE_ASYNC_DIO
-	// - FUSE_POSIX_ACL: affects defaultPermissions, posixACL, xattr handler
+	// - FUSE_PARALLEL_DIROPS (7.25)
+	// - FUSE_HANDLE_KILLPRIV (7.26)
+	// - FUSE_POSIX_ACL: affects defaultPermissions, posixACL, xattr handler (7.26)
+	// - FUSE_ABORT_ERROR (7.27)
+	// - FUSE_CACHE_SYMLINKS (7.28)
+	// - FUSE_NO_OPENDIR_SUPPORT (7.29)
+	// - FUSE_EXPLICIT_INVAL_DATA: requires page caching eviction (7.30)
+	// - FUSE_MAP_ALIGNMENT (7.31)
 
 	// initialized after receiving FUSE_INIT reply.
 	// Until it's set, suspend sending FUSE requests.
@@ -181,19 +187,11 @@ type connection struct {
 	// Negotiated and only set in INIT.
 	asyncRead bool
 
-	// abortErr is true if kernel need to return an unique read error after abort.
-	// Negotiated and only set in INIT.
-	abortErr bool
-
 	// writebackCache is true for write-back cache policy,
 	// false for write-through policy.
 	// Negotiated and only set in INIT.
 	writebackCache bool
 
-	// cacheSymlinks if filesystem needs to cache READLINK responses in page cache.
-	// Negotiated and only set in INIT.
-	cacheSymlinks bool
-
 	// bigWrites if doing multi-page cached writes.
 	// Negotiated and only set in INIT.
 	bigWrites bool
diff --git a/pkg/sentry/fsimpl/fuse/init.go b/pkg/sentry/fsimpl/fuse/init.go
index 256b6fb652..a47309b6e7 100644
--- a/pkg/sentry/fsimpl/fuse/init.go
+++ b/pkg/sentry/fsimpl/fuse/init.go
@@ -132,8 +132,6 @@ func (conn *connection) initProcessReply(out *linux.FUSEInitOut, hasSysAdminCap
 		conn.bigWrites = out.Flags&linux.FUSE_BIG_WRITES != 0
 		conn.dontMask = out.Flags&linux.FUSE_DONT_MASK != 0
 		conn.writebackCache = out.Flags&linux.FUSE_WRITEBACK_CACHE != 0
-		conn.cacheSymlinks = out.Flags&linux.FUSE_CACHE_SYMLINKS != 0
-		conn.abortErr = out.Flags&linux.FUSE_ABORT_ERROR != 0
 
 		// TODO(gvisor.dev/issue/3195): figure out how to use TimeGran (0 < TimeGran <= fuseMaxTimeGranNs).
 
diff --git a/pkg/sentry/fsimpl/fuse/request_response.go b/pkg/sentry/fsimpl/fuse/request_response.go
index ae71b5e28b..a69b212218 100644
--- a/pkg/sentry/fsimpl/fuse/request_response.go
+++ b/pkg/sentry/fsimpl/fuse/request_response.go
@@ -65,11 +65,6 @@ func (r *fuseInitRes) UnmarshalBytes(src []byte) {
 		out.MaxPages = uint16(usermem.ByteOrder.Uint16(src[:2]))
 		src = src[2:]
 	}
-	// Introduced in FUSE kernel version 7.31.
-	if len(src) >= 2 {
-		out.MapAlignment = uint16(usermem.ByteOrder.Uint16(src[:2]))
-		src = src[2:]
-	}
 }
 
 // SizeBytes is the size of the payload of the FUSE_INIT response.

From 2541b190061c44fdd7fe825418c70944e70ff87a Mon Sep 17 00:00:00 2001
From: jinmouil <67118279+jinmouil@users.noreply.github.com>
Date: Wed, 2 Sep 2020 13:55:08 -0700
Subject: [PATCH 189/211] FUSE device: clean up readLocked

This change removes the unnecessary loop and avoids
the recursive call. It also fixes minor bugs in this
function.
---
 pkg/sentry/fsimpl/fuse/dev.go | 89 ++++++++++++++++++-----------------
 1 file changed, 46 insertions(+), 43 deletions(-)

diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go
index fd3592e327..c53a380215 100644
--- a/pkg/sentry/fsimpl/fuse/dev.go
+++ b/pkg/sentry/fsimpl/fuse/dev.go
@@ -19,7 +19,6 @@ import (
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
-	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
@@ -143,58 +142,62 @@ func (fd *DeviceFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.R
 }
 
 // readLocked implements the reading of the fuse device while locked with DeviceFD.mu.
+//
+// Preconditions: dst is large enough for any reasonable request.
 func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
-	if fd.queue.Empty() {
-		return 0, syserror.ErrWouldBlock
-	}
+	var req *Request
 
-	var readCursor uint32
-	var bytesRead int64
-	for {
-		req := fd.queue.Front()
-		if dst.NumBytes() < int64(req.hdr.Len) {
-			// The request is too large. Cannot process it. All requests must be smaller than the
-			// negotiated size as specified by Connection.MaxWrite set as part of the FUSE_INIT
-			// handshake.
-			errno := -int32(syscall.EIO)
-			if req.hdr.Opcode == linux.FUSE_SETXATTR {
-				errno = -int32(syscall.E2BIG)
-			}
-
-			// Return the error to the calling task.
-			if err := fd.sendError(ctx, errno, req); err != nil {
-				return 0, err
-			}
+	// Find the first valid request.
+	// In the normal case this loop executes only once.
+	for !fd.queue.Empty() {
+		req = fd.queue.Front()
 
-			// We're done with this request.
-			fd.queue.Remove(req)
-			if req.hdr.Opcode == linux.FUSE_RELEASE {
-				fd.numActiveRequests -= 1
-			}
+		if int64(req.hdr.Len) <= dst.NumBytes() {
+			break
+		}
 
-			// Restart the read as this request was invalid.
-			log.Warningf("fuse.DeviceFD.Read: request found was too large. Restarting read.")
-			return fd.readLocked(ctx, dst, opts)
+		// The request is too large. Cannot process it. All requests must be smaller than the
+		// negotiated size as specified by Connection.MaxWrite set as part of the FUSE_INIT
+		// handshake.
+		errno := -int32(syscall.EIO)
+		if req.hdr.Opcode == linux.FUSE_SETXATTR {
+			errno = -int32(syscall.E2BIG)
 		}
 
-		n, err := dst.CopyOut(ctx, req.data[readCursor:])
-		if err != nil {
+		// Return the error to the calling task.
+		if err := fd.sendError(ctx, errno, req); err != nil {
 			return 0, err
 		}
-		readCursor += uint32(n)
-		bytesRead += int64(n)
-
-		if readCursor >= req.hdr.Len {
-			// Fully done with this req, remove it from the queue.
-			fd.queue.Remove(req)
-			if req.hdr.Opcode == linux.FUSE_RELEASE {
-				fd.numActiveRequests -= 1
-			}
-			break
-		}
+
+		// We're done with this request.
+		fd.queue.Remove(req)
+		req = nil
+	}
+
+	if req == nil {
+		return 0, syserror.ErrWouldBlock
+	}
+
+	// We already checked the size: dst must be able to fit the whole request.
+	// Now we write the marshalled header, the payload,
+	// and the potential additional payload
+	// to the user memory IOSequence.
+
+	n, err := dst.CopyOut(ctx, req.data)
+	if err != nil {
+		return 0, err
+	}
+	if n != len(req.data) {
+		return 0, syserror.EIO
+	}
+
+	// Fully done with this req, remove it from the queue.
+	fd.queue.Remove(req)
+	if req.hdr.Opcode == linux.FUSE_RELEASE {
+		fd.numActiveRequests -= 1
 	}
 
-	return bytesRead, nil
+	return int64(n), nil
 }
 
 // PWrite implements vfs.FileDescriptionImpl.PWrite.

From deb8e24614036d61cf98a3eb0ca1e131834c05bd Mon Sep 17 00:00:00 2001
From: Craig Chi <craig08@users.noreply.github.com>
Date: Thu, 3 Sep 2020 14:06:59 -0700
Subject: [PATCH 190/211] Implement FUSE_CREATE

FUSE_CREATE is called when issuing creat(2) or open(2) with O_CREAT. It
creates a new file on the FUSE filesystem.

Fixes #3825
---
 pkg/abi/linux/fuse.go            |  42 ++++++++++
 pkg/sentry/fsimpl/fuse/fusefs.go |  18 +++++
 test/fuse/BUILD                  |   4 +
 test/fuse/linux/BUILD            |  17 +++-
 test/fuse/linux/create_test.cc   | 128 +++++++++++++++++++++++++++++++
 5 files changed, 208 insertions(+), 1 deletion(-)
 create mode 100644 test/fuse/linux/create_test.cc

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index ed40df564d..adc04dbccf 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -445,6 +445,48 @@ type FUSEReleaseIn struct {
 	LockOwner uint64
 }
 
+// FUSECreateMeta contains all the static fields of FUSECreateIn,
+// which is used for FUSE_CREATE.
+//
+// +marshal
+type FUSECreateMeta struct {
+	// Flags are the open flags of the file being created.
+	Flags uint32
+
+	// Mode is the mode of the file being created.
+	Mode uint32
+
+	// Umask is the current file mode creation mask.
+	Umask uint32
+	_     uint32
+}
+
+// FUSECreateIn contains all the arguments sent by the kernel to the daemon, to
+// atomically create and open a new regular file.
+//
+// Dynamically-sized objects cannot be marshalled.
+type FUSECreateIn struct {
+	marshal.StubMarshallable
+
+	// CreateMeta contains the flags, mode and umask fields for FUSE_CREATE.
+	CreateMeta FUSECreateMeta
+
+	// Name is the name of the node to create.
+	Name string
+}
+
+// MarshalBytes serializes r.CreateMeta and r.Name to the dst buffer.
+func (r *FUSECreateIn) MarshalBytes(buf []byte) {
+	r.CreateMeta.MarshalBytes(buf[:r.CreateMeta.SizeBytes()])
+	copy(buf[r.CreateMeta.SizeBytes():], r.Name)
+}
+
+// SizeBytes is the size of the memory representation of FUSECreateIn.
+// 1 extra byte for null-terminated string.
+func (r *FUSECreateIn) SizeBytes() int {
+	return r.CreateMeta.SizeBytes() + len(r.Name) + 1
+}
+
 // FUSEMknodMeta contains all the static fields of FUSEMknodIn,
 // which is used for FUSE_MKNOD.
 //
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index e1bbb4b522..cfae9ed0d2 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -392,6 +392,24 @@ func (inode) Valid(ctx context.Context) bool {
 	return true
 }
 
+// NewFile implements kernfs.Inode.NewFile by sending FUSE_CREATE; it returns EINVAL if ctx has no kernel task.
+func (i *inode) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*vfs.Dentry, error) {
+	kernelTask := kernel.TaskFromContext(ctx)
+	if kernelTask == nil {
+		log.Warningf("fusefs.Inode.NewFile: couldn't get kernel task from context (NodeID %v)", i.NodeID)
+		return nil, syserror.EINVAL
+	}
+	in := linux.FUSECreateIn{
+		CreateMeta: linux.FUSECreateMeta{
+			Flags: opts.Flags,
+			Mode:  uint32(opts.Mode) | linux.S_IFREG,
+			Umask: uint32(kernelTask.FSContext().Umask()),
+		},
+		Name: name,
+	}
+	return i.newEntry(ctx, name, linux.S_IFREG, linux.FUSE_CREATE, &in)
+}
+
 // NewNode implements kernfs.Inode.NewNode.
 func (i *inode) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (*vfs.Dentry, error) {
 	in := linux.FUSEMknodIn{
diff --git a/test/fuse/BUILD b/test/fuse/BUILD
index 02498b3a1a..1a6b5b516b 100644
--- a/test/fuse/BUILD
+++ b/test/fuse/BUILD
@@ -52,6 +52,10 @@ syscall_test(
     test = "//test/fuse/linux:readdir_test",
 )
 
+syscall_test(
+    fuse = "True",
+    test = "//test/fuse/linux:create_test",
+)
 
 syscall_test(
     size = "large",
diff --git a/test/fuse/linux/BUILD b/test/fuse/linux/BUILD
index 23c9fba318..2bb956af9b 100644
--- a/test/fuse/linux/BUILD
+++ b/test/fuse/linux/BUILD
@@ -152,4 +152,19 @@ cc_binary(
         "//test/util:test_main",
         "//test/util:test_util",
     ],
-)
\ No newline at end of file
+)
+
+cc_binary(
+    name = "create_test",
+    testonly = 1,
+    srcs = ["create_test.cc"],
+    deps = [
+        gtest,
+        ":fuse_base",
+        "//test/util:fs_util",
+        "//test/util:fuse_util",
+        "//test/util:temp_umask",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
diff --git a/test/fuse/linux/create_test.cc b/test/fuse/linux/create_test.cc
new file mode 100644
index 0000000000..9a0219a585
--- /dev/null
+++ b/test/fuse/linux/create_test.cc
@@ -0,0 +1,128 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fuse.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "test/fuse/linux/fuse_base.h"
+#include "test/util/fs_util.h"
+#include "test/util/fuse_util.h"
+#include "test/util/temp_umask.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class CreateTest : public FuseTest {
+ protected:
+  const std::string test_file_name_ = "test_file";
+  const mode_t mode = S_IFREG | S_IRWXU | S_IRWXG | S_IRWXO;
+};
+
+TEST_F(CreateTest, CreateFile) {
+  const std::string test_file_path =
+      JoinPath(mount_point_.path().c_str(), test_file_name_);
+
+  // Ensure the file doesn't exist.
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header),
+      .error = -ENOENT,
+  };
+  auto iov_out = FuseGenerateIovecs(out_header);
+  SetServerResponse(FUSE_LOOKUP, iov_out);
+
+  // creat(2) is equal to open(2) with open_flags O_CREAT | O_WRONLY | O_TRUNC.
+  const mode_t new_mask = S_IWGRP | S_IWOTH;
+  const int open_flags = O_CREAT | O_WRONLY | O_TRUNC;
+  out_header.error = 0;
+  out_header.len = sizeof(struct fuse_out_header) +
+                   sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out);
+  struct fuse_entry_out entry_payload = DefaultEntryOut(mode & ~new_mask, 2);
+  struct fuse_open_out out_payload = {
+      .fh = 1,
+      .open_flags = open_flags,
+  };
+  iov_out = FuseGenerateIovecs(out_header, entry_payload, out_payload);
+  SetServerResponse(FUSE_CREATE, iov_out);
+
+  // kernfs generates a successive FUSE_OPEN after the file is created. Linux's
+  // fuse kernel module will not send this FUSE_OPEN after creat(2).
+  out_header.len =
+      sizeof(struct fuse_out_header) + sizeof(struct fuse_open_out);
+  iov_out = FuseGenerateIovecs(out_header, out_payload);
+  SetServerResponse(FUSE_OPEN, iov_out);
+
+  int fd;
+  TempUmask mask(new_mask);
+  EXPECT_THAT(fd = creat(test_file_path.c_str(), mode), SyscallSucceeds());
+  EXPECT_THAT(fcntl(fd, F_GETFL),
+              SyscallSucceedsWithValue(open_flags & O_ACCMODE));
+
+  struct fuse_in_header in_header;
+  struct fuse_create_in in_payload;
+  std::vector<char> name(test_file_name_.size() + 1);
+  auto iov_in = FuseGenerateIovecs(in_header, in_payload, name);
+
+  // Skip the request of FUSE_LOOKUP.
+  SkipServerActualRequest();
+
+  // Get the first FUSE_CREATE.
+  GetServerActualRequest(iov_in);
+  EXPECT_EQ(in_header.len, sizeof(in_header) + sizeof(in_payload) +
+                               test_file_name_.size() + 1);
+  EXPECT_EQ(in_header.opcode, FUSE_CREATE);
+  EXPECT_EQ(in_payload.flags, open_flags);
+  EXPECT_EQ(in_payload.mode, mode & ~new_mask);
+  EXPECT_EQ(in_payload.umask, new_mask);
+  EXPECT_EQ(std::string(name.data()), test_file_name_);
+
+  // Get the successive FUSE_OPEN.
+  struct fuse_open_in in_payload_open;
+  iov_in = FuseGenerateIovecs(in_header, in_payload_open);
+  GetServerActualRequest(iov_in);
+  EXPECT_EQ(in_header.len, sizeof(in_header) + sizeof(in_payload_open));
+  EXPECT_EQ(in_header.opcode, FUSE_OPEN);
+  EXPECT_EQ(in_payload_open.flags, open_flags & O_ACCMODE);
+
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+  // Skip the FUSE_RELEASE.
+  SkipServerActualRequest();
+}
+
+TEST_F(CreateTest, CreateFileAlreadyExists) {
+  const std::string test_file_path =
+      JoinPath(mount_point_.path().c_str(), test_file_name_);
+
+  const int open_flags = O_CREAT | O_EXCL;
+  // Presumably makes the server answer the lookup so the file appears to exist.
+  SetServerInodeLookup(test_file_name_);
+  // open(2) takes (pathname, flags, mode); exclusive creation must fail here.
+  EXPECT_THAT(open(test_file_path.c_str(), open_flags, mode),
+              SyscallFailsWithErrno(EEXIST));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor

From 77e3d54bae3197856535ea71ae4841e3360a1a28 Mon Sep 17 00:00:00 2001
From: Jinmou Li <jinmli@google.com>
Date: Tue, 1 Sep 2020 01:49:57 +0000
Subject: [PATCH 191/211] Implement FUSE_WRITE

This commit adds basic write(2) support for FUSE.
---
 pkg/abi/linux/fuse.go                  |  67 +++---
 pkg/sentry/fsimpl/fuse/connection.go   |   4 +
 pkg/sentry/fsimpl/fuse/dev.go          |  13 +-
 pkg/sentry/fsimpl/fuse/fusefs.go       |  10 +-
 pkg/sentry/fsimpl/fuse/read_write.go   |  90 ++++++++
 pkg/sentry/fsimpl/fuse/regular_file.go | 105 +++++++++
 test/fuse/BUILD                        |   5 +
 test/fuse/linux/BUILD                  |  13 ++
 test/fuse/linux/fuse_base.cc           |  13 +-
 test/fuse/linux/fuse_base.h            |   7 +-
 test/fuse/linux/write_test.cc          | 303 +++++++++++++++++++++++++
 11 files changed, 592 insertions(+), 38 deletions(-)
 create mode 100644 test/fuse/linux/write_test.cc

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index adc04dbccf..e49a92fb2f 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -124,32 +124,6 @@ type FUSEHeaderOut struct {
 	Unique FUSEOpID
 }
 
-// FUSEWriteIn is the header written by a daemon when it makes a
-// write request to the FUSE filesystem.
-//
-// +marshal
-type FUSEWriteIn struct {
-	// Fh specifies the file handle that is being written to.
-	Fh uint64
-
-	// Offset is the offset of the write.
-	Offset uint64
-
-	// Size is the size of data being written.
-	Size uint32
-
-	// WriteFlags is the flags used during the write.
-	WriteFlags uint32
-
-	// LockOwner is the ID of the lock owner.
-	LockOwner uint64
-
-	// Flags is the flags for the request.
-	Flags uint32
-
-	_ uint32
-}
-
 // FUSE_INIT flags, consistent with the ones in include/uapi/linux/fuse.h.
 // Our taget version is 7.23 but we have few implemented in advance.
 const (
@@ -427,6 +401,47 @@ type FUSEReadIn struct {
 	_ uint32
 }
 
+// FUSEWriteIn is the first part of the payload of the
+// request sent by the kernel to the daemon
+// for FUSE_WRITE (struct for FUSE version >= 7.9).
+//
+// The second part of the payload is the
+// binary bytes of the data to be written.
+//
+// +marshal
+type FUSEWriteIn struct {
+	// Fh is the file handle in userspace.
+	Fh uint64
+
+	// Offset is the write offset.
+	Offset uint64
+
+	// Size is the number of bytes to write.
+	Size uint32
+
+	// WriteFlags for this FUSE_WRITE request.
+	WriteFlags uint32
+
+	// LockOwner is the id of the lock owner if there is one.
+	LockOwner uint64
+
+	// Flags for the underlying file.
+	Flags uint32
+
+	_ uint32
+}
+
+// FUSEWriteOut is the payload of the reply sent by the daemon to the kernel
+// for a FUSE_WRITE request.
+//
+// +marshal
+type FUSEWriteOut struct {
+	// Size is the number of bytes written.
+	Size uint32
+
+	_ uint32
+}
+
 // FUSEReleaseIn is the request sent by the kernel to the daemon
 // when there is no more reference to a file.
 //
diff --git a/pkg/sentry/fsimpl/fuse/connection.go b/pkg/sentry/fsimpl/fuse/connection.go
index c6e064f700..8dd86afade 100644
--- a/pkg/sentry/fsimpl/fuse/connection.go
+++ b/pkg/sentry/fsimpl/fuse/connection.go
@@ -63,6 +63,10 @@ type Request struct {
 	id   linux.FUSEOpID
 	hdr  *linux.FUSEHeaderIn
 	data []byte
+
+	// payload for this request: extra bytes to write after
+	// the data slice. Used by FUSE_WRITE.
+	payload []byte
 }
 
 // Response represents an actual response from the server, including the
diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go
index c53a380215..6022593d6d 100644
--- a/pkg/sentry/fsimpl/fuse/dev.go
+++ b/pkg/sentry/fsimpl/fuse/dev.go
@@ -152,7 +152,7 @@ func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts
 	for !fd.queue.Empty() {
 		req = fd.queue.Front()
 
-		if int64(req.hdr.Len) <= dst.NumBytes() {
+		if int64(req.hdr.Len)+int64(len(req.payload)) <= dst.NumBytes() {
 			break
 		}
 
@@ -191,6 +191,17 @@ func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts
 		return 0, syserror.EIO
 	}
 
+	if req.hdr.Opcode == linux.FUSE_WRITE {
+		written, err := dst.DropFirst(n).CopyOut(ctx, req.payload)
+		if err != nil {
+			return 0, err
+		}
+		if written != len(req.payload) {
+			return 0, syserror.EIO
+		}
+		n += int(written)
+	}
+
 	// Fully done with this req, remove it from the queue.
 	fd.queue.Remove(req)
 	if req.hdr.Opcode == linux.FUSE_RELEASE {
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index cfae9ed0d2..8cf13dcb6b 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -18,6 +18,7 @@ package fuse
 import (
 	"math"
 	"strconv"
+	"sync"
 	"sync/atomic"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
@@ -228,13 +229,18 @@ type inode struct {
 	kernfs.InodeNotSymlink
 	kernfs.OrderedChildren
 
-	NodeID uint64
 	dentry kernfs.Dentry
-	locks  vfs.FileLocks
 
 	// the owning filesystem. fs is immutable.
 	fs *filesystem
 
+	// metadataMu protects the metadata of this inode.
+	metadataMu sync.Mutex
+
+	NodeID uint64
+
+	locks vfs.FileLocks
+
 	// size of the file.
 	size uint64
 
diff --git a/pkg/sentry/fsimpl/fuse/read_write.go b/pkg/sentry/fsimpl/fuse/read_write.go
index 4ef8531dc6..22a018e5e7 100644
--- a/pkg/sentry/fsimpl/fuse/read_write.go
+++ b/pkg/sentry/fsimpl/fuse/read_write.go
@@ -150,3 +150,93 @@ func (fs *filesystem) ReadCallback(ctx context.Context, fd *regularFileFD, off u
 		fs.conn.mu.Unlock()
 	}
 }
+
+// Write sends FUSE_WRITE requests and returns the number of bytes
+// written according to the responses.
+//
+// Preconditions: len(data) == size.
+func (fs *filesystem) Write(ctx context.Context, fd *regularFileFD, off uint64, size uint32, data []byte) (uint32, error) {
+	t := kernel.TaskFromContext(ctx)
+	if t == nil {
+		log.Warningf("fusefs.Read: couldn't get kernel task from context")
+		return 0, syserror.EINVAL
+	}
+
+	// One request cannot exceed either maxWrite or maxPages.
+	maxWrite := uint32(fs.conn.maxPages) << usermem.PageShift
+	if maxWrite > fs.conn.maxWrite {
+		maxWrite = fs.conn.maxWrite
+	}
+
+	// Reuse the same struct for marshalling each request to avoid unnecessary memory allocation.
+	in := linux.FUSEWriteIn{
+		Fh: fd.Fh,
+		// TODO(gvisor.dev/issue/3245): file lock
+		LockOwner: 0,
+		// TODO(gvisor.dev/issue/3245): |= linux.FUSE_WRITE_LOCKOWNER
+		// TODO(gvisor.dev/issue/3237): |= linux.FUSE_WRITE_CACHE (not added yet)
+		WriteFlags: 0,
+		Flags:      fd.statusFlags(),
+	}
+
+	var written uint32
+
+	// This loop handles fragmented writes, where the number of bytes to write is
+	// larger than either maxWrite or maxPages, or when bigWrites is false.
+	// Unless a small value for max_write is explicitly used, this loop
+	// is expected to execute only once for the majority of the writes.
+	for written < size {
+		toWrite := size - written
+
+		// Limit the write size to one page.
+		// Note that the bigWrites flag is obsolete,
+		// latest libfuse always sets it on.
+		if !fs.conn.bigWrites && toWrite > usermem.PageSize {
+			toWrite = usermem.PageSize
+		}
+
+		// Limit the write size to maxWrite.
+		if toWrite > maxWrite {
+			toWrite = maxWrite
+		}
+
+		in.Offset = off + uint64(written)
+		in.Size = toWrite
+
+		req, err := fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(t.ThreadID()), fd.inode().NodeID, linux.FUSE_WRITE, &in)
+		if err != nil {
+			return 0, err
+		}
+
+		req.payload = data[written : written+toWrite]
+
+		// TODO(gvisor.dev/issue/3247): support async write.
+
+		res, err := fs.conn.Call(t, req)
+		if err != nil {
+			return 0, err
+		}
+		if err := res.Error(); err != nil {
+			return 0, err
+		}
+
+		out := linux.FUSEWriteOut{}
+		if err := res.UnmarshalPayload(&out); err != nil {
+			return 0, err
+		}
+
+		// Write more than requested? EIO.
+		if out.Size > toWrite {
+			return 0, syserror.EIO
+		}
+
+		written += out.Size
+
+		// Break if short write. Not necessarily an error.
+		if out.Size != toWrite {
+			break
+		}
+	}
+
+	return written, nil
+}
diff --git a/pkg/sentry/fsimpl/fuse/regular_file.go b/pkg/sentry/fsimpl/fuse/regular_file.go
index 37ce4e2687..193e773921 100644
--- a/pkg/sentry/fsimpl/fuse/regular_file.go
+++ b/pkg/sentry/fsimpl/fuse/regular_file.go
@@ -123,3 +123,108 @@ func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts
 	fd.offMu.Unlock()
 	return n, err
 }
+
+// PWrite implements vfs.FileDescriptionImpl.PWrite.
+func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
+	n, _, err := fd.pwrite(ctx, src, offset, opts)
+	return n, err
+}
+
+// Write implements vfs.FileDescriptionImpl.Write.
+func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
+	fd.offMu.Lock()
+	n, off, err := fd.pwrite(ctx, src, fd.off, opts)
+	fd.off = off
+	fd.offMu.Unlock()
+	return n, err
+}
+
+// pwrite returns the number of bytes written, final offset and error. The
+// final offset should be ignored by PWrite.
+func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) {
+	if offset < 0 {
+		return 0, offset, syserror.EINVAL
+	}
+
+	// Check that flags are supported.
+	//
+	// TODO(gvisor.dev/issue/2601): Support select pwritev2 flags.
+	if opts.Flags&^linux.RWF_HIPRI != 0 {
+		return 0, offset, syserror.EOPNOTSUPP
+	}
+
+	inode := fd.inode()
+	inode.metadataMu.Lock()
+	defer inode.metadataMu.Unlock()
+
+	// If the file is opened with O_APPEND, update offset to file size.
+	// Note: since our Open() implements the interface of kernfs,
+	// and kernfs currently does not support O_APPEND, this will never
+	// be true before we switch out from kernfs.
+	if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 {
+		// Locking inode.metadataMu is sufficient for reading size
+		offset = int64(inode.size)
+	}
+
+	srclen := src.NumBytes()
+
+	if srclen > math.MaxUint32 {
+		// FUSE only supports uint32 for size.
+		// Overflow.
+		return 0, offset, syserror.EINVAL
+	}
+	if end := offset + srclen; end < offset {
+		// Overflow.
+		return 0, offset, syserror.EINVAL
+	}
+
+	srclen, err = vfs.CheckLimit(ctx, offset, srclen)
+	if err != nil {
+		return 0, offset, err
+	}
+
+	if srclen == 0 {
+		// Return before causing any side effects.
+		return 0, offset, nil
+	}
+
+	src = src.TakeFirst64(srclen)
+
+	// TODO(gvisor.dev/issue/3237): Add cache support:
+	// buffer cache. Ideally we write from src to our buffer cache first.
+	// The slice passed to fs.Write() should be a slice from buffer cache.
+	data := make([]byte, srclen)
+	// Reason for making a copy here: connection.Call() blocks on kerneltask,
+	// which in turn acquires mm.activeMu lock. Functions like CopyInTo() will
+	// attempt to acquire the mm.activeMu lock as well -> deadlock.
+	// We must finish reading from the userspace memory before
+	// t.Block() deactivates it.
+	cp, err := src.CopyIn(ctx, data)
+	if err != nil {
+		return 0, offset, err
+	}
+	if int64(cp) != srclen {
+		return 0, offset, syserror.EIO
+	}
+
+	n, err := fd.inode().fs.Write(ctx, fd, uint64(offset), uint32(srclen), data)
+	if err != nil {
+		return 0, offset, err
+	}
+
+	if n == 0 {
+		// We have checked srclen != 0 previously.
+		// If err == nil, then it's a short write and we return EIO.
+		return 0, offset, syserror.EIO
+	}
+
+	written = int64(n)
+	finalOff = offset + written
+
+	if finalOff > int64(inode.size) {
+		atomic.StoreUint64(&inode.size, uint64(finalOff))
+		atomic.AddUint64(&inode.fs.conn.attributeVersion, 1)
+	}
+
+	return
+}
diff --git a/test/fuse/BUILD b/test/fuse/BUILD
index 1a6b5b516b..55ad987482 100644
--- a/test/fuse/BUILD
+++ b/test/fuse/BUILD
@@ -42,6 +42,11 @@ syscall_test(
     test = "//test/fuse/linux:read_test",
 )
 
+syscall_test(
+    fuse = "True",
+    test = "//test/fuse/linux:write_test",
+)
+
 syscall_test(
     fuse = "True",
     test = "//test/fuse/linux:rmdir_test",
diff --git a/test/fuse/linux/BUILD b/test/fuse/linux/BUILD
index 2bb956af9b..cc5bd560e0 100644
--- a/test/fuse/linux/BUILD
+++ b/test/fuse/linux/BUILD
@@ -154,6 +154,19 @@ cc_binary(
     ],
 )
 
+cc_binary(
+    name = "write_test",
+    testonly = 1,
+    srcs = ["write_test.cc"],
+    deps = [
+        gtest,
+        ":fuse_base",
+        "//test/util:fuse_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
 cc_binary(
     name = "create_test",
     testonly = 1,
diff --git a/test/fuse/linux/fuse_base.cc b/test/fuse/linux/fuse_base.cc
index e3c6b585cc..a033db1174 100644
--- a/test/fuse/linux/fuse_base.cc
+++ b/test/fuse/linux/fuse_base.cc
@@ -164,7 +164,8 @@ void FuseTest::UnmountFuse() {
 }
 
 // Consumes the first FUSE request and returns the corresponding PosixError.
-PosixError FuseTest::ServerConsumeFuseInit() {
+PosixError FuseTest::ServerConsumeFuseInit(
+    const struct fuse_init_out* out_payload) {
   std::vector<char> buf(FUSE_MIN_READ_BUFFER);
   RETURN_ERROR_IF_SYSCALL_FAIL(
       RetryEINTR(read)(dev_fd_, buf.data(), buf.size()));
@@ -176,10 +177,8 @@ PosixError FuseTest::ServerConsumeFuseInit() {
   };
   // Returns a fake fuse_init_out with 7.0 version to avoid ECONNREFUSED
   // error in the initialization of FUSE connection.
-  struct fuse_init_out out_payload = {
-      .major = 7,
-  };
-  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
+  auto iov_out = FuseGenerateIovecs(
+      out_header, *const_cast<struct fuse_init_out*>(out_payload));
 
   RETURN_ERROR_IF_SYSCALL_FAIL(
       RetryEINTR(writev)(dev_fd_, iov_out.data(), iov_out.size()));
@@ -244,7 +243,7 @@ void FuseTest::ServerFuseLoop() {
 // becomes testing thread and the child thread becomes the FUSE server running
 // in background. These 2 threads are connected via socketpair. sock_[0] is
 // opened in testing thread and sock_[1] is opened in the FUSE server.
-void FuseTest::SetUpFuseServer() {
+void FuseTest::SetUpFuseServer(const struct fuse_init_out* payload) {
   ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_), SyscallSucceeds());
 
   switch (fork()) {
@@ -261,7 +260,7 @@ void FuseTest::SetUpFuseServer() {
 
   // Begin child thread, i.e. the FUSE server.
   ASSERT_THAT(close(sock_[0]), SyscallSucceeds());
-  ServerCompleteWith(ServerConsumeFuseInit().ok());
+  ServerCompleteWith(ServerConsumeFuseInit(payload).ok());
   ServerFuseLoop();
   _exit(0);
 }
diff --git a/test/fuse/linux/fuse_base.h b/test/fuse/linux/fuse_base.h
index 452748d6de..6ad296ca21 100644
--- a/test/fuse/linux/fuse_base.h
+++ b/test/fuse/linux/fuse_base.h
@@ -33,6 +33,8 @@ namespace testing {
 
 constexpr char kMountOpts[] = "rootmode=755,user_id=0,group_id=0";
 
+constexpr struct fuse_init_out kDefaultFUSEInitOutPayload = {.major = 7};
+
 // Internal commands used to communicate between testing thread and the FUSE
 // server. See test/fuse/README.md for further detail.
 enum class FuseTestCmd {
@@ -171,7 +173,8 @@ class FuseTest : public ::testing::Test {
   void MountFuse(const char* mountOpts = kMountOpts);
 
   // Creates a socketpair for communication and forks FUSE server.
-  void SetUpFuseServer();
+  void SetUpFuseServer(
+      const struct fuse_init_out* payload = &kDefaultFUSEInitOutPayload);
 
   // Unmounts the mountpoint of the FUSE server.
   void UnmountFuse();
@@ -194,7 +197,7 @@ class FuseTest : public ::testing::Test {
 
   // Consumes the first FUSE request when mounting FUSE. Replies with a
   // response with empty payload.
-  PosixError ServerConsumeFuseInit();
+  PosixError ServerConsumeFuseInit(const struct fuse_init_out* payload);
 
   // A command switch that dispatch different FuseTestCmd to its handler.
   void ServerHandleCommand();
diff --git a/test/fuse/linux/write_test.cc b/test/fuse/linux/write_test.cc
new file mode 100644
index 0000000000..e7a1aff134
--- /dev/null
+++ b/test/fuse/linux/write_test.cc
@@ -0,0 +1,303 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fuse.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/fuse/linux/fuse_base.h"
+#include "test/util/fuse_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class WriteTest : public FuseTest {
+  void SetUp() override {
+    FuseTest::SetUp();
+    test_file_path_ = JoinPath(mount_point_.path().c_str(), test_file_);
+  }
+
+  // TearDown overrides the parent's function
+  // to skip checking the unconsumed release request at the end.
+  void TearDown() override { UnmountFuse(); }
+
+ protected:
+  const std::string test_file_ = "test_file";
+  const mode_t test_file_mode_ = S_IFREG | S_IRWXU | S_IRWXG | S_IRWXO;
+  const uint64_t test_fh_ = 1;
+  const uint32_t open_flag_ = O_RDWR;
+
+  std::string test_file_path_;
+
+  PosixErrorOr<FileDescriptor> OpenTestFile(const std::string &path,
+                                            uint64_t size = 512) {
+    SetServerInodeLookup(test_file_, test_file_mode_, size);
+
+    struct fuse_out_header out_header_open = {
+        .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_open_out),
+    };
+    struct fuse_open_out out_payload_open = {
+        .fh = test_fh_,
+        .open_flags = open_flag_,
+    };
+    auto iov_out_open = FuseGenerateIovecs(out_header_open, out_payload_open);
+    SetServerResponse(FUSE_OPEN, iov_out_open);
+
+    auto res = Open(path.c_str(), open_flag_);
+    if (res.ok()) {
+      SkipServerActualRequest();
+    }
+    return res;
+  }
+};
+
+class WriteTestSmallMaxWrite : public WriteTest {
+  void SetUp() override {
+    MountFuse();
+    SetUpFuseServer(&fuse_init_payload);
+    test_file_path_ = JoinPath(mount_point_.path().c_str(), test_file_);
+  }
+
+ protected:
+  const static uint32_t max_write_ = 4096;
+  constexpr static struct fuse_init_out fuse_init_payload = {
+      .major = 7,
+      .max_write = max_write_,
+  };
+
+  const uint32_t size_fragment = max_write_;
+};
+
+TEST_F(WriteTest, WriteNormal) {
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenTestFile(test_file_path_));
+
+  // Prepare for the write.
+  const int n_write = 10;
+  struct fuse_out_header out_header_write = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_write_out),
+  };
+  struct fuse_write_out out_payload_write = {
+      .size = n_write,
+  };
+  auto iov_out_write = FuseGenerateIovecs(out_header_write, out_payload_write);
+  SetServerResponse(FUSE_WRITE, iov_out_write);
+
+  // Issue the write.
+  std::vector<char> buf(n_write);
+  RandomizeBuffer(buf.data(), buf.size());
+  EXPECT_THAT(write(fd.get(), buf.data(), n_write),
+              SyscallSucceedsWithValue(n_write));
+
+  // Check the write request.
+  struct fuse_in_header in_header_write;
+  struct fuse_write_in in_payload_write;
+  std::vector<char> payload_buf(n_write);
+  auto iov_in_write =
+      FuseGenerateIovecs(in_header_write, in_payload_write, payload_buf);
+  GetServerActualRequest(iov_in_write);
+
+  EXPECT_EQ(in_payload_write.fh, test_fh_);
+  EXPECT_EQ(in_header_write.len,
+            sizeof(in_header_write) + sizeof(in_payload_write));
+  EXPECT_EQ(in_header_write.opcode, FUSE_WRITE);
+  EXPECT_EQ(in_payload_write.offset, 0);
+  EXPECT_EQ(in_payload_write.size, n_write);
+  EXPECT_EQ(buf, payload_buf);
+}
+
+TEST_F(WriteTest, WriteShort) {
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenTestFile(test_file_path_));
+
+  // Prepare for the write.
+  const int n_write = 10, n_written = 5;
+  struct fuse_out_header out_header_write = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_write_out),
+  };
+  struct fuse_write_out out_payload_write = {
+      .size = n_written,
+  };
+  auto iov_out_write = FuseGenerateIovecs(out_header_write, out_payload_write);
+  SetServerResponse(FUSE_WRITE, iov_out_write);
+
+  // Issue the write.
+  std::vector<char> buf(n_write);
+  RandomizeBuffer(buf.data(), buf.size());
+  EXPECT_THAT(write(fd.get(), buf.data(), n_write),
+              SyscallSucceedsWithValue(n_written));
+
+  // Check the write request.
+  struct fuse_in_header in_header_write;
+  struct fuse_write_in in_payload_write;
+  std::vector<char> payload_buf(n_write);
+  auto iov_in_write =
+      FuseGenerateIovecs(in_header_write, in_payload_write, payload_buf);
+  GetServerActualRequest(iov_in_write);
+
+  EXPECT_EQ(in_payload_write.fh, test_fh_);
+  EXPECT_EQ(in_header_write.len,
+            sizeof(in_header_write) + sizeof(in_payload_write));
+  EXPECT_EQ(in_header_write.opcode, FUSE_WRITE);
+  EXPECT_EQ(in_payload_write.offset, 0);
+  EXPECT_EQ(in_payload_write.size, n_write);
+  EXPECT_EQ(buf, payload_buf);
+}
+
+TEST_F(WriteTest, WriteShortZero) {
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenTestFile(test_file_path_));
+
+  // Prepare for the write.
+  const int n_write = 10;
+  struct fuse_out_header out_header_write = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_write_out),
+  };
+  struct fuse_write_out out_payload_write = {
+      .size = 0,
+  };
+  auto iov_out_write = FuseGenerateIovecs(out_header_write, out_payload_write);
+  SetServerResponse(FUSE_WRITE, iov_out_write);
+
+  // Issue the write.
+  std::vector<char> buf(n_write);
+  RandomizeBuffer(buf.data(), buf.size());
+  EXPECT_THAT(write(fd.get(), buf.data(), n_write), SyscallFailsWithErrno(EIO));
+
+  // Check the write request.
+  struct fuse_in_header in_header_write;
+  struct fuse_write_in in_payload_write;
+  std::vector<char> payload_buf(n_write);
+  auto iov_in_write =
+      FuseGenerateIovecs(in_header_write, in_payload_write, payload_buf);
+  GetServerActualRequest(iov_in_write);
+
+  EXPECT_EQ(in_payload_write.fh, test_fh_);
+  EXPECT_EQ(in_header_write.len,
+            sizeof(in_header_write) + sizeof(in_payload_write));
+  EXPECT_EQ(in_header_write.opcode, FUSE_WRITE);
+  EXPECT_EQ(in_payload_write.offset, 0);
+  EXPECT_EQ(in_payload_write.size, n_write);
+  EXPECT_EQ(buf, payload_buf);
+}
+
+TEST_F(WriteTest, WriteZero) {
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenTestFile(test_file_path_));
+
+  // Issue the write.
+  std::vector<char> buf(0);
+  EXPECT_THAT(write(fd.get(), buf.data(), 0), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(WriteTest, PWrite) {
+  const int file_size = 512;
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenTestFile(test_file_path_, file_size));
+
+  // Prepare for the write.
+  const int n_write = 10;
+  struct fuse_out_header out_header_write = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_write_out),
+  };
+  struct fuse_write_out out_payload_write = {
+      .size = n_write,
+  };
+  auto iov_out_write = FuseGenerateIovecs(out_header_write, out_payload_write);
+  SetServerResponse(FUSE_WRITE, iov_out_write);
+
+  // Issue the write.
+  std::vector<char> buf(n_write);
+  RandomizeBuffer(buf.data(), buf.size());
+  const int offset_write = file_size >> 1;
+  EXPECT_THAT(pwrite(fd.get(), buf.data(), n_write, offset_write),
+              SyscallSucceedsWithValue(n_write));
+
+  // Check the write request.
+  struct fuse_in_header in_header_write;
+  struct fuse_write_in in_payload_write;
+  std::vector<char> payload_buf(n_write);
+  auto iov_in_write =
+      FuseGenerateIovecs(in_header_write, in_payload_write, payload_buf);
+  GetServerActualRequest(iov_in_write);
+
+  EXPECT_EQ(in_payload_write.fh, test_fh_);
+  EXPECT_EQ(in_header_write.len,
+            sizeof(in_header_write) + sizeof(in_payload_write));
+  EXPECT_EQ(in_header_write.opcode, FUSE_WRITE);
+  EXPECT_EQ(in_payload_write.offset, offset_write);
+  EXPECT_EQ(in_payload_write.size, n_write);
+  EXPECT_EQ(buf, payload_buf);
+}
+
+TEST_F(WriteTestSmallMaxWrite, WriteSmallMaxWrie) {
+  const int n_fragment = 10;
+  const int n_write = size_fragment * n_fragment;
+
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenTestFile(test_file_path_, n_write));
+
+  // Prepare for the write.
+  struct fuse_out_header out_header_write = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_write_out),
+  };
+  struct fuse_write_out out_payload_write = {
+      .size = size_fragment,
+  };
+  auto iov_out_write = FuseGenerateIovecs(out_header_write, out_payload_write);
+
+  for (int i = 0; i < n_fragment; ++i) {
+    SetServerResponse(FUSE_WRITE, iov_out_write);
+  }
+
+  // Issue the write.
+  std::vector<char> buf(n_write);
+  RandomizeBuffer(buf.data(), buf.size());
+  EXPECT_THAT(write(fd.get(), buf.data(), n_write),
+              SyscallSucceedsWithValue(n_write));
+
+  ASSERT_EQ(GetServerNumUnsentResponses(), 0);
+  ASSERT_EQ(GetServerNumUnconsumedRequests(), n_fragment);
+
+  // Check the write request.
+  struct fuse_in_header in_header_write;
+  struct fuse_write_in in_payload_write;
+  std::vector<char> payload_buf(size_fragment);
+  auto iov_in_write =
+      FuseGenerateIovecs(in_header_write, in_payload_write, payload_buf);
+
+  for (int i = 0; i < n_fragment; ++i) {
+    GetServerActualRequest(iov_in_write);
+
+    EXPECT_EQ(in_payload_write.fh, test_fh_);
+    EXPECT_EQ(in_header_write.len,
+              sizeof(in_header_write) + sizeof(in_payload_write));
+    EXPECT_EQ(in_header_write.opcode, FUSE_WRITE);
+    EXPECT_EQ(in_payload_write.offset, i * size_fragment);
+    EXPECT_EQ(in_payload_write.size, size_fragment);
+
+    auto it = buf.begin() + i * size_fragment;
+    EXPECT_EQ(std::vector<char>(it, it + size_fragment), payload_buf);
+  }
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
\ No newline at end of file

From 0ac6f5546880ba451af3d1746c28318d8763152c Mon Sep 17 00:00:00 2001
From: Craig Chi <craigchi@google.com>
Date: Fri, 28 Aug 2020 11:25:19 -0700
Subject: [PATCH 192/211] Add default attr in fuse_util

fuse_util provides utilities for fuse testing. Add a function to return
a stub fuse_attr struct with specified mode and nodeid.
---
 test/fuse/linux/stat_test.cc | 30 +++++----------------
 test/util/fuse_util.cc       | 52 +++++++++++++++++++-----------------
 test/util/fuse_util.h        |  3 +++
 3 files changed, 38 insertions(+), 47 deletions(-)

diff --git a/test/fuse/linux/stat_test.cc b/test/fuse/linux/stat_test.cc
index 9ab53f8d20..717fd1fac0 100644
--- a/test/fuse/linux/stat_test.cc
+++ b/test/fuse/linux/stat_test.cc
@@ -45,26 +45,7 @@ class StatTest : public FuseTest {
 TEST_F(StatTest, StatNormal) {
   // Set up fixture.
   mode_t expected_mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
-  struct timespec atime = {.tv_sec = 1595436289, .tv_nsec = 134150844};
-  struct timespec mtime = {.tv_sec = 1595436290, .tv_nsec = 134150845};
-  struct timespec ctime = {.tv_sec = 1595436291, .tv_nsec = 134150846};
-  struct fuse_attr attr = {
-      .ino = 1,
-      .size = 512,
-      .blocks = 4,
-      .atime = static_cast<uint64_t>(atime.tv_sec),
-      .mtime = static_cast<uint64_t>(mtime.tv_sec),
-      .ctime = static_cast<uint64_t>(ctime.tv_sec),
-      .atimensec = static_cast<uint32_t>(atime.tv_nsec),
-      .mtimensec = static_cast<uint32_t>(mtime.tv_nsec),
-      .ctimensec = static_cast<uint32_t>(ctime.tv_nsec),
-      .mode = expected_mode,
-      .nlink = 2,
-      .uid = 1234,
-      .gid = 4321,
-      .rdev = 12,
-      .blksize = 4096,
-  };
+  struct fuse_attr attr = DefaultFuseAttr(expected_mode, 1);
   struct fuse_out_header out_header = {
       .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_attr_out),
   };
@@ -89,9 +70,12 @@ TEST_F(StatTest, StatNormal) {
       .st_size = static_cast<off_t>(attr.size),
       .st_blksize = attr.blksize,
       .st_blocks = static_cast<blkcnt_t>(attr.blocks),
-      .st_atim = atime,
-      .st_mtim = mtime,
-      .st_ctim = ctime,
+      .st_atim = (struct timespec){.tv_sec = static_cast<int>(attr.atime),
+                                   .tv_nsec = attr.atimensec},
+      .st_mtim = (struct timespec){.tv_sec = static_cast<int>(attr.mtime),
+                                   .tv_nsec = attr.mtimensec},
+      .st_ctim = (struct timespec){.tv_sec = static_cast<int>(attr.ctime),
+                                   .tv_nsec = attr.ctimensec},
   };
   EXPECT_TRUE(StatsAreEqual(stat_buf, expected_stat));
 
diff --git a/test/util/fuse_util.cc b/test/util/fuse_util.cc
index 5b10a9e451..4db0533355 100644
--- a/test/util/fuse_util.cc
+++ b/test/util/fuse_util.cc
@@ -22,35 +22,39 @@
 namespace gvisor {
 namespace testing {
 
-// Create response body with specified mode and nodeID.
-fuse_entry_out DefaultEntryOut(mode_t mode, uint64_t node_id) {
+// Create a default FuseAttr struct with specified mode and inode.
+fuse_attr DefaultFuseAttr(mode_t mode, uint64_t inode) {
   const int time_sec = 1595436289;
   const int time_nsec = 134150844;
+  return (struct fuse_attr){
+      .ino = inode,
+      .size = 512,
+      .blocks = 4,
+      .atime = time_sec,
+      .mtime = time_sec,
+      .ctime = time_sec,
+      .atimensec = time_nsec,
+      .mtimensec = time_nsec,
+      .ctimensec = time_nsec,
+      .mode = mode,
+      .nlink = 2,
+      .uid = 1234,
+      .gid = 4321,
+      .rdev = 12,
+      .blksize = 4096,
+  };
+}
+
+// Create response body with specified mode and nodeID.
+fuse_entry_out DefaultEntryOut(mode_t mode, uint64_t node_id) {
   struct fuse_entry_out default_entry_out = {
       .nodeid = node_id,
       .generation = 0,
-      .entry_valid = time_sec,
-      .attr_valid = time_sec,
-      .entry_valid_nsec = time_nsec,
-      .attr_valid_nsec = time_nsec,
-      .attr =
-          (struct fuse_attr){
-              .ino = node_id,
-              .size = 512,
-              .blocks = 4,
-              .atime = time_sec,
-              .mtime = time_sec,
-              .ctime = time_sec,
-              .atimensec = time_nsec,
-              .mtimensec = time_nsec,
-              .ctimensec = time_nsec,
-              .mode = mode,
-              .nlink = 2,
-              .uid = 1234,
-              .gid = 4321,
-              .rdev = 12,
-              .blksize = 4096,
-          },
+      .entry_valid = 0,
+      .attr_valid = 0,
+      .entry_valid_nsec = 0,
+      .attr_valid_nsec = 0,
+      .attr = DefaultFuseAttr(mode, node_id),
   };
   return default_entry_out;
 };
diff --git a/test/util/fuse_util.h b/test/util/fuse_util.h
index 1f1bf64a40..6b5a8ce1f7 100644
--- a/test/util/fuse_util.h
+++ b/test/util/fuse_util.h
@@ -63,6 +63,9 @@ std::vector<struct iovec> FuseGenerateIovecs(T &first, Types &...args) {
   return first_iovec;
 }
 
+// Create a fuse_attr filled with the specified mode and inode.
+fuse_attr DefaultFuseAttr(mode_t mode, uint64_t inode);
+
 // Return a fuse_entry_out FUSE server response body.
 fuse_entry_out DefaultEntryOut(mode_t mode, uint64_t nodeId);
 

From 88820940a980081711ce848592609dd4192e1be5 Mon Sep 17 00:00:00 2001
From: Craig Chi <craigchi@google.com>
Date: Tue, 8 Sep 2020 14:32:02 -0700
Subject: [PATCH 193/211] Add fuse_fd_util library to include common fuse fd
 test functions

---
 test/fuse/linux/BUILD           | 15 ++++++++
 test/fuse/linux/fuse_fd_util.cc | 62 +++++++++++++++++++++++++++++++++
 test/fuse/linux/fuse_fd_util.h  | 48 +++++++++++++++++++++++++
 3 files changed, 125 insertions(+)
 create mode 100644 test/fuse/linux/fuse_fd_util.cc
 create mode 100644 test/fuse/linux/fuse_fd_util.h

diff --git a/test/fuse/linux/BUILD b/test/fuse/linux/BUILD
index cc5bd560e0..eb090cbfd5 100644
--- a/test/fuse/linux/BUILD
+++ b/test/fuse/linux/BUILD
@@ -141,6 +141,21 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "fuse_fd_util",
+    testonly = 1,
+    srcs = ["fuse_fd_util.cc"],
+    hdrs = ["fuse_fd_util.h"],
+    deps = [
+        gtest,
+        ":fuse_base",
+        "//test/util:cleanup",
+        "//test/util:file_descriptor",
+        "//test/util:fuse_util",
+        "//test/util:posix_error",
+    ],
+)
+
 cc_binary(
     name = "read_test",
     testonly = 1,
diff --git a/test/fuse/linux/fuse_fd_util.cc b/test/fuse/linux/fuse_fd_util.cc
new file mode 100644
index 0000000000..4a2505b005
--- /dev/null
+++ b/test/fuse/linux/fuse_fd_util.cc
@@ -0,0 +1,62 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/fuse/linux/fuse_fd_util.h"
+
+#include <fcntl.h>
+#include <linux/fuse.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+
+#include <string>
+#include <vector>
+
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fuse_util.h"
+#include "test/util/posix_error.h"
+
+namespace gvisor {
+namespace testing {
+
+PosixErrorOr<FileDescriptor> FuseFdTest::OpenPath(const std::string &path,
+                                                  uint32_t flags, uint64_t fh) {
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_open_out),
+  };
+  struct fuse_open_out out_payload = {
+      .fh = fh,
+      .open_flags = flags,
+  };
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
+  SetServerResponse(FUSE_OPEN, iov_out);
+
+  auto res = Open(path.c_str(), flags);
+  if (res.ok()) {
+    SkipServerActualRequest();
+  }
+  return res;
+}
+
+Cleanup FuseFdTest::CloseFD(FileDescriptor &fd) {
+  return Cleanup([&] {
+    close(fd.release());
+    SkipServerActualRequest();
+  });
+}
+
+}  // namespace testing
+}  // namespace gvisor
+
diff --git a/test/fuse/linux/fuse_fd_util.h b/test/fuse/linux/fuse_fd_util.h
new file mode 100644
index 0000000000..066185c94c
--- /dev/null
+++ b/test/fuse/linux/fuse_fd_util.h
@@ -0,0 +1,48 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_FUSE_FUSE_FD_UTIL_H_
+#define GVISOR_TEST_FUSE_FUSE_FD_UTIL_H_
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <string>
+
+#include "test/fuse/linux/fuse_base.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+
+namespace gvisor {
+namespace testing {
+
+class FuseFdTest : public FuseTest {
+ public:
+  // Sets the FUSE server to respond to a FUSE_OPEN with corresponding flags and
+  // fh. Then does a real file system open on the absolute path to get an fd.
+  PosixErrorOr<FileDescriptor> OpenPath(const std::string &path,
+                                        uint32_t flags = O_RDONLY,
+                                        uint64_t fh = 1);
+
+  // Returns a cleanup object that closes the fd when it is destroyed. After
+  // the close is done, tells the FUSE server to skip this FUSE_RELEASE.
+  Cleanup CloseFD(FileDescriptor &fd);
+};
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_FUSE_FUSE_FD_UTIL_H_

From cc9dff706be5518466ac677c19fc9436e059855d Mon Sep 17 00:00:00 2001
From: Craig Chi <craigchi@google.com>
Date: Wed, 9 Sep 2020 09:16:09 -0700
Subject: [PATCH 194/211] Add fh support for revise attr and fstat(2) test

According to Linux 4.4's FUSE behavior, the flags and fh attributes in
FUSE_GETATTR are only used in read, write, and lseek; fstat(2) doesn't
use them. Add tests to ensure the requests sent from the FUSE module
are consistent with Linux's.

Updates #3655
---
 pkg/abi/linux/fuse.go                  |   5 ++
 pkg/sentry/fsimpl/fuse/fusefs.go       |  21 +++--
 pkg/sentry/fsimpl/fuse/regular_file.go |   2 +-
 test/fuse/linux/BUILD                  |   4 +-
 test/fuse/linux/stat_test.cc           | 112 +++++++++++++++++++++++--
 test/util/fuse_util.cc                 |  12 +--
 test/util/fuse_util.h                  |   5 +-
 7 files changed, 133 insertions(+), 28 deletions(-)

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index e49a92fb2f..f0bef1e8ec 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -227,6 +227,11 @@ type FUSEInitOut struct {
 	_ [8]uint32
 }
 
+// FUSE_GETATTR_FH is currently the only flag of FUSEGetAttrIn.GetAttrFlags.
+// If it is set, the file handle (FUSEGetAttrIn.Fh) is used to indicate the
+// object instead of the node id attribute in the request header.
+const FUSE_GETATTR_FH = (1 << 0)
+
 // FUSEGetAttrIn is the request sent by the kernel to the daemon,
 // to get the attribute of a inode.
 //
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index 8cf13dcb6b..821048d876 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -609,9 +609,9 @@ func statFromFUSEAttr(attr linux.FUSEAttr, mask, devMinor uint32) linux.Statx {
 }
 
 // getAttr gets the attribute of this inode by issuing a FUSE_GETATTR request
-// or read from local cache.
-// It updates the corresponding attributes if necessary.
-func (i *inode) getAttr(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions) (linux.FUSEAttr, error) {
+// or read from local cache. It updates the corresponding attributes if
+// necessary.
+func (i *inode) getAttr(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions, flags uint32, fh uint64) (linux.FUSEAttr, error) {
 	attributeVersion := atomic.LoadUint64(&i.fs.conn.attributeVersion)
 
 	// TODO(gvisor.dev/issue/3679): send the request only if
@@ -631,11 +631,10 @@ func (i *inode) getAttr(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOp
 
 	creds := auth.CredentialsFromContext(ctx)
 
-	var in linux.FUSEGetAttrIn
-	// We don't set any attribute in the request, because in VFS2 fstat(2) will
-	// finally be translated into vfs.FilesystemImpl.StatAt() (see
-	// pkg/sentry/syscalls/linux/vfs2/stat.go), resulting in the same flow
-	// as stat(2). Thus GetAttrFlags and Fh variable will never be used in VFS2.
+	in := linux.FUSEGetAttrIn{
+		GetAttrFlags: flags,
+		Fh:           fh,
+	}
 	req, err := i.fs.conn.NewRequest(creds, uint32(task.ThreadID()), i.NodeID, linux.FUSE_GETATTR, &in)
 	if err != nil {
 		return linux.FUSEAttr{}, err
@@ -676,17 +675,17 @@ func (i *inode) getAttr(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOp
 // reviseAttr attempts to update the attributes for internal purposes
 // by calling getAttr with a pre-specified mask.
 // Used by read, write, lseek.
-func (i *inode) reviseAttr(ctx context.Context) error {
+func (i *inode) reviseAttr(ctx context.Context, flags uint32, fh uint64) error {
 	// Never need atime for internal purposes.
 	_, err := i.getAttr(ctx, i.fs.VFSFilesystem(), vfs.StatOptions{
 		Mask: linux.STATX_BASIC_STATS &^ linux.STATX_ATIME,
-	})
+	}, flags, fh)
 	return err
 }
 
 // Stat implements kernfs.Inode.Stat.
 func (i *inode) Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
-	attr, err := i.getAttr(ctx, fs, opts)
+	attr, err := i.getAttr(ctx, fs, opts, 0, 0)
 	if err != nil {
 		return linux.Statx{}, err
 	}
diff --git a/pkg/sentry/fsimpl/fuse/regular_file.go b/pkg/sentry/fsimpl/fuse/regular_file.go
index 193e773921..5bdd096c3c 100644
--- a/pkg/sentry/fsimpl/fuse/regular_file.go
+++ b/pkg/sentry/fsimpl/fuse/regular_file.go
@@ -65,7 +65,7 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
 
 	// Reading beyond EOF, update file size if outdated.
 	if uint64(offset+size) > atomic.LoadUint64(&inode.size) {
-		if err := inode.reviseAttr(ctx); err != nil {
+		if err := inode.reviseAttr(ctx, linux.FUSE_GETATTR_FH, fd.Fh); err != nil {
 			return 0, err
 		}
 		// If the offset after update is still too large, return error.
diff --git a/test/fuse/linux/BUILD b/test/fuse/linux/BUILD
index eb090cbfd5..7a3e52fad9 100644
--- a/test/fuse/linux/BUILD
+++ b/test/fuse/linux/BUILD
@@ -11,7 +11,9 @@ cc_binary(
     srcs = ["stat_test.cc"],
     deps = [
         gtest,
-        ":fuse_base",
+        ":fuse_fd_util",
+        "//test/util:cleanup",
+        "//test/util:fs_util",
         "//test/util:fuse_util",
         "//test/util:test_main",
         "//test/util:test_util",
diff --git a/test/fuse/linux/stat_test.cc b/test/fuse/linux/stat_test.cc
index 717fd1fac0..6f032cac11 100644
--- a/test/fuse/linux/stat_test.cc
+++ b/test/fuse/linux/stat_test.cc
@@ -18,12 +18,15 @@
 #include <sys/stat.h>
 #include <sys/statfs.h>
 #include <sys/types.h>
+#include <sys/uio.h>
 #include <unistd.h>
 
 #include <vector>
 
 #include "gtest/gtest.h"
-#include "test/fuse/linux/fuse_base.h"
+#include "test/fuse/linux/fuse_fd_util.h"
+#include "test/util/cleanup.h"
+#include "test/util/fs_util.h"
 #include "test/util/fuse_util.h"
 #include "test/util/test_util.h"
 
@@ -32,19 +35,30 @@ namespace testing {
 
 namespace {
 
-class StatTest : public FuseTest {
+class StatTest : public FuseFdTest {
  public:
+  void SetUp() override {
+    FuseFdTest::SetUp();
+    test_file_path_ = JoinPath(mount_point_.path(), test_file_);
+  }
+
+ protected:
   bool StatsAreEqual(struct stat expected, struct stat actual) {
-    // device number will be dynamically allocated by kernel, we cannot know
-    // in advance
+    // The device number is dynamically allocated by the kernel, so we cannot
+    // know it in advance.
     actual.st_dev = expected.st_dev;
     return memcmp(&expected, &actual, sizeof(struct stat)) == 0;
   }
+
+  const std::string test_file_ = "testfile";
+  const mode_t expected_mode = S_IFREG | S_IRUSR | S_IWUSR;
+  const uint64_t fh = 23;
+
+  std::string test_file_path_;
 };
 
 TEST_F(StatTest, StatNormal) {
   // Set up fixture.
-  mode_t expected_mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
   struct fuse_attr attr = DefaultFuseAttr(expected_mode, 1);
   struct fuse_out_header out_header = {
       .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_attr_out),
@@ -55,7 +69,7 @@ TEST_F(StatTest, StatNormal) {
   auto iov_out = FuseGenerateIovecs(out_header, out_payload);
   SetServerResponse(FUSE_GETATTR, iov_out);
 
-  // Do integration test.
+  // Make syscall.
   struct stat stat_buf;
   EXPECT_THAT(stat(mount_point_.path().c_str(), &stat_buf), SyscallSucceeds());
 
@@ -99,7 +113,7 @@ TEST_F(StatTest, StatNotFound) {
   auto iov_out = FuseGenerateIovecs(out_header);
   SetServerResponse(FUSE_GETATTR, iov_out);
 
-  // Do integration test.
+  // Make syscall.
   struct stat stat_buf;
   EXPECT_THAT(stat(mount_point_.path().c_str(), &stat_buf),
               SyscallFailsWithErrno(ENOENT));
@@ -115,6 +129,90 @@ TEST_F(StatTest, StatNotFound) {
   EXPECT_EQ(in_payload.fh, 0);
 }
 
+TEST_F(StatTest, FstatNormal) {
+  // Set up fixture.
+  SetServerInodeLookup(test_file_);
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenPath(test_file_path_, O_RDONLY, fh));
+  auto close_fd = CloseFD(fd);
+
+  struct fuse_attr attr = DefaultFuseAttr(expected_mode, 2);
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_attr_out),
+  };
+  struct fuse_attr_out out_payload = {
+      .attr = attr,
+  };
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
+  SetServerResponse(FUSE_GETATTR, iov_out);
+
+  // Make syscall.
+  struct stat stat_buf;
+  EXPECT_THAT(fstat(fd.get(), &stat_buf), SyscallSucceeds());
+
+  // Check filesystem operation result.
+  struct stat expected_stat = {
+      .st_ino = attr.ino,
+      .st_nlink = attr.nlink,
+      .st_mode = expected_mode,
+      .st_uid = attr.uid,
+      .st_gid = attr.gid,
+      .st_rdev = attr.rdev,
+      .st_size = static_cast<off_t>(attr.size),
+      .st_blksize = attr.blksize,
+      .st_blocks = static_cast<blkcnt_t>(attr.blocks),
+      .st_atim = (struct timespec){.tv_sec = static_cast<int>(attr.atime),
+                                   .tv_nsec = attr.atimensec},
+      .st_mtim = (struct timespec){.tv_sec = static_cast<int>(attr.mtime),
+                                   .tv_nsec = attr.mtimensec},
+      .st_ctim = (struct timespec){.tv_sec = static_cast<int>(attr.ctime),
+                                   .tv_nsec = attr.ctimensec},
+  };
+  EXPECT_TRUE(StatsAreEqual(stat_buf, expected_stat));
+
+  // Check FUSE request.
+  struct fuse_in_header in_header;
+  struct fuse_getattr_in in_payload;
+  auto iov_in = FuseGenerateIovecs(in_header, in_payload);
+
+  GetServerActualRequest(iov_in);
+  EXPECT_EQ(in_header.opcode, FUSE_GETATTR);
+  EXPECT_EQ(in_payload.getattr_flags, 0);
+  EXPECT_EQ(in_payload.fh, 0);
+}
+
+TEST_F(StatTest, StatByFileHandle) {
+  // Set up fixture.
+  SetServerInodeLookup(test_file_, expected_mode, 0);
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenPath(test_file_path_, O_RDONLY, fh));
+  auto close_fd = CloseFD(fd);
+
+  struct fuse_attr attr = DefaultFuseAttr(expected_mode, 2, 0);
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_attr_out),
+  };
+  struct fuse_attr_out out_payload = {
+      .attr = attr,
+  };
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
+  SetServerResponse(FUSE_GETATTR, iov_out);
+
+  // Make syscall.
+  std::vector<char> buf(1);
+  // Since this is an empty file, it won't issue FUSE_READ. But a FUSE_GETATTR
+  // will be issued before read completes.
+  EXPECT_THAT(read(fd.get(), buf.data(), buf.size()), SyscallSucceeds());
+
+  // Check FUSE request.
+  struct fuse_in_header in_header;
+  struct fuse_getattr_in in_payload;
+  auto iov_in = FuseGenerateIovecs(in_header, in_payload);
+
+  GetServerActualRequest(iov_in);
+  EXPECT_EQ(in_header.opcode, FUSE_GETATTR);
+  EXPECT_EQ(in_payload.getattr_flags, FUSE_GETATTR_FH);
+  EXPECT_EQ(in_payload.fh, fh);
+}
+
 }  // namespace
 
 }  // namespace testing
diff --git a/test/util/fuse_util.cc b/test/util/fuse_util.cc
index 4db0533355..595d0cebfa 100644
--- a/test/util/fuse_util.cc
+++ b/test/util/fuse_util.cc
@@ -22,13 +22,13 @@
 namespace gvisor {
 namespace testing {
 
-// Create a default FuseAttr struct with specified mode and inode.
-fuse_attr DefaultFuseAttr(mode_t mode, uint64_t inode) {
+// Create a default FuseAttr struct with specified mode, inode, and size.
+fuse_attr DefaultFuseAttr(mode_t mode, uint64_t inode, uint64_t size) {
   const int time_sec = 1595436289;
   const int time_nsec = 134150844;
   return (struct fuse_attr){
       .ino = inode,
-      .size = 512,
+      .size = size,
       .blocks = 4,
       .atime = time_sec,
       .mtime = time_sec,
@@ -45,8 +45,8 @@ fuse_attr DefaultFuseAttr(mode_t mode, uint64_t inode) {
   };
 }
 
-// Create response body with specified mode and nodeID.
-fuse_entry_out DefaultEntryOut(mode_t mode, uint64_t node_id) {
+// Create response body with specified mode, nodeID, and size.
+fuse_entry_out DefaultEntryOut(mode_t mode, uint64_t node_id, uint64_t size) {
   struct fuse_entry_out default_entry_out = {
       .nodeid = node_id,
       .generation = 0,
@@ -54,7 +54,7 @@ fuse_entry_out DefaultEntryOut(mode_t mode, uint64_t node_id) {
       .attr_valid = 0,
       .entry_valid_nsec = 0,
       .attr_valid_nsec = 0,
-      .attr = DefaultFuseAttr(mode, node_id),
+      .attr = DefaultFuseAttr(mode, node_id, size),
   };
   return default_entry_out;
 };
diff --git a/test/util/fuse_util.h b/test/util/fuse_util.h
index 6b5a8ce1f7..544fe1b38e 100644
--- a/test/util/fuse_util.h
+++ b/test/util/fuse_util.h
@@ -64,10 +64,11 @@ std::vector<struct iovec> FuseGenerateIovecs(T &first, Types &...args) {
 }
 
 // Create a fuse_attr filled with the specified mode and inode.
-fuse_attr DefaultFuseAttr(mode_t mode, uint64_t inode);
+fuse_attr DefaultFuseAttr(mode_t mode, uint64_t inode, uint64_t size = 512);
 
 // Return a fuse_entry_out FUSE server response body.
-fuse_entry_out DefaultEntryOut(mode_t mode, uint64_t nodeId);
+fuse_entry_out DefaultEntryOut(mode_t mode, uint64_t node_id,
+                               uint64_t size = 512);
 
 }  // namespace testing
 }  // namespace gvisor

From b301fd500c923d01f3cce922eee883e94d4585d5 Mon Sep 17 00:00:00 2001
From: Craig Chi <craig08@users.noreply.github.com>
Date: Wed, 9 Sep 2020 10:44:09 -0700
Subject: [PATCH 195/211] Implement FUSE_SETATTR

This commit implements the FUSE_SETATTR command. When a system call
modifies the metadata of a regular file or a folder via chown(2), chmod(2),
truncate(2), utime(2), or utimes(2), it should be translated to the
corresponding FUSE_SETATTR command and sent to the FUSE server.

Fixes #3332
---
 pkg/abi/linux/fuse.go                       |  67 ++++
 pkg/sentry/fsimpl/fuse/file.go              |   2 +-
 pkg/sentry/fsimpl/fuse/fusefs.go            |  83 ++++-
 pkg/sentry/fsimpl/kernfs/inode_impl_util.go |   7 +
 test/fuse/BUILD                             |   5 +
 test/fuse/linux/BUILD                       |  16 +
 test/fuse/linux/setstat_test.cc             | 338 ++++++++++++++++++++
 7 files changed, 516 insertions(+), 2 deletions(-)
 create mode 100644 test/fuse/linux/setstat_test.cc

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index f0bef1e8ec..fcc957bfe6 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -749,3 +749,70 @@ func (r *FUSEDirent) UnmarshalBytes(src []byte) {
 	name.UnmarshalBytes(src[:r.Meta.NameLen])
 	r.Name = string(name)
 }
+
+// FATTR_* consts are the attribute flags defined in include/uapi/linux/fuse.h.
+// These should be or-ed together for setattr to know what has been changed.
+const (
+	FATTR_MODE      = (1 << 0)
+	FATTR_UID       = (1 << 1)
+	FATTR_GID       = (1 << 2)
+	FATTR_SIZE      = (1 << 3)
+	FATTR_ATIME     = (1 << 4)
+	FATTR_MTIME     = (1 << 5)
+	FATTR_FH        = (1 << 6)
+	FATTR_ATIME_NOW = (1 << 7)
+	FATTR_MTIME_NOW = (1 << 8)
+	FATTR_LOCKOWNER = (1 << 9)
+	FATTR_CTIME     = (1 << 10)
+)
+
+// FUSESetAttrIn is the request sent by the kernel to the daemon,
+// to set the attribute(s) of a file.
+//
+// +marshal
+type FUSESetAttrIn struct {
+	// Valid indicates which attributes are modified by this request.
+	Valid uint32
+
+	_ uint32
+
+	// Fh is used to identify the file if FATTR_FH is set in Valid.
+	Fh uint64
+
+	// Size is the size that the request wants to change to.
+	Size uint64
+
+	// LockOwner is the owner of the lock that the request wants to change to.
+	LockOwner uint64
+
+	// Atime is the access time that the request wants to change to.
+	Atime uint64
+
+	// Mtime is the modification time that the request wants to change to.
+	Mtime uint64
+
+	// Ctime is the status change time that the request wants to change to.
+	Ctime uint64
+
+	// AtimeNsec is the nanosecond part of Atime.
+	AtimeNsec uint32
+
+	// MtimeNsec is the nanosecond part of Mtime.
+	MtimeNsec uint32
+
+	// CtimeNsec is the nanosecond part of Ctime.
+	CtimeNsec uint32
+
+	// Mode is the file mode that the request wants to change to.
+	Mode uint32
+
+	_ uint32
+
+	// UID is the user ID of the owner that the request wants to change to.
+	UID uint32
+
+	// GID is the group ID of the owner that the request wants to change to.
+	GID uint32
+
+	_ uint32
+}
diff --git a/pkg/sentry/fsimpl/fuse/file.go b/pkg/sentry/fsimpl/fuse/file.go
index 186ec23620..15c0e3f419 100644
--- a/pkg/sentry/fsimpl/fuse/file.go
+++ b/pkg/sentry/fsimpl/fuse/file.go
@@ -123,5 +123,5 @@ func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linu
 // SetStat implements FileDescriptionImpl.SetStat.
 func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
 	creds := auth.CredentialsFromContext(ctx)
-	return fd.inode().SetStat(ctx, fd.inode().fs.VFSFilesystem(), creds, opts)
+	return fd.inode().setAttr(ctx, fd.inode().fs.VFSFilesystem(), creds, opts, true, fd.Fh)
 }
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index 821048d876..5722453030 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -660,7 +660,7 @@ func (i *inode) getAttr(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOp
 	}
 
 	// Set the metadata of kernfs.InodeAttrs.
-	if err := i.SetStat(ctx, fs, creds, vfs.SetStatOptions{
+	if err := i.SetInodeStat(ctx, fs, creds, vfs.SetStatOptions{
 		Stat: statFromFUSEAttr(out.Attr, linux.STATX_ALL, i.fs.devMinor),
 	}); err != nil {
 		return linux.FUSEAttr{}, err
@@ -703,3 +703,84 @@ func (i *inode) StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, e
 	// TODO(gvisor.dev/issues/3413): Complete the implementation of statfs.
 	return vfs.GenericStatFS(linux.FUSE_SUPER_MAGIC), nil
 }
+
+// fattrMaskFromStats converts the STATX_* bits of vfs.SetStatOptions.Stat.Mask
+// to the FATTR_* mask, aligned with the attribute mask in include/linux/fs.h.
+func fattrMaskFromStats(mask uint32) uint32 {
+	var fuseAttrMask uint32
+	maskMap := map[uint32]uint32{
+		linux.STATX_MODE:  linux.FATTR_MODE,
+		linux.STATX_UID:   linux.FATTR_UID,
+		linux.STATX_GID:   linux.FATTR_GID,
+		linux.STATX_SIZE:  linux.FATTR_SIZE,
+		linux.STATX_ATIME: linux.FATTR_ATIME,
+		linux.STATX_MTIME: linux.FATTR_MTIME,
+		linux.STATX_CTIME: linux.FATTR_CTIME,
+	}
+	for statxMask, fattrMask := range maskMap {
+		if mask&statxMask != 0 {
+			fuseAttrMask |= fattrMask
+		}
+	}
+	return fuseAttrMask
+}
+
+// SetStat implements kernfs.Inode.SetStat.
+func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
+	return i.setAttr(ctx, fs, creds, opts, false, 0)
+}
+
+func (i *inode) setAttr(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions, useFh bool, fh uint64) error {
+	conn := i.fs.conn
+	task := kernel.TaskFromContext(ctx)
+	if task == nil {
+		log.Warningf("couldn't get kernel task from context")
+		return syserror.EINVAL
+	}
+
+	// We should retain the original file type when assigning new mode.
+	fileType := uint16(i.Mode()) & linux.S_IFMT
+	fattrMask := fattrMaskFromStats(opts.Stat.Mask)
+	if useFh {
+		fattrMask |= linux.FATTR_FH
+	}
+	in := linux.FUSESetAttrIn{
+		Valid:     fattrMask,
+		Fh:        fh,
+		Size:      opts.Stat.Size,
+		Atime:     uint64(opts.Stat.Atime.Sec),
+		Mtime:     uint64(opts.Stat.Mtime.Sec),
+		Ctime:     uint64(opts.Stat.Ctime.Sec),
+		AtimeNsec: opts.Stat.Atime.Nsec,
+		MtimeNsec: opts.Stat.Mtime.Nsec,
+		CtimeNsec: opts.Stat.Ctime.Nsec,
+		Mode:      uint32(fileType | opts.Stat.Mode),
+		UID:       opts.Stat.UID,
+		GID:       opts.Stat.GID,
+	}
+	req, err := conn.NewRequest(creds, uint32(task.ThreadID()), i.NodeID, linux.FUSE_SETATTR, &in)
+	if err != nil {
+		return err
+	}
+
+	res, err := conn.Call(task, req)
+	if err != nil {
+		return err
+	}
+	if err := res.Error(); err != nil {
+		return err
+	}
+	out := linux.FUSEGetAttrOut{}
+	if err := res.UnmarshalPayload(&out); err != nil {
+		return err
+	}
+
+	// Set the metadata of kernfs.InodeAttrs.
+	if err := i.SetInodeStat(ctx, fs, creds, vfs.SetStatOptions{
+		Stat: statFromFUSEAttr(out.Attr, linux.STATX_ALL, i.fs.devMinor),
+	}); err != nil {
+		return err
+	}
+
+	return nil
+}
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index ef63a19473..ea4f679c2a 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -256,6 +256,13 @@ func (a *InodeAttrs) Stat(context.Context, *vfs.Filesystem, vfs.StatOptions) (li
 
 // SetStat implements Inode.SetStat.
 func (a *InodeAttrs) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
+	return a.SetInodeStat(ctx, fs, creds, opts)
+}
+
+// SetInodeStat sets the corresponding attributes from opts to InodeAttrs.
+// This function can be used by other kernfs-based filesystem implementations
+// to set the unexported attributes of kernfs.InodeAttrs.
+func (a *InodeAttrs) SetInodeStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
 	if opts.Stat.Mask == 0 {
 		return nil
 	}
diff --git a/test/fuse/BUILD b/test/fuse/BUILD
index 55ad987482..29b9a9d93b 100644
--- a/test/fuse/BUILD
+++ b/test/fuse/BUILD
@@ -62,6 +62,11 @@ syscall_test(
     test = "//test/fuse/linux:create_test",
 )
 
+syscall_test(
+    fuse = "True",
+    test = "//test/fuse/linux:setstat_test",
+)
+
 syscall_test(
     size = "large",
     add_overlay = True,
diff --git a/test/fuse/linux/BUILD b/test/fuse/linux/BUILD
index 7a3e52fad9..7ecd6d8cb3 100644
--- a/test/fuse/linux/BUILD
+++ b/test/fuse/linux/BUILD
@@ -100,6 +100,22 @@ cc_binary(
     ],
 )
 
+cc_binary(
+    name = "setstat_test",
+    testonly = 1,
+    srcs = ["setstat_test.cc"],
+    deps = [
+        gtest,
+        ":fuse_fd_util",
+        "//test/util:cleanup",
+        "//test/util:fs_util",
+        "//test/util:fuse_util",
+        "//test/util:temp_umask",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
 cc_binary(
     name = "rmdir_test",
     testonly = 1,
diff --git a/test/fuse/linux/setstat_test.cc b/test/fuse/linux/setstat_test.cc
new file mode 100644
index 0000000000..68301c775d
--- /dev/null
+++ b/test/fuse/linux/setstat_test.cc
@@ -0,0 +1,338 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fuse.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include <utime.h>
+
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/fuse/linux/fuse_fd_util.h"
+#include "test/util/cleanup.h"
+#include "test/util/fs_util.h"
+#include "test/util/fuse_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class SetStatTest : public FuseFdTest {
+ public:
+  void SetUp() override {
+    FuseFdTest::SetUp();
+    test_dir_path_ = JoinPath(mount_point_.path(), test_dir_);
+    test_file_path_ = JoinPath(mount_point_.path(), test_file_);
+  }
+
+ protected:
+  const uint64_t fh = 23;
+  const std::string test_dir_ = "testdir";
+  const std::string test_file_ = "testfile";
+  const mode_t test_dir_mode_ = S_IFDIR | S_IRUSR | S_IWUSR | S_IXUSR;
+  const mode_t test_file_mode_ = S_IFREG | S_IRUSR | S_IWUSR | S_IXUSR;
+
+  std::string test_dir_path_;
+  std::string test_file_path_;
+};
+
+TEST_F(SetStatTest, ChmodDir) {
+  // Set up fixture.
+  SetServerInodeLookup(test_dir_, test_dir_mode_);
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_attr_out),
+      .error = 0,
+  };
+  mode_t set_mode = S_IRGRP | S_IWGRP | S_IXGRP;
+  struct fuse_attr_out out_payload = {
+      .attr = DefaultFuseAttr(set_mode, 2),
+  };
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
+  SetServerResponse(FUSE_SETATTR, iov_out);
+
+  // Make syscall.
+  EXPECT_THAT(chmod(test_dir_path_.c_str(), set_mode), SyscallSucceeds());
+
+  // Check FUSE request.
+  struct fuse_in_header in_header;
+  struct fuse_setattr_in in_payload;
+  auto iov_in = FuseGenerateIovecs(in_header, in_payload);
+
+  GetServerActualRequest(iov_in);
+  EXPECT_EQ(in_header.len, sizeof(in_header) + sizeof(in_payload));
+  EXPECT_EQ(in_header.opcode, FUSE_SETATTR);
+  EXPECT_EQ(in_header.uid, 0);
+  EXPECT_EQ(in_header.gid, 0);
+  EXPECT_EQ(in_payload.valid, FATTR_MODE);
+  EXPECT_EQ(in_payload.mode, S_IFDIR | set_mode);
+}
+
+TEST_F(SetStatTest, ChownDir) {
+  // Set up fixture.
+  SetServerInodeLookup(test_dir_, test_dir_mode_);
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_attr_out),
+      .error = 0,
+  };
+  struct fuse_attr_out out_payload = {
+      .attr = DefaultFuseAttr(test_dir_mode_, 2),
+  };
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
+  SetServerResponse(FUSE_SETATTR, iov_out);
+
+  // Make syscall.
+  EXPECT_THAT(chown(test_dir_path_.c_str(), 1025, 1025), SyscallSucceeds());
+
+  // Check FUSE request.
+  struct fuse_in_header in_header;
+  struct fuse_setattr_in in_payload;
+  auto iov_in = FuseGenerateIovecs(in_header, in_payload);
+
+  GetServerActualRequest(iov_in);
+  EXPECT_EQ(in_header.len, sizeof(in_header) + sizeof(in_payload));
+  EXPECT_EQ(in_header.opcode, FUSE_SETATTR);
+  EXPECT_EQ(in_header.uid, 0);
+  EXPECT_EQ(in_header.gid, 0);
+  EXPECT_EQ(in_payload.valid, FATTR_UID | FATTR_GID);
+  EXPECT_EQ(in_payload.uid, 1025);
+  EXPECT_EQ(in_payload.gid, 1025);
+}
+
+TEST_F(SetStatTest, TruncateFile) {
+  // Set up fixture.
+  SetServerInodeLookup(test_file_, test_file_mode_);
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_attr_out),
+      .error = 0,
+  };
+  struct fuse_attr_out out_payload = {
+      .attr = DefaultFuseAttr(S_IFREG | S_IRUSR | S_IWUSR, 2),
+  };
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
+  SetServerResponse(FUSE_SETATTR, iov_out);
+
+  // Make syscall.
+  EXPECT_THAT(truncate(test_file_path_.c_str(), 321), SyscallSucceeds());
+
+  // Check FUSE request.
+  struct fuse_in_header in_header;
+  struct fuse_setattr_in in_payload;
+  auto iov_in = FuseGenerateIovecs(in_header, in_payload);
+
+  GetServerActualRequest(iov_in);
+  EXPECT_EQ(in_header.len, sizeof(in_header) + sizeof(in_payload));
+  EXPECT_EQ(in_header.opcode, FUSE_SETATTR);
+  EXPECT_EQ(in_header.uid, 0);
+  EXPECT_EQ(in_header.gid, 0);
+  EXPECT_EQ(in_payload.valid, FATTR_SIZE);
+  EXPECT_EQ(in_payload.size, 321);
+}
+
+TEST_F(SetStatTest, UtimeFile) {
+  // Set up fixture.
+  SetServerInodeLookup(test_file_, test_file_mode_);
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_attr_out),
+      .error = 0,
+  };
+  struct fuse_attr_out out_payload = {
+      .attr = DefaultFuseAttr(S_IFREG | S_IRUSR | S_IWUSR, 2),
+  };
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
+  SetServerResponse(FUSE_SETATTR, iov_out);
+
+  // Make syscall.
+  time_t expected_atime = 1597159766, expected_mtime = 1597159765;
+  struct utimbuf times = {
+      .actime = expected_atime,
+      .modtime = expected_mtime,
+  };
+  EXPECT_THAT(utime(test_file_path_.c_str(), &times), SyscallSucceeds());
+
+  // Check FUSE request.
+  struct fuse_in_header in_header;
+  struct fuse_setattr_in in_payload;
+  auto iov_in = FuseGenerateIovecs(in_header, in_payload);
+
+  GetServerActualRequest(iov_in);
+  EXPECT_EQ(in_header.len, sizeof(in_header) + sizeof(in_payload));
+  EXPECT_EQ(in_header.opcode, FUSE_SETATTR);
+  EXPECT_EQ(in_header.uid, 0);
+  EXPECT_EQ(in_header.gid, 0);
+  EXPECT_EQ(in_payload.valid, FATTR_ATIME | FATTR_MTIME);
+  EXPECT_EQ(in_payload.atime, expected_atime);
+  EXPECT_EQ(in_payload.mtime, expected_mtime);
+}
+
+TEST_F(SetStatTest, UtimesFile) {
+  // Set up fixture.
+  SetServerInodeLookup(test_file_, test_file_mode_);
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_attr_out),
+      .error = 0,
+  };
+  struct fuse_attr_out out_payload = {
+      .attr = DefaultFuseAttr(test_file_mode_, 2),
+  };
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
+  SetServerResponse(FUSE_SETATTR, iov_out);
+
+  // Make syscall.
+  struct timeval expected_times[2] = {
+      {
+          .tv_sec = 1597159766,
+          .tv_usec = 234945,
+      },
+      {
+          .tv_sec = 1597159765,
+          .tv_usec = 232341,
+      },
+  };
+  EXPECT_THAT(utimes(test_file_path_.c_str(), expected_times),
+              SyscallSucceeds());
+
+  // Check FUSE request.
+  struct fuse_in_header in_header;
+  struct fuse_setattr_in in_payload;
+  auto iov_in = FuseGenerateIovecs(in_header, in_payload);
+
+  GetServerActualRequest(iov_in);
+  EXPECT_EQ(in_header.len, sizeof(in_header) + sizeof(in_payload));
+  EXPECT_EQ(in_header.opcode, FUSE_SETATTR);
+  EXPECT_EQ(in_header.uid, 0);
+  EXPECT_EQ(in_header.gid, 0);
+  EXPECT_EQ(in_payload.valid, FATTR_ATIME | FATTR_MTIME);
+  EXPECT_EQ(in_payload.atime, expected_times[0].tv_sec);
+  EXPECT_EQ(in_payload.atimensec, expected_times[0].tv_usec * 1000);
+  EXPECT_EQ(in_payload.mtime, expected_times[1].tv_sec);
+  EXPECT_EQ(in_payload.mtimensec, expected_times[1].tv_usec * 1000);
+}
+
+TEST_F(SetStatTest, FtruncateFile) {
+  // Set up fixture.
+  SetServerInodeLookup(test_file_, test_file_mode_);
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenPath(test_file_path_, O_RDWR, fh));
+  auto close_fd = CloseFD(fd);
+
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_attr_out),
+      .error = 0,
+  };
+  struct fuse_attr_out out_payload = {
+      .attr = DefaultFuseAttr(test_file_mode_, 2),
+  };
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
+  SetServerResponse(FUSE_SETATTR, iov_out);
+
+  // Make syscall.
+  EXPECT_THAT(ftruncate(fd.get(), 321), SyscallSucceeds());
+
+  // Check FUSE request.
+  struct fuse_in_header in_header;
+  struct fuse_setattr_in in_payload;
+  auto iov_in = FuseGenerateIovecs(in_header, in_payload);
+
+  GetServerActualRequest(iov_in);
+  EXPECT_EQ(in_header.len, sizeof(in_header) + sizeof(in_payload));
+  EXPECT_EQ(in_header.opcode, FUSE_SETATTR);
+  EXPECT_EQ(in_header.uid, 0);
+  EXPECT_EQ(in_header.gid, 0);
+  EXPECT_EQ(in_payload.valid, FATTR_SIZE | FATTR_FH);
+  EXPECT_EQ(in_payload.fh, fh);
+  EXPECT_EQ(in_payload.size, 321);
+}
+
+TEST_F(SetStatTest, FchmodFile) {
+  // Set up fixture.
+  SetServerInodeLookup(test_file_, test_file_mode_);
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenPath(test_file_path_, O_RDWR, fh));
+  auto close_fd = CloseFD(fd);
+
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_attr_out),
+      .error = 0,
+  };
+  mode_t set_mode = S_IROTH | S_IWOTH | S_IXOTH;
+  struct fuse_attr_out out_payload = {
+      .attr = DefaultFuseAttr(set_mode, 2),
+  };
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
+  SetServerResponse(FUSE_SETATTR, iov_out);
+
+  // Make syscall.
+  EXPECT_THAT(fchmod(fd.get(), set_mode), SyscallSucceeds());
+
+  // Check FUSE request.
+  struct fuse_in_header in_header;
+  struct fuse_setattr_in in_payload;
+  auto iov_in = FuseGenerateIovecs(in_header, in_payload);
+
+  GetServerActualRequest(iov_in);
+  EXPECT_EQ(in_header.len, sizeof(in_header) + sizeof(in_payload));
+  EXPECT_EQ(in_header.opcode, FUSE_SETATTR);
+  EXPECT_EQ(in_header.uid, 0);
+  EXPECT_EQ(in_header.gid, 0);
+  EXPECT_EQ(in_payload.valid, FATTR_MODE | FATTR_FH);
+  EXPECT_EQ(in_payload.fh, fh);
+  EXPECT_EQ(in_payload.mode, S_IFREG | set_mode);
+}
+
+TEST_F(SetStatTest, FchownFile) {
+  // Set up fixture.
+  SetServerInodeLookup(test_file_, test_file_mode_);
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenPath(test_file_path_, O_RDWR, fh));
+  auto close_fd = CloseFD(fd);
+
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_attr_out),
+      .error = 0,
+  };
+  struct fuse_attr_out out_payload = {
+      .attr = DefaultFuseAttr(S_IFREG | S_IRUSR | S_IWUSR | S_IXUSR, 2),
+  };
+  auto iov_out = FuseGenerateIovecs(out_header, out_payload);
+  SetServerResponse(FUSE_SETATTR, iov_out);
+
+  // Make syscall.
+  EXPECT_THAT(fchown(fd.get(), 1025, 1025), SyscallSucceeds());
+
+  // Check FUSE request.
+  struct fuse_in_header in_header;
+  struct fuse_setattr_in in_payload;
+  auto iov_in = FuseGenerateIovecs(in_header, in_payload);
+
+  GetServerActualRequest(iov_in);
+  EXPECT_EQ(in_header.len, sizeof(in_header) + sizeof(in_payload));
+  EXPECT_EQ(in_header.opcode, FUSE_SETATTR);
+  EXPECT_EQ(in_header.uid, 0);
+  EXPECT_EQ(in_header.gid, 0);
+  EXPECT_EQ(in_payload.valid, FATTR_UID | FATTR_GID | FATTR_FH);
+  EXPECT_EQ(in_payload.fh, fh);
+  EXPECT_EQ(in_payload.uid, 1025);
+  EXPECT_EQ(in_payload.gid, 1025);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor

From b03fbc7dd4bae75bcccd36a2815f0761470f15a9 Mon Sep 17 00:00:00 2001
From: Craig Chi <craigchi@google.com>
Date: Wed, 9 Sep 2020 13:10:56 -0700
Subject: [PATCH 196/211] Add comments for exported attributes

---
 pkg/abi/linux/fuse.go | 56 +++++++++++++++++++++++++++++++++----------
 1 file changed, 43 insertions(+), 13 deletions(-)

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index fcc957bfe6..ba3316ad60 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -252,22 +252,52 @@ type FUSEGetAttrIn struct {
 //
 // +marshal
 type FUSEAttr struct {
-	Ino       uint64
-	Size      uint64
-	Blocks    uint64
-	Atime     uint64
-	Mtime     uint64
-	Ctime     uint64
+	// Ino is the inode number of this file.
+	Ino uint64
+
+	// Size is the size of this file.
+	Size uint64
+
+	// Blocks is the number of the 512B blocks allocated by this file.
+	Blocks uint64
+
+	// Atime is the time of last access.
+	Atime uint64
+
+	// Mtime is the time of last modification.
+	Mtime uint64
+
+	// Ctime is the time of last status change.
+	Ctime uint64
+
+	// AtimeNsec is the nanosecond part of Atime.
 	AtimeNsec uint32
+
+	// MtimeNsec is the nanosecond part of Mtime.
 	MtimeNsec uint32
+
+	// CtimeNsec is the nanosecond part of Ctime.
 	CtimeNsec uint32
-	Mode      uint32
-	Nlink     uint32
-	UID       uint32
-	GID       uint32
-	Rdev      uint32
-	BlkSize   uint32
-	_         uint32
+
+	// Mode contains the file type and mode.
+	Mode uint32
+
+	// Nlink is the number of the hard links.
+	Nlink uint32
+
+	// UID is user ID of the owner.
+	UID uint32
+
+	// GID is group ID of the owner.
+	GID uint32
+
+	// Rdev is the device ID if this is a special file.
+	Rdev uint32
+
+	// BlkSize is the block size for filesystem I/O.
+	BlkSize uint32
+
+	_ uint32
 }
 
 // FUSEGetAttrOut is the reply sent by the daemon to the kernel

From e67e30bdce9e7ed910c1cb136b4c82223b1d7f8c Mon Sep 17 00:00:00 2001
From: Jinmou Li <jinmli@google.com>
Date: Thu, 3 Sep 2020 18:41:26 +0000
Subject: [PATCH 197/211] Refactor FUSE connection for readability and
 structure

This change moves the code that is only weakly
tied to the connection struct out of connection.go,
renames variables and files to more meaningful names,
adds detailed comments, explains the lock order,
and makes other minor improvements so that
the existing FUSE code is more readable and
better organized.

Purpose is to avoid too much code in one file
and provide better structure for the
future commits.
---
 pkg/sentry/fsimpl/fuse/BUILD                  |   2 +-
 pkg/sentry/fsimpl/fuse/connection.go          | 264 ++++--------------
 .../fuse/{init.go => connection_control.go}   |  98 ++++---
 pkg/sentry/fsimpl/fuse/dev_test.go            |   2 +-
 pkg/sentry/fsimpl/fuse/fusefs.go              |  25 +-
 pkg/sentry/fsimpl/fuse/request_response.go    | 150 ++++++++++
 6 files changed, 273 insertions(+), 268 deletions(-)
 rename pkg/sentry/fsimpl/fuse/{init.go => connection_control.go} (76%)

diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD
index 86dc6a7a41..23660a7084 100644
--- a/pkg/sentry/fsimpl/fuse/BUILD
+++ b/pkg/sentry/fsimpl/fuse/BUILD
@@ -30,11 +30,11 @@ go_library(
     name = "fuse",
     srcs = [
         "connection.go",
+        "connection_control.go",
         "dev.go",
         "directory.go",
         "file.go",
         "fusefs.go",
-        "init.go",
         "inode_refs.go",
         "read_write.go",
         "register.go",
diff --git a/pkg/sentry/fsimpl/fuse/connection.go b/pkg/sentry/fsimpl/fuse/connection.go
index 8dd86afade..9d72b927f2 100644
--- a/pkg/sentry/fsimpl/fuse/connection.go
+++ b/pkg/sentry/fsimpl/fuse/connection.go
@@ -16,31 +16,17 @@ package fuse
 
 import (
 	"errors"
-	"fmt"
 	"sync"
-	"sync/atomic"
-	"syscall"
-
-	"gvisor.dev/gvisor/tools/go_marshal/marshal"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
-	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/waiter"
 )
 
-// maxActiveRequestsDefault is the default setting controlling the upper bound
-// on the number of active requests at any given time.
-const maxActiveRequestsDefault = 10000
-
-// Ordinary requests have even IDs, while interrupts IDs are odd.
-// Used to increment the unique ID for each FUSE request.
-var reqIDStep uint64 = 2
-
 const (
 	// fuseDefaultMaxBackground is the default value for MaxBackground.
 	fuseDefaultMaxBackground = 12
@@ -53,33 +39,11 @@ const (
 	fuseDefaultMaxPagesPerReq = 32
 )
 
-// Request represents a FUSE operation request that hasn't been sent to the
-// server yet.
-//
-// +stateify savable
-type Request struct {
-	requestEntry
-
-	id   linux.FUSEOpID
-	hdr  *linux.FUSEHeaderIn
-	data []byte
-
-	// payload for this request: extra bytes to write after
-	// the data slice. Used by FUSE_WRITE.
-	payload []byte
-}
-
-// Response represents an actual response from the server, including the
-// response payload.
-//
-// +stateify savable
-type Response struct {
-	opcode linux.FUSEOpcode
-	hdr    linux.FUSEHeaderOut
-	data   []byte
-}
-
 // connection is the struct by which the sentry communicates with the FUSE server daemon.
+// Lock order:
+// - conn.fd.mu
+// - conn.mu
+// - conn.asyncMu
 type connection struct {
 	fd *DeviceFD
 
@@ -114,10 +78,6 @@ type connection struct {
 	// initializedChan is used to block requests before initialization.
 	initializedChan chan struct{}
 
-	// blocked when there are too many outstading backgrounds requests (NumBackground == MaxBackground).
-	// TODO(gvisor.dev/issue/3185): update the numBackground accordingly; use a channel to block.
-	blocked bool
-
 	// connected (connection established) when a new FUSE file system is created.
 	// Set to false when:
 	//   umount,
@@ -125,51 +85,52 @@ type connection struct {
 	//   device release.
 	connected bool
 
-	// aborted via sysfs.
-	// TODO(gvisor.dev/issue/3185): abort all queued requests.
-	aborted bool
-
-	// atomicOTrunc is true when FUSE does not send a separate SETATTR request
-	// before open with O_TRUNC flag.
-	// Negotiated and only set in INIT.
-	atomicOTrunc bool
-
 	// connInitError if FUSE_INIT encountered error (major version mismatch).
 	// Only set in INIT.
 	connInitError bool
 
 	// connInitSuccess if FUSE_INIT is successful.
 	// Only set in INIT.
-	// Used for destory.
+	// Used for destroy (not yet implemented).
 	connInitSuccess bool
 
-	// TODO(gvisor.dev/issue/3185): All the queue logic are working in progress.
-
-	// NumberBackground is the number of requests in the background.
-	numBackground uint16
+	// aborted via sysfs, and will send ECONNABORTED to read after disconnection (instead of ENODEV).
+	// Set only if abortErr is true and via fuse control fs (not yet implemented).
+	// TODO(gvisor.dev/issue/3525): set this to true when user aborts.
+	aborted bool
 
-	// congestionThreshold for NumBackground.
-	// Negotiated in FUSE_INIT.
-	congestionThreshold uint16
+	// numWaiting is the number of requests waiting to be
+	// sent to FUSE device or being processed by FUSE daemon.
+	numWaiting uint32
 
-	// maxBackground is the maximum number of NumBackground.
-	// Block connection when it is reached.
-	// Negotiated in FUSE_INIT.
-	maxBackground uint16
+	// Terminology note:
+	//
+	// - `asyncNumMax` is the `MaxBackground` in the FUSE_INIT reply (FUSEInitOut).
+	//
+	// - `asyncCongestionThreshold` is the `CongestionThreshold` in the FUSE_INIT reply.
+	//
+	// What unix calls "background" requests are called async requests here.
+	// What unix calls "async requests" are our async requests that expect a reply,
+	// i.e. `!requestOptions.noReply`
 
-	// numActiveBackground is the number of requests in background and has being marked as active.
-	numActiveBackground uint16
+	// asyncMu protects the async request fields.
+	asyncMu sync.Mutex
 
-	// numWating is the number of requests waiting for completion.
-	numWaiting uint32
+	// asyncNum is the number of async requests.
+	// Protected by asyncMu.
+	asyncNum uint16
 
-	// TODO(gvisor.dev/issue/3185): BgQueue
-	// some queue for background queued requests.
+	// asyncCongestionThreshold is the congestion threshold for asyncNum.
+	// Negotiated in FUSE_INIT as "CongestionThreshold".
+	// TODO(gvisor.dev/issue/3529): add congestion control.
+	// Protected by asyncMu.
+	asyncCongestionThreshold uint16
 
-	// bgLock protects:
-	// MaxBackground, CongestionThreshold, NumBackground,
-	// NumActiveBackground, BgQueue, Blocked.
-	bgLock sync.Mutex
+	// asyncNumMax is the maximum number of asyncNum.
+	// Connection blocks the async requests when it is reached.
+	// Negotiated in FUSE_INIT as "MaxBackground".
+	// Protected by asyncMu.
+	asyncNumMax uint16
 
 	// maxRead is the maximum size of a read buffer in in bytes.
 	// Initialized from a fuse fs parameter.
@@ -187,6 +148,11 @@ type connection struct {
 	// Negotiated and only set in INIT.
 	minor uint32
 
+	// atomicOTrunc is true when FUSE does not send a separate SETATTR request
+	// before open with O_TRUNC flag.
+	// Negotiated and only set in INIT.
+	atomicOTrunc bool
+
 	// asyncRead if read pages asynchronously.
 	// Negotiated and only set in INIT.
 	asyncRead bool
@@ -224,63 +190,13 @@ func newFUSEConnection(_ context.Context, fd *vfs.FileDescription, opts *filesys
 	fuseFD.writeCursor = 0
 
 	return &connection{
-		fd:                  fuseFD,
-		maxBackground:       fuseDefaultMaxBackground,
-		congestionThreshold: fuseDefaultCongestionThreshold,
-		maxRead:             opts.maxRead,
-		maxPages:            fuseDefaultMaxPagesPerReq,
-		initializedChan:     make(chan struct{}),
-		connected:           true,
-	}, nil
-}
-
-// SetInitialized atomically sets the connection as initialized.
-func (conn *connection) SetInitialized() {
-	// Unblock the requests sent before INIT.
-	close(conn.initializedChan)
-
-	// Close the channel first to avoid the non-atomic situation
-	// where conn.initialized is true but there are
-	// tasks being blocked on the channel.
-	// And it prevents the newer tasks from gaining
-	// unnecessary higher chance to be issued before the blocked one.
-
-	atomic.StoreInt32(&(conn.initialized), int32(1))
-}
-
-// IsInitialized atomically check if the connection is initialized.
-// pairs with SetInitialized().
-func (conn *connection) Initialized() bool {
-	return atomic.LoadInt32(&(conn.initialized)) != 0
-}
-
-// NewRequest creates a new request that can be sent to the FUSE server.
-func (conn *connection) NewRequest(creds *auth.Credentials, pid uint32, ino uint64, opcode linux.FUSEOpcode, payload marshal.Marshallable) (*Request, error) {
-	conn.fd.mu.Lock()
-	defer conn.fd.mu.Unlock()
-	conn.fd.nextOpID += linux.FUSEOpID(reqIDStep)
-
-	hdrLen := (*linux.FUSEHeaderIn)(nil).SizeBytes()
-	hdr := linux.FUSEHeaderIn{
-		Len:    uint32(hdrLen + payload.SizeBytes()),
-		Opcode: opcode,
-		Unique: conn.fd.nextOpID,
-		NodeID: ino,
-		UID:    uint32(creds.EffectiveKUID),
-		GID:    uint32(creds.EffectiveKGID),
-		PID:    pid,
-	}
-
-	buf := make([]byte, hdr.Len)
-
-	// TODO(gVisor.dev/3698): Use the unsafe version once go_marshal is safe to use again.
-	hdr.MarshalBytes(buf[:hdrLen])
-	payload.MarshalBytes(buf[hdrLen:])
-
-	return &Request{
-		id:   hdr.Unique,
-		hdr:  &hdr,
-		data: buf,
+		fd:                       fuseFD,
+		asyncNumMax:              fuseDefaultMaxBackground,
+		asyncCongestionThreshold: fuseDefaultCongestionThreshold,
+		maxRead:                  opts.maxRead,
+		maxPages:                 fuseDefaultMaxPagesPerReq,
+		initializedChan:          make(chan struct{}),
+		connected:                true,
 	}, nil
 }
 
@@ -336,43 +252,6 @@ func (conn *connection) call(t *kernel.Task, r *Request) (*Response, error) {
 	return fut.resolve(t)
 }
 
-// Error returns the error of the FUSE call.
-func (r *Response) Error() error {
-	errno := r.hdr.Error
-	if errno >= 0 {
-		return nil
-	}
-
-	sysErrNo := syscall.Errno(-errno)
-	return error(sysErrNo)
-}
-
-// DataLen returns the size of the response without the header.
-func (r *Response) DataLen() uint32 {
-	return r.hdr.Len - uint32(r.hdr.SizeBytes())
-}
-
-// UnmarshalPayload unmarshals the response data into m.
-func (r *Response) UnmarshalPayload(m marshal.Marshallable) error {
-	hdrLen := r.hdr.SizeBytes()
-	haveDataLen := r.hdr.Len - uint32(hdrLen)
-	wantDataLen := uint32(m.SizeBytes())
-
-	if haveDataLen < wantDataLen {
-		return fmt.Errorf("payload too small. Minimum data lenth required: %d,  but got data length %d", wantDataLen, haveDataLen)
-	}
-
-	// The response data is empty unless there is some payload. And so, doesn't
-	// need to be unmarshalled.
-	if r.data == nil {
-		return nil
-	}
-
-	// TODO(gVisor.dev/3698): Use the unsafe version once go_marshal is safe to use again.
-	m.UnmarshalBytes(r.data[hdrLen:])
-	return nil
-}
-
 // callFuture makes a request to the server and returns a future response.
 // Call resolve() when the response needs to be fulfilled.
 func (conn *connection) callFuture(t *kernel.Task, r *Request) (*futureResponse, error) {
@@ -421,50 +300,3 @@ func (conn *connection) callFutureLocked(t *kernel.Task, r *Request) (*futureRes
 
 	return fut, nil
 }
-
-// futureResponse represents an in-flight request, that may or may not have
-// completed yet. Convert it to a resolved Response by calling Resolve, but note
-// that this may block.
-//
-// +stateify savable
-type futureResponse struct {
-	opcode linux.FUSEOpcode
-	ch     chan struct{}
-	hdr    *linux.FUSEHeaderOut
-	data   []byte
-}
-
-// newFutureResponse creates a future response to a FUSE request.
-func newFutureResponse(opcode linux.FUSEOpcode) *futureResponse {
-	return &futureResponse{
-		opcode: opcode,
-		ch:     make(chan struct{}),
-	}
-}
-
-// resolve blocks the task until the server responds to its corresponding request,
-// then returns a resolved response.
-func (f *futureResponse) resolve(t *kernel.Task) (*Response, error) {
-	// If there is no Task associated with this request  - then we don't try to resolve
-	// the response.  Instead, the task writing the response (proxy to the server) will
-	// process the response on our behalf.
-	if t == nil {
-		log.Infof("fuse.Response.resolve: Not waiting on a response from server.")
-		return nil, nil
-	}
-
-	if err := t.Block(f.ch); err != nil {
-		return nil, err
-	}
-
-	return f.getResponse(), nil
-}
-
-// getResponse creates a Response from the data the futureResponse has.
-func (f *futureResponse) getResponse() *Response {
-	return &Response{
-		opcode: f.opcode,
-		hdr:    *f.hdr,
-		data:   f.data,
-	}
-}
diff --git a/pkg/sentry/fsimpl/fuse/init.go b/pkg/sentry/fsimpl/fuse/connection_control.go
similarity index 76%
rename from pkg/sentry/fsimpl/fuse/init.go
rename to pkg/sentry/fsimpl/fuse/connection_control.go
index a47309b6e7..d84d9caf24 100644
--- a/pkg/sentry/fsimpl/fuse/init.go
+++ b/pkg/sentry/fsimpl/fuse/connection_control.go
@@ -15,6 +15,8 @@
 package fuse
 
 import (
+	"sync/atomic"
+
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 )
@@ -50,6 +52,26 @@ var (
 	MaxUserCongestionThreshold uint16 = fuseDefaultCongestionThreshold
 )
 
+// SetInitialized atomically sets the connection as initialized.
+func (conn *connection) SetInitialized() {
+	// Unblock the requests sent before INIT.
+	close(conn.initializedChan)
+
+	// Close the channel first to avoid the non-atomic situation
+	// where conn.initialized is true but there are
+	// tasks being blocked on the channel.
+	// And it prevents the newer tasks from gaining
+	// unnecessary higher chance to be issued before the blocked one.
+
+	atomic.StoreInt32(&(conn.initialized), int32(1))
+}
+
+// Initialized atomically checks whether the connection is initialized.
+// It pairs with SetInitialized().
+func (conn *connection) Initialized() bool {
+	return atomic.LoadInt32(&(conn.initialized)) != 0
+}
+
 // InitSend sends a FUSE_INIT request.
 func (conn *connection) InitSend(creds *auth.Credentials, pid uint32) error {
 	in := linux.FUSEInitIn{
@@ -85,15 +107,15 @@ func (conn *connection) InitRecv(res *Response, hasSysAdminCap bool) error {
 }
 
 // Process the FUSE_INIT reply from the FUSE server.
+// It acquires the conn.asyncMu lock if the minor version is 13 or newer.
 func (conn *connection) initProcessReply(out *linux.FUSEInitOut, hasSysAdminCap bool) error {
+	// Whether or not an error occurs, always mark the connection as
+	// initialized to unblock the blocked requests.
+	defer conn.SetInitialized()
+
 	// No support for old major fuse versions.
 	if out.Major != linux.FUSE_KERNEL_VERSION {
 		conn.connInitError = true
-
-		// Set the connection as initialized and unblock the blocked requests
-		// (i.e. return error for them).
-		conn.SetInitialized()
-
 		return nil
 	}
 
@@ -101,29 +123,14 @@ func (conn *connection) initProcessReply(out *linux.FUSEInitOut, hasSysAdminCap
 	conn.connInitSuccess = true
 	conn.minor = out.Minor
 
-	// No support for limits before minor version 13.
-	if out.Minor >= 13 {
-		conn.bgLock.Lock()
-
-		if out.MaxBackground > 0 {
-			conn.maxBackground = out.MaxBackground
-
-			if !hasSysAdminCap &&
-				conn.maxBackground > MaxUserBackgroundRequest {
-				conn.maxBackground = MaxUserBackgroundRequest
-			}
-		}
-
-		if out.CongestionThreshold > 0 {
-			conn.congestionThreshold = out.CongestionThreshold
-
-			if !hasSysAdminCap &&
-				conn.congestionThreshold > MaxUserCongestionThreshold {
-				conn.congestionThreshold = MaxUserCongestionThreshold
-			}
-		}
-
-		conn.bgLock.Unlock()
+	// No support for negotiating MaxWrite before minor version 5.
+	if out.Minor >= 5 {
+		conn.maxWrite = out.MaxWrite
+	} else {
+		conn.maxWrite = fuseMinMaxWrite
+	}
+	if conn.maxWrite < fuseMinMaxWrite {
+		conn.maxWrite = fuseMinMaxWrite
 	}
 
 	// No support for the following flags before minor version 6.
@@ -147,19 +154,30 @@ func (conn *connection) initProcessReply(out *linux.FUSEInitOut, hasSysAdminCap
 		}
 	}
 
-	// No support for negotiating MaxWrite before minor version 5.
-	if out.Minor >= 5 {
-		conn.maxWrite = out.MaxWrite
-	} else {
-		conn.maxWrite = fuseMinMaxWrite
-	}
-	if conn.maxWrite < fuseMinMaxWrite {
-		conn.maxWrite = fuseMinMaxWrite
-	}
+	// No support for limits before minor version 13.
+	if out.Minor >= 13 {
+		conn.asyncMu.Lock()
+
+		if out.MaxBackground > 0 {
+			conn.asyncNumMax = out.MaxBackground
+
+			if !hasSysAdminCap &&
+				conn.asyncNumMax > MaxUserBackgroundRequest {
+				conn.asyncNumMax = MaxUserBackgroundRequest
+			}
+		}
 
-	// Set connection as initialized and unblock the requests
-	// issued before init.
-	conn.SetInitialized()
+		if out.CongestionThreshold > 0 {
+			conn.asyncCongestionThreshold = out.CongestionThreshold
+
+			if !hasSysAdminCap &&
+				conn.asyncCongestionThreshold > MaxUserCongestionThreshold {
+				conn.asyncCongestionThreshold = MaxUserCongestionThreshold
+			}
+		}
+
+		conn.asyncMu.Unlock()
+	}
 
 	return nil
 }
diff --git a/pkg/sentry/fsimpl/fuse/dev_test.go b/pkg/sentry/fsimpl/fuse/dev_test.go
index aedc2fa390..9366834546 100644
--- a/pkg/sentry/fsimpl/fuse/dev_test.go
+++ b/pkg/sentry/fsimpl/fuse/dev_test.go
@@ -369,7 +369,7 @@ func newTestConnection(system *testutil.System, k *kernel.Kernel, maxActiveReque
 	fsopts := filesystemOptions{
 		maxActiveRequests: maxActiveRequests,
 	}
-	fs, err := NewFUSEFilesystem(system.Ctx, 0, &fsopts, &fuseDev.vfsfd)
+	fs, err := newFUSEFilesystem(system.Ctx, 0, &fsopts, &fuseDev.vfsfd)
 	if err != nil {
 		return nil, nil, err
 	}
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index 5722453030..30725182a2 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -35,6 +35,10 @@ import (
 // Name is the default filesystem name.
 const Name = "fuse"
 
+// maxActiveRequestsDefault is the default setting controlling the upper bound
+// on the number of active requests at any given time.
+const maxActiveRequestsDefault = 10000
+
 // FilesystemType implements vfs.FilesystemType.
 type FilesystemType struct{}
 
@@ -168,12 +172,12 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
 
 	// Check for unparsed options.
 	if len(mopts) != 0 {
-		log.Warningf("%s.GetFilesystem: unknown options: %v", fsType.Name(), mopts)
+		log.Warningf("%s.GetFilesystem: unsupported or unknown options: %v", fsType.Name(), mopts)
 		return nil, nil, syserror.EINVAL
 	}
 
 	// Create a new FUSE filesystem.
-	fs, err := NewFUSEFilesystem(ctx, devMinor, &fsopts, fuseFd)
+	fs, err := newFUSEFilesystem(ctx, devMinor, &fsopts, fuseFd)
 	if err != nil {
 		log.Warningf("%s.NewFUSEFilesystem: failed with error: %v", fsType.Name(), err)
 		return nil, nil, err
@@ -194,21 +198,22 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
 	return fs.VFSFilesystem(), root.VFSDentry(), nil
 }
 
-// NewFUSEFilesystem creates a new FUSE filesystem.
-func NewFUSEFilesystem(ctx context.Context, devMinor uint32, opts *filesystemOptions, device *vfs.FileDescription) (*filesystem, error) {
-	fs := &filesystem{
-		devMinor: devMinor,
-		opts:     opts,
-	}
-
+// newFUSEFilesystem creates a new FUSE filesystem.
+func newFUSEFilesystem(ctx context.Context, devMinor uint32, opts *filesystemOptions, device *vfs.FileDescription) (*filesystem, error) {
 	conn, err := newFUSEConnection(ctx, device, opts)
 	if err != nil {
 		log.Warningf("fuse.NewFUSEFilesystem: NewFUSEConnection failed with error: %v", err)
 		return nil, syserror.EINVAL
 	}
 
-	fs.conn = conn
 	fuseFD := device.Impl().(*DeviceFD)
+
+	fs := &filesystem{
+		devMinor: devMinor,
+		opts:     opts,
+		conn:     conn,
+	}
+
 	fuseFD.fs = fs
 
 	return fs, nil
diff --git a/pkg/sentry/fsimpl/fuse/request_response.go b/pkg/sentry/fsimpl/fuse/request_response.go
index a69b212218..648eaf2639 100644
--- a/pkg/sentry/fsimpl/fuse/request_response.go
+++ b/pkg/sentry/fsimpl/fuse/request_response.go
@@ -15,7 +15,13 @@
 package fuse
 
 import (
+	"fmt"
+	"syscall"
+
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/log"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/usermem"
 	"gvisor.dev/gvisor/tools/go_marshal/marshal"
 )
@@ -71,3 +77,147 @@ func (r *fuseInitRes) UnmarshalBytes(src []byte) {
 func (r *fuseInitRes) SizeBytes() int {
 	return int(r.initLen)
 }
+
+// Ordinary requests have even IDs, while interrupt IDs are odd.
+// Used to increment the unique ID for each FUSE request.
+var reqIDStep uint64 = 2
+
+// Request represents a FUSE operation request that hasn't been sent to the
+// server yet.
+//
+// +stateify savable
+type Request struct {
+	requestEntry
+
+	id   linux.FUSEOpID
+	hdr  *linux.FUSEHeaderIn
+	data []byte
+
+	// payload for this request: extra bytes to write after
+	// the data slice. Used by FUSE_WRITE.
+	payload []byte
+}
+
+// NewRequest creates a new request that can be sent to the FUSE server.
+func (conn *connection) NewRequest(creds *auth.Credentials, pid uint32, ino uint64, opcode linux.FUSEOpcode, payload marshal.Marshallable) (*Request, error) {
+	conn.fd.mu.Lock()
+	defer conn.fd.mu.Unlock()
+	conn.fd.nextOpID += linux.FUSEOpID(reqIDStep)
+
+	hdrLen := (*linux.FUSEHeaderIn)(nil).SizeBytes()
+	hdr := linux.FUSEHeaderIn{
+		Len:    uint32(hdrLen + payload.SizeBytes()),
+		Opcode: opcode,
+		Unique: conn.fd.nextOpID,
+		NodeID: ino,
+		UID:    uint32(creds.EffectiveKUID),
+		GID:    uint32(creds.EffectiveKGID),
+		PID:    pid,
+	}
+
+	buf := make([]byte, hdr.Len)
+
+	// TODO(gVisor.dev/3698): Use the unsafe version once go_marshal is safe to use again.
+	hdr.MarshalBytes(buf[:hdrLen])
+	payload.MarshalBytes(buf[hdrLen:])
+
+	return &Request{
+		id:   hdr.Unique,
+		hdr:  &hdr,
+		data: buf,
+	}, nil
+}
+
+// futureResponse represents an in-flight request that may or may not have
+// completed yet. Convert it to a resolved Response by calling resolve, but note
+// that this may block.
+//
+// +stateify savable
+type futureResponse struct {
+	opcode linux.FUSEOpcode
+	ch     chan struct{}
+	hdr    *linux.FUSEHeaderOut
+	data   []byte
+}
+
+// newFutureResponse creates a future response to a FUSE request.
+func newFutureResponse(opcode linux.FUSEOpcode) *futureResponse {
+	return &futureResponse{
+		opcode: opcode,
+		ch:     make(chan struct{}),
+	}
+}
+
+// resolve blocks the task until the server responds to its corresponding request,
+// then returns a resolved response.
+func (f *futureResponse) resolve(t *kernel.Task) (*Response, error) {
+	// If there is no Task associated with this request  - then we don't try to resolve
+	// the response.  Instead, the task writing the response (proxy to the server) will
+	// process the response on our behalf.
+	if t == nil {
+		log.Infof("fuse.Response.resolve: Not waiting on a response from server.")
+		return nil, nil
+	}
+
+	if err := t.Block(f.ch); err != nil {
+		return nil, err
+	}
+
+	return f.getResponse(), nil
+}
+
+// getResponse creates a Response from the data the futureResponse has.
+func (f *futureResponse) getResponse() *Response {
+	return &Response{
+		opcode: f.opcode,
+		hdr:    *f.hdr,
+		data:   f.data,
+	}
+}
+
+// Response represents an actual response from the server, including the
+// response payload.
+//
+// +stateify savable
+type Response struct {
+	opcode linux.FUSEOpcode
+	hdr    linux.FUSEHeaderOut
+	data   []byte
+}
+
+// Error returns the error of the FUSE call.
+func (r *Response) Error() error {
+	errno := r.hdr.Error
+	if errno >= 0 {
+		return nil
+	}
+
+	sysErrNo := syscall.Errno(-errno)
+	return error(sysErrNo)
+}
+
+// DataLen returns the size of the response without the header.
+func (r *Response) DataLen() uint32 {
+	return r.hdr.Len - uint32(r.hdr.SizeBytes())
+}
+
+// UnmarshalPayload unmarshals the response data into m.
+func (r *Response) UnmarshalPayload(m marshal.Marshallable) error {
+	hdrLen := r.hdr.SizeBytes()
+	haveDataLen := r.hdr.Len - uint32(hdrLen)
+	wantDataLen := uint32(m.SizeBytes())
+
+	if haveDataLen < wantDataLen {
+		return fmt.Errorf("payload too small. Minimum data lenth required: %d,  but got data length %d", wantDataLen, haveDataLen)
+	}
+
+	// The response data is empty unless there is some payload. And so, doesn't
+	// need to be unmarshalled.
+	if r.data == nil {
+		return nil
+	}
+
+	// TODO(gVisor.dev/3698): Use the unsafe version once go_marshal is safe to use again.
+	m.UnmarshalBytes(r.data[hdrLen:])
+	return nil
+}

From 84acc461cda70348a70992aac984c0e6e2b1621f Mon Sep 17 00:00:00 2001
From: Jinmou Li <jinmli@google.com>
Date: Thu, 3 Sep 2020 19:16:17 +0000
Subject: [PATCH 198/211] Improve FUSE async/noreply call logic

This change adds bookkeeping variables to the
FUSE request. With them, the old, insecure and confusing
code that processed async requests is replaced
by new, clear, compiling code. Future code can take
advantage of these variables to have better control of each
request.
---
 pkg/sentry/fsimpl/fuse/connection.go       | 62 ++++++++++------------
 pkg/sentry/fsimpl/fuse/dev.go              | 31 +++++------
 pkg/sentry/fsimpl/fuse/request_response.go | 22 +++++---
 3 files changed, 53 insertions(+), 62 deletions(-)

diff --git a/pkg/sentry/fsimpl/fuse/connection.go b/pkg/sentry/fsimpl/fuse/connection.go
index 9d72b927f2..122b7d92f6 100644
--- a/pkg/sentry/fsimpl/fuse/connection.go
+++ b/pkg/sentry/fsimpl/fuse/connection.go
@@ -15,7 +15,6 @@
 package fuse
 
 import (
-	"errors"
 	"sync"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
@@ -200,42 +199,40 @@ func newFUSEConnection(_ context.Context, fd *vfs.FileDescription, opts *filesys
 	}, nil
 }
 
-// Call makes a request to the server and blocks the invoking task until a
-// server responds with a response. Task should never be nil.
-// Requests will not be sent before the connection is initialized.
-// For async tasks, use CallAsync().
-func (conn *connection) Call(t *kernel.Task, r *Request) (*Response, error) {
-	// Block requests sent before connection is initalized.
-	if !conn.Initialized() {
-		if err := t.Block(conn.initializedChan); err != nil {
-			return nil, err
-		}
-	}
-
-	return conn.call(t, r)
+// CallAsync makes an async (aka background) request.
+// It's a simple wrapper around Call().
+func (conn *connection) CallAsync(t *kernel.Task, r *Request) error {
+	r.async = true
+	_, err := conn.Call(t, r)
+	return err
 }
 
-// CallAsync makes an async (aka background) request.
-// Those requests either do not expect a response (e.g. release) or
-// the response should be handled by others (e.g. init).
-// Return immediately unless the connection is blocked (before initialization).
-// Async call example: init, release, forget, aio, interrupt.
+// Call makes a request to the server.
+// Block before the connection is initialized.
 // When the Request is FUSE_INIT, it will not be blocked before initialization.
-func (conn *connection) CallAsync(t *kernel.Task, r *Request) error {
+// Task should never be nil.
+//
+// For a sync request, it blocks the invoking task until
+// a server responds with a response.
+//
+// For an async request (that do not expect a response immediately),
+// it returns directly unless being blocked either before initialization
+// or when there are too many async requests ongoing.
+//
+// Example for async request:
+// init, readahead, write, async read/write, fuse_notify_reply,
+// non-sync release, interrupt, forget.
+//
+// The forget request does not have a reply,
+// as documented in include/uapi/linux/fuse.h:FUSE_FORGET.
+func (conn *connection) Call(t *kernel.Task, r *Request) (*Response, error) {
 	// Block requests sent before connection is initalized.
 	if !conn.Initialized() && r.hdr.Opcode != linux.FUSE_INIT {
 		if err := t.Block(conn.initializedChan); err != nil {
-			return err
+			return nil, err
 		}
 	}
 
-	// This should be the only place that invokes call() with a nil task.
-	_, err := conn.call(nil, r)
-	return err
-}
-
-// call makes a call without blocking checks.
-func (conn *connection) call(t *kernel.Task, r *Request) (*Response, error) {
 	if !conn.connected {
 		return nil, syserror.ENOTCONN
 	}
@@ -270,11 +267,6 @@ func (conn *connection) callFuture(t *kernel.Task, r *Request) (*futureResponse,
 	// if there are always too many ongoing requests all the time. The
 	// supported maxActiveRequests setting should be really high to avoid this.
 	for conn.fd.numActiveRequests == conn.fd.fs.opts.maxActiveRequests {
-		if t == nil {
-			// Since there is no task that is waiting. We must error out.
-			return nil, errors.New("FUSE request queue full")
-		}
-
 		log.Infof("Blocking request %v from being queued. Too many active requests: %v",
 			r.id, conn.fd.numActiveRequests)
 		conn.fd.mu.Unlock()
@@ -291,8 +283,8 @@ func (conn *connection) callFuture(t *kernel.Task, r *Request) (*futureResponse,
 // callFutureLocked makes a request to the server and returns a future response.
 func (conn *connection) callFutureLocked(t *kernel.Task, r *Request) (*futureResponse, error) {
 	conn.fd.queue.PushBack(r)
-	conn.fd.numActiveRequests += 1
-	fut := newFutureResponse(r.hdr.Opcode)
+	conn.fd.numActiveRequests++
+	fut := newFutureResponse(r)
 	conn.fd.completions[r.id] = fut
 
 	// Signal the readers that there is something to read.
diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go
index 6022593d6d..e7296c189e 100644
--- a/pkg/sentry/fsimpl/fuse/dev.go
+++ b/pkg/sentry/fsimpl/fuse/dev.go
@@ -367,22 +367,20 @@ func (fd *DeviceFD) Seek(ctx context.Context, offset int64, whence int32) (int64
 
 // sendResponse sends a response to the waiting task (if any).
 func (fd *DeviceFD) sendResponse(ctx context.Context, fut *futureResponse) error {
-	// See if the running task need to perform some action before returning.
-	// Since we just finished writing the future, we can be sure that
-	// getResponse generates a populated response.
-	if err := fd.noReceiverAction(ctx, fut.getResponse()); err != nil {
-		return err
-	}
+	// Signal the task waiting on a response if any.
+	defer close(fut.ch)
 
 	// Signal that the queue is no longer full.
 	select {
 	case fd.fullQueueCh <- struct{}{}:
 	default:
 	}
-	fd.numActiveRequests -= 1
+	fd.numActiveRequests--
+
+	if fut.async {
+		return fd.asyncCallBack(ctx, fut.getResponse())
+	}
 
-	// Signal the task waiting on a response.
-	close(fut.ch)
 	return nil
 }
 
@@ -404,23 +402,18 @@ func (fd *DeviceFD) sendError(ctx context.Context, errno int32, req *Request) er
 	delete(fd.completions, respHdr.Unique)
 
 	fut.hdr = &respHdr
-	if err := fd.sendResponse(ctx, fut); err != nil {
-		return err
-	}
-
-	return nil
+	return fd.sendResponse(ctx, fut)
 }
 
-// noReceiverAction has the calling kernel.Task do some action if its known that no
-// receiver is going to be waiting on the future channel. This is to be used by:
-// FUSE_INIT.
-func (fd *DeviceFD) noReceiverAction(ctx context.Context, r *Response) error {
+// asyncCallBack executes pre-defined callback function for async requests.
+// Currently used by: FUSE_INIT.
+func (fd *DeviceFD) asyncCallBack(ctx context.Context, r *Response) error {
 	switch r.opcode {
 	case linux.FUSE_INIT:
 		creds := auth.CredentialsFromContext(ctx)
 		rootUserNs := kernel.KernelFromContext(ctx).RootUserNamespace()
 		return fd.fs.conn.InitRecv(r, creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, rootUserNs))
-		// TODO(gvisor.dev/issue/3247): support async read: correctly process the response using information from r.options.
+		// TODO(gvisor.dev/issue/3247): support async read: correctly process the response.
 	}
 
 	return nil
diff --git a/pkg/sentry/fsimpl/fuse/request_response.go b/pkg/sentry/fsimpl/fuse/request_response.go
index 648eaf2639..fd9a961976 100644
--- a/pkg/sentry/fsimpl/fuse/request_response.go
+++ b/pkg/sentry/fsimpl/fuse/request_response.go
@@ -19,7 +19,6 @@ import (
 	"syscall"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
-	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/usermem"
@@ -96,6 +95,12 @@ type Request struct {
 	// payload for this request: extra bytes to write after
 	// the data slice. Used by FUSE_WRITE.
 	payload []byte
+
+	// If this request is async.
+	async bool
+	// If we don't care its response.
+	// Manually set by the caller.
+	noReply bool
 }
 
 // NewRequest creates a new request that can be sent to the FUSE server.
@@ -138,24 +143,25 @@ type futureResponse struct {
 	ch     chan struct{}
 	hdr    *linux.FUSEHeaderOut
 	data   []byte
+
+	// If this request is async.
+	async bool
 }
 
 // newFutureResponse creates a future response to a FUSE request.
-func newFutureResponse(opcode linux.FUSEOpcode) *futureResponse {
+func newFutureResponse(req *Request) *futureResponse {
 	return &futureResponse{
-		opcode: opcode,
+		opcode: req.hdr.Opcode,
 		ch:     make(chan struct{}),
+		async:  req.async,
 	}
 }
 
 // resolve blocks the task until the server responds to its corresponding request,
 // then returns a resolved response.
 func (f *futureResponse) resolve(t *kernel.Task) (*Response, error) {
-	// If there is no Task associated with this request  - then we don't try to resolve
-	// the response.  Instead, the task writing the response (proxy to the server) will
-	// process the response on our behalf.
-	if t == nil {
-		log.Infof("fuse.Response.resolve: Not waiting on a response from server.")
+	// Return directly for async requests.
+	if f.async {
 		return nil, nil
 	}
 

From 5a28bc6121c2cb076e24036386241e32a5745b40 Mon Sep 17 00:00:00 2001
From: Jinmou Li <jinmli@google.com>
Date: Thu, 3 Sep 2020 19:22:24 +0000
Subject: [PATCH 199/211] Fix FUSE_RELEASE protocol reply processing

This commit fixes potential unexpected errors in
the original handling of FUSE_RELEASE responses while
keeping the same behavior (ignoring any reply).
---
 pkg/sentry/fsimpl/fuse/dev.go  | 9 ++++++++-
 pkg/sentry/fsimpl/fuse/file.go | 7 ++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go
index e7296c189e..6f0b896cbc 100644
--- a/pkg/sentry/fsimpl/fuse/dev.go
+++ b/pkg/sentry/fsimpl/fuse/dev.go
@@ -204,8 +204,11 @@ func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts
 
 	// Fully done with this req, remove it from the queue.
 	fd.queue.Remove(req)
-	if req.hdr.Opcode == linux.FUSE_RELEASE {
+
+	// Remove noReply ones from map of requests expecting a reply.
+	if req.noReply {
 		fd.numActiveRequests -= 1
+		delete(fd.completions, req.hdr.Unique)
 	}
 
 	return int64(n), nil
@@ -296,6 +299,10 @@ func (fd *DeviceFD) writeLocked(ctx context.Context, src usermem.IOSequence, opt
 
 			fut, ok := fd.completions[hdr.Unique]
 			if !ok {
+				if fut.hdr.Unique == linux.FUSE_RELEASE {
+					// Currently we simply discard the reply for FUSE_RELEASE.
+					return n + src.NumBytes(), nil
+				}
 				// Server sent us a response for a request we never sent?
 				return 0, syserror.EINVAL
 			}
diff --git a/pkg/sentry/fsimpl/fuse/file.go b/pkg/sentry/fsimpl/fuse/file.go
index 15c0e3f419..b98145ba27 100644
--- a/pkg/sentry/fsimpl/fuse/file.go
+++ b/pkg/sentry/fsimpl/fuse/file.go
@@ -84,7 +84,12 @@ func (fd *fileDescription) Release(ctx context.Context) {
 	}
 	kernelTask := kernel.TaskFromContext(ctx)
 	// ignoring errors and FUSE server reply is analogous to Linux's behavior.
-	req, _ := conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), fd.inode().NodeID, opcode, &in)
+	req, err := conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), fd.inode().NodeID, opcode, &in)
+	if err != nil {
+		// No way to invoke Call() with an errored request.
+		return
+	}
+	req.noReply = true
 	conn.CallAsync(kernelTask, req)
 }
 

From 33d324792fecf11f8088d14bffb704805a3df38d Mon Sep 17 00:00:00 2001
From: jinmouil <67118279+jinmouil@users.noreply.github.com>
Date: Wed, 9 Sep 2020 16:22:33 -0700
Subject: [PATCH 200/211] Add FUSE umount support

This change implements Release for the FUSE filesystem
and the expected behaviors of the FUSE devices.
It includes several checks for an aborted connection
in the path for making a request, and a function
to abort all the ongoing FUSE requests in order.
---
 pkg/sentry/fsimpl/fuse/BUILD                 |   6 +-
 pkg/sentry/fsimpl/fuse/connection.go         |  11 +-
 pkg/sentry/fsimpl/fuse/connection_control.go |  62 +++++++++
 pkg/sentry/fsimpl/fuse/connection_test.go    | 127 +++++++++++++++++++
 pkg/sentry/fsimpl/fuse/dev.go                |  51 +++++---
 pkg/sentry/fsimpl/fuse/dev_test.go           |   4 -
 pkg/sentry/fsimpl/fuse/fusefs.go             |  12 ++
 pkg/sentry/fsimpl/fuse/utils_test.go         | 111 ++++++++++++++++
 pkg/syserror/syserror.go                     |   1 +
 9 files changed, 363 insertions(+), 22 deletions(-)
 create mode 100644 pkg/sentry/fsimpl/fuse/connection_test.go
 create mode 100644 pkg/sentry/fsimpl/fuse/utils_test.go

diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD
index 23660a7084..77025772c7 100644
--- a/pkg/sentry/fsimpl/fuse/BUILD
+++ b/pkg/sentry/fsimpl/fuse/BUILD
@@ -66,7 +66,11 @@ go_library(
 go_test(
     name = "fuse_test",
     size = "small",
-    srcs = ["dev_test.go"],
+    srcs = [
+        "connection_test.go",
+        "dev_test.go",
+        "utils_test.go",
+    ],
     library = ":fuse",
     deps = [
         "//pkg/abi/linux",
diff --git a/pkg/sentry/fsimpl/fuse/connection.go b/pkg/sentry/fsimpl/fuse/connection.go
index 122b7d92f6..6009cdf974 100644
--- a/pkg/sentry/fsimpl/fuse/connection.go
+++ b/pkg/sentry/fsimpl/fuse/connection.go
@@ -179,7 +179,6 @@ func newFUSEConnection(_ context.Context, fd *vfs.FileDescription, opts *filesys
 	// Mark the device as ready so it can be used. /dev/fuse can only be used if the FD was used to
 	// mount a FUSE filesystem.
 	fuseFD := fd.Impl().(*DeviceFD)
-	fuseFD.mounted = true
 
 	// Create the writeBuf for the header to be stored in.
 	hdrLen := uint32((*linux.FUSEHeaderOut)(nil).SizeBytes())
@@ -282,6 +281,16 @@ func (conn *connection) callFuture(t *kernel.Task, r *Request) (*futureResponse,
 
 // callFutureLocked makes a request to the server and returns a future response.
 func (conn *connection) callFutureLocked(t *kernel.Task, r *Request) (*futureResponse, error) {
+	// Check connected again holding conn.mu.
+	conn.mu.Lock()
+	if !conn.connected {
+		conn.mu.Unlock()
+		// we checked connected before,
+		// this must be due to aborted connection.
+		return nil, syserror.ECONNABORTED
+	}
+	conn.mu.Unlock()
+
 	conn.fd.queue.PushBack(r)
 	conn.fd.numActiveRequests++
 	fut := newFutureResponse(r)
diff --git a/pkg/sentry/fsimpl/fuse/connection_control.go b/pkg/sentry/fsimpl/fuse/connection_control.go
index d84d9caf24..a63c66e7c0 100644
--- a/pkg/sentry/fsimpl/fuse/connection_control.go
+++ b/pkg/sentry/fsimpl/fuse/connection_control.go
@@ -16,8 +16,10 @@ package fuse
 
 import (
 	"sync/atomic"
+	"syscall"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 )
 
@@ -181,3 +183,63 @@ func (conn *connection) initProcessReply(out *linux.FUSEInitOut, hasSysAdminCap
 
 	return nil
 }
+
+// Abort this FUSE connection.
+// It tries to acquire conn.fd.mu, conn.lock, conn.bgLock in order.
+// All possible requests waiting or blocking will be aborted.
+func (conn *connection) Abort(ctx context.Context) {
+	conn.fd.mu.Lock()
+	conn.mu.Lock()
+	conn.asyncMu.Lock()
+
+	if !conn.connected {
+		conn.asyncMu.Unlock()
+		conn.mu.Unlock()
+		conn.fd.mu.Unlock()
+		return
+	}
+
+	conn.connected = false
+
+	// Empty the `fd.queue` that holds the requests
+	// not yet read by the FUSE daemon yet.
+	// These are a subset of the requests in `fuse.completion` map.
+	for !conn.fd.queue.Empty() {
+		req := conn.fd.queue.Front()
+		conn.fd.queue.Remove(req)
+	}
+
+	var terminate []linux.FUSEOpID
+
+	// 2. Collect the requests have not been sent to FUSE daemon,
+	// or have not received a reply.
+	for unique := range conn.fd.completions {
+		terminate = append(terminate, unique)
+	}
+
+	// Release all locks to avoid deadlock.
+	conn.asyncMu.Unlock()
+	conn.mu.Unlock()
+	conn.fd.mu.Unlock()
+
+	// 1. The requets blocked before initialization.
+	// Will reach call() `connected` check and return.
+	if !conn.Initialized() {
+		conn.SetInitialized()
+	}
+
+	// 2. Terminate the requests collected above.
+	// Set ECONNABORTED error.
+	// sendError() will remove them from `fd.completion` map.
+	// Will enter the path of a normally received error.
+	for _, toTerminate := range terminate {
+		conn.fd.sendError(ctx, -int32(syscall.ECONNABORTED), toTerminate)
+	}
+
+	// 3. The requests not yet written to FUSE device.
+	// Early terminate.
+	// Will reach callFutureLocked() `connected` check and return.
+	close(conn.fd.fullQueueCh)
+
+	// TODO(gvisor.dev/issue/3528): Forget all pending forget reqs.
+}
diff --git a/pkg/sentry/fsimpl/fuse/connection_test.go b/pkg/sentry/fsimpl/fuse/connection_test.go
new file mode 100644
index 0000000000..6aa77b36d2
--- /dev/null
+++ b/pkg/sentry/fsimpl/fuse/connection_test.go
@@ -0,0 +1,127 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fuse
+
+import (
+	"math/rand"
+	"syscall"
+	"testing"
+
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/syserror"
+)
+
+// TestConnectionInitBlock tests if initialization
+// correctly blocks and unblocks the connection.
+// Since it's unfeasible to test kernelTask.Block() in unit test,
+// the code in Call() are not tested here.
+func TestConnectionInitBlock(t *testing.T) {
+	s := setup(t)
+	defer s.Destroy()
+
+	k := kernel.KernelFromContext(s.Ctx)
+
+	conn, _, err := newTestConnection(s, k, maxActiveRequestsDefault)
+	if err != nil {
+		t.Fatalf("newTestConnection: %v", err)
+	}
+
+	select {
+	case <-conn.initializedChan:
+		t.Fatalf("initializedChan should be blocking before SetInitialized")
+	default:
+	}
+
+	conn.SetInitialized()
+
+	select {
+	case <-conn.initializedChan:
+	default:
+		t.Fatalf("initializedChan should not be blocking after SetInitialized")
+	}
+}
+
+func TestConnectionAbort(t *testing.T) {
+	s := setup(t)
+	defer s.Destroy()
+
+	k := kernel.KernelFromContext(s.Ctx)
+	creds := auth.CredentialsFromContext(s.Ctx)
+	task := kernel.TaskFromContext(s.Ctx)
+
+	const maxActiveRequest uint64 = 10
+
+	conn, _, err := newTestConnection(s, k, maxActiveRequest)
+	if err != nil {
+		t.Fatalf("newTestConnection: %v", err)
+	}
+
+	testObj := &testPayload{
+		data: rand.Uint32(),
+	}
+
+	var futNormal []*futureResponse
+
+	for i := 0; i < 2*int(maxActiveRequest); i++ {
+		req, err := conn.NewRequest(creds, uint32(i), uint64(i), 0, testObj)
+		if err != nil {
+			t.Fatalf("NewRequest creation failed: %v", err)
+		}
+		if i < int(maxActiveRequest) {
+			// Issue the requests that will not be blocked due to maxActiveRequest.
+			fut, err := conn.callFutureLocked(task, req)
+			if err != nil {
+				t.Fatalf("callFutureLocked failed: %v", err)
+			}
+			futNormal = append(futNormal, fut)
+		} else {
+			go func(t *testing.T) {
+				// The requests beyond maxActiveRequest will be blocked and receive expected errors.
+				_, err := conn.callFutureLocked(task, req)
+				if err != syserror.ECONNABORTED && err != syserror.ENOTCONN {
+					t.Fatalf("Incorrect error code received for blocked callFutureLocked() on aborted connection")
+				}
+			}(t)
+		}
+	}
+
+	conn.Abort(s.Ctx)
+
+	// Abort should unblock the initialization channel.
+	select {
+	case <-conn.initializedChan:
+	default:
+		t.Fatalf("initializedChan should not be blocking after SetInitialized")
+	}
+
+	// Abort will return ECONNABORTED error to unblocked requests.
+	for _, fut := range futNormal {
+		if fut.getResponse().hdr.Error != -int32(syscall.ECONNABORTED) {
+			t.Fatalf("Incorrect error code received for aborted connection")
+		}
+	}
+
+	// After abort, Call() should return directly with ENOTCONN.
+	req, err := conn.NewRequest(creds, 0, 0, 0, testObj)
+	if err != nil {
+		t.Fatalf("NewRequest creation failed: %v", err)
+	}
+	_, err = conn.Call(task, req)
+	if err != syserror.ENOTCONN {
+		t.Fatalf("Incorrect error code received for Call() after connection aborted")
+	}
+
+}
diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go
index 6f0b896cbc..64c3e32e26 100644
--- a/pkg/sentry/fsimpl/fuse/dev.go
+++ b/pkg/sentry/fsimpl/fuse/dev.go
@@ -55,9 +55,6 @@ type DeviceFD struct {
 	vfs.DentryMetadataFileDescriptionImpl
 	vfs.NoLockFD
 
-	// mounted specifies whether a FUSE filesystem was mounted using the DeviceFD.
-	mounted bool
-
 	// nextOpID is used to create new requests.
 	nextOpID linux.FUSEOpID
 
@@ -99,13 +96,15 @@ type DeviceFD struct {
 
 // Release implements vfs.FileDescriptionImpl.Release.
 func (fd *DeviceFD) Release(context.Context) {
-	fd.fs.conn.connected = false
+	if fd.fs != nil {
+		fd.fs.conn.connected = false
+	}
 }
 
 // PRead implements vfs.FileDescriptionImpl.PRead.
 func (fd *DeviceFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
 	// Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
-	if !fd.mounted {
+	if fd.fs == nil {
 		return 0, syserror.EPERM
 	}
 
@@ -115,10 +114,16 @@ func (fd *DeviceFD) PRead(ctx context.Context, dst usermem.IOSequence, offset in
 // Read implements vfs.FileDescriptionImpl.Read.
 func (fd *DeviceFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
 	// Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
-	if !fd.mounted {
+	if fd.fs == nil {
 		return 0, syserror.EPERM
 	}
 
+	// Return ENODEV if the filesystem is umounted.
+	if fd.fs.umounted {
+		// TODO(gvisor.dev/issue/3525): return ECONNABORTED if aborted via fuse control fs.
+		return 0, syserror.ENODEV
+	}
+
 	// We require that any Read done on this filesystem have a sane minimum
 	// read buffer. It must have the capacity for the fixed parts of any request
 	// header (Linux uses the request header and the FUSEWriteIn header for this
@@ -165,7 +170,7 @@ func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts
 		}
 
 		// Return the error to the calling task.
-		if err := fd.sendError(ctx, errno, req); err != nil {
+		if err := fd.sendError(ctx, errno, req.hdr.Unique); err != nil {
 			return 0, err
 		}
 
@@ -217,7 +222,7 @@ func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts
 // PWrite implements vfs.FileDescriptionImpl.PWrite.
 func (fd *DeviceFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
 	// Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
-	if !fd.mounted {
+	if fd.fs == nil {
 		return 0, syserror.EPERM
 	}
 
@@ -234,10 +239,15 @@ func (fd *DeviceFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.
 // writeLocked implements writing to the fuse device while locked with DeviceFD.mu.
 func (fd *DeviceFD) writeLocked(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
 	// Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
-	if !fd.mounted {
+	if fd.fs == nil {
 		return 0, syserror.EPERM
 	}
 
+	// Return ENODEV if the filesystem is umounted.
+	if fd.fs.umounted {
+		return 0, syserror.ENODEV
+	}
+
 	var cn, n int64
 	hdrLen := uint32((*linux.FUSEHeaderOut)(nil).SizeBytes())
 
@@ -303,7 +313,8 @@ func (fd *DeviceFD) writeLocked(ctx context.Context, src usermem.IOSequence, opt
 					// Currently we simply discard the reply for FUSE_RELEASE.
 					return n + src.NumBytes(), nil
 				}
-				// Server sent us a response for a request we never sent?
+				// Server sent us a response for a request we never sent,
+				// or for which we already received a reply (e.g. aborted), an unlikely event.
 				return 0, syserror.EINVAL
 			}
 
@@ -343,7 +354,14 @@ func (fd *DeviceFD) Readiness(mask waiter.EventMask) waiter.EventMask {
 // locked with DeviceFD.mu.
 func (fd *DeviceFD) readinessLocked(mask waiter.EventMask) waiter.EventMask {
 	var ready waiter.EventMask
-	ready |= waiter.EventOut // FD is always writable
+
+	if fd.fs.umounted {
+		ready |= waiter.EventErr
+		return ready & mask
+	}
+
+	// FD is always writable.
+	ready |= waiter.EventOut
 	if !fd.queue.Empty() {
 		// Have reqs available, FD is readable.
 		ready |= waiter.EventIn
@@ -365,7 +383,7 @@ func (fd *DeviceFD) EventUnregister(e *waiter.Entry) {
 // Seek implements vfs.FileDescriptionImpl.Seek.
 func (fd *DeviceFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
 	// Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
-	if !fd.mounted {
+	if fd.fs == nil {
 		return 0, syserror.EPERM
 	}
 
@@ -391,19 +409,20 @@ func (fd *DeviceFD) sendResponse(ctx context.Context, fut *futureResponse) error
 	return nil
 }
 
-// sendError sends an error response to the waiting task (if any).
-func (fd *DeviceFD) sendError(ctx context.Context, errno int32, req *Request) error {
+// sendError sends an error response to the waiting task (if any) by calling sendResponse().
+func (fd *DeviceFD) sendError(ctx context.Context, errno int32, unique linux.FUSEOpID) error {
 	// Return the error to the calling task.
 	outHdrLen := uint32((*linux.FUSEHeaderOut)(nil).SizeBytes())
 	respHdr := linux.FUSEHeaderOut{
 		Len:    outHdrLen,
 		Error:  errno,
-		Unique: req.hdr.Unique,
+		Unique: unique,
 	}
 
 	fut, ok := fd.completions[respHdr.Unique]
 	if !ok {
-		// Server sent us a response for a request we never sent?
+		// A response for a request we never sent,
+		// or for which we already received a reply (e.g. aborted).
 		return syserror.EINVAL
 	}
 	delete(fd.completions, respHdr.Unique)
diff --git a/pkg/sentry/fsimpl/fuse/dev_test.go b/pkg/sentry/fsimpl/fuse/dev_test.go
index 9366834546..3da8b469bb 100644
--- a/pkg/sentry/fsimpl/fuse/dev_test.go
+++ b/pkg/sentry/fsimpl/fuse/dev_test.go
@@ -35,10 +35,6 @@ import (
 // will simply echo the payload back with the appropriate headers.
 const echoTestOpcode linux.FUSEOpcode = 1000
 
-type testPayload struct {
-	data uint32
-}
-
 // TestFUSECommunication tests that the communication layer between the Sentry and the
 // FUSE server daemon works as expected.
 func TestFUSECommunication(t *testing.T) {
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index 30725182a2..f4ed73c128 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -29,6 +29,7 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/syserror"
+	"gvisor.dev/gvisor/pkg/waiter"
 	"gvisor.dev/gvisor/tools/go_marshal/marshal"
 )
 
@@ -82,6 +83,9 @@ type filesystem struct {
 
 	// opts is the options the fusefs is initialized with.
 	opts *filesystemOptions
+
+	// umounted is true if filesystem.Release() has been called.
+	umounted bool
 }
 
 // Name implements vfs.FilesystemType.Name.
@@ -221,6 +225,14 @@ func newFUSEFilesystem(ctx context.Context, devMinor uint32, opts *filesystemOpt
 
 // Release implements vfs.FilesystemImpl.Release.
 func (fs *filesystem) Release(ctx context.Context) {
+	fs.umounted = true
+	fs.conn.Abort(ctx)
+
+	fs.conn.fd.mu.Lock()
+	// Notify all the waiters on this fd.
+	fs.conn.fd.waitQueue.Notify(waiter.EventIn)
+	fs.conn.fd.mu.Unlock()
+
 	fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
 	fs.Filesystem.Release(ctx)
 }
diff --git a/pkg/sentry/fsimpl/fuse/utils_test.go b/pkg/sentry/fsimpl/fuse/utils_test.go
new file mode 100644
index 0000000000..4363228305
--- /dev/null
+++ b/pkg/sentry/fsimpl/fuse/utils_test.go
@@ -0,0 +1,111 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fuse
+
+import (
+	"testing"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+	"gvisor.dev/gvisor/pkg/usermem"
+	"gvisor.dev/gvisor/tools/go_marshal/marshal"
+)
+
+func setup(t *testing.T) *testutil.System {
+	k, err := testutil.Boot()
+	if err != nil {
+		t.Fatalf("Error creating kernel: %v", err)
+	}
+
+	ctx := k.SupervisorContext()
+	creds := auth.CredentialsFromContext(ctx)
+
+	k.VFS().MustRegisterFilesystemType(Name, &FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+		AllowUserList:  true,
+		AllowUserMount: true,
+	})
+
+	mntns, err := k.VFS().NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{})
+	if err != nil {
+		t.Fatalf("NewMountNamespace(): %v", err)
+	}
+
+	return testutil.NewSystem(ctx, t, k.VFS(), mntns)
+}
+
+// newTestConnection creates a fuse connection that the sentry can communicate with
+// and the FD for the server to communicate with.
+func newTestConnection(system *testutil.System, k *kernel.Kernel, maxActiveRequests uint64) (*connection, *vfs.FileDescription, error) {
+	vfsObj := &vfs.VirtualFilesystem{}
+	fuseDev := &DeviceFD{}
+
+	if err := vfsObj.Init(system.Ctx); err != nil {
+		return nil, nil, err
+	}
+
+	vd := vfsObj.NewAnonVirtualDentry("genCountFD")
+	defer vd.DecRef(system.Ctx)
+	if err := fuseDev.vfsfd.Init(fuseDev, linux.O_RDWR|linux.O_CREAT, vd.Mount(), vd.Dentry(), &vfs.FileDescriptionOptions{}); err != nil {
+		return nil, nil, err
+	}
+
+	fsopts := filesystemOptions{
+		maxActiveRequests: maxActiveRequests,
+	}
+	fs, err := newFUSEFilesystem(system.Ctx, 0, &fsopts, &fuseDev.vfsfd)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	return fs.conn, &fuseDev.vfsfd, nil
+}
+
+type testPayload struct {
+	marshal.StubMarshallable
+	data uint32
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes.
+func (t *testPayload) SizeBytes() int {
+	return 4
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes.
+func (t *testPayload) MarshalBytes(dst []byte) {
+	usermem.ByteOrder.PutUint32(dst[:4], t.data)
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
+func (t *testPayload) UnmarshalBytes(src []byte) {
+	*t = testPayload{data: usermem.ByteOrder.Uint32(src[:4])}
+}
+
+// Packed implements marshal.Marshallable.Packed.
+func (t *testPayload) Packed() bool {
+	return true
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
+func (t *testPayload) MarshalUnsafe(dst []byte) {
+	t.MarshalBytes(dst)
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe.
+func (t *testPayload) UnmarshalUnsafe(src []byte) {
+	t.UnmarshalBytes(src)
+}
diff --git a/pkg/syserror/syserror.go b/pkg/syserror/syserror.go
index fe9f50169f..f516c8e467 100644
--- a/pkg/syserror/syserror.go
+++ b/pkg/syserror/syserror.go
@@ -33,6 +33,7 @@ var (
 	EBADFD       = error(syscall.EBADFD)
 	EBUSY        = error(syscall.EBUSY)
 	ECHILD       = error(syscall.ECHILD)
+	ECONNABORTED = error(syscall.ECONNABORTED)
 	ECONNREFUSED = error(syscall.ECONNREFUSED)
 	ECONNRESET   = error(syscall.ECONNRESET)
 	EDEADLK      = error(syscall.EDEADLK)

From 440b6f00e75e4df9788c640e04b0dc982e03d14d Mon Sep 17 00:00:00 2001
From: Craig Chi <craigchi@google.com>
Date: Wed, 9 Sep 2020 16:44:35 -0700
Subject: [PATCH 201/211] Fix comments of TODO issues.

---
 pkg/abi/linux/fuse.go                      | 2 +-
 pkg/sentry/fsimpl/fuse/request_response.go | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index ba3316ad60..ca304af052 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -663,7 +663,7 @@ func (r *FUSEMkdirIn) SizeBytes() int {
 type FUSERmDirIn struct {
 	marshal.StubMarshallable
 
-	// Name is a directory name to be looked up.
+	// Name is a directory name to be removed.
 	Name string
 }
 
diff --git a/pkg/sentry/fsimpl/fuse/request_response.go b/pkg/sentry/fsimpl/fuse/request_response.go
index fd9a961976..70db50f384 100644
--- a/pkg/sentry/fsimpl/fuse/request_response.go
+++ b/pkg/sentry/fsimpl/fuse/request_response.go
@@ -122,7 +122,7 @@ func (conn *connection) NewRequest(creds *auth.Credentials, pid uint32, ino uint
 
 	buf := make([]byte, hdr.Len)
 
-	// TODO(gVisor.dev/3698): Use the unsafe version once go_marshal is safe to use again.
+	// TODO(gVisor.dev/issue/3698): Use the unsafe version once go_marshal is safe to use again.
 	hdr.MarshalBytes(buf[:hdrLen])
 	payload.MarshalBytes(buf[hdrLen:])
 
@@ -223,7 +223,7 @@ func (r *Response) UnmarshalPayload(m marshal.Marshallable) error {
 		return nil
 	}
 
-	// TODO(gVisor.dev/3698): Use the unsafe version once go_marshal is safe to use again.
+	// TODO(gVisor.dev/issue/3698): Use the unsafe version once go_marshal is safe to use again.
 	m.UnmarshalBytes(r.data[hdrLen:])
 	return nil
 }

From 36bbf9e9668d1982afffbe069ab3b26a3822a1e7 Mon Sep 17 00:00:00 2001
From: boyuan-he <67342292+boyuan-he@users.noreply.github.com>
Date: Wed, 9 Sep 2020 17:13:18 -0700
Subject: [PATCH 202/211] Implement FUSE_UNLINK

Fixes #3696
---
 pkg/abi/linux/fuse.go                  | 23 +++++++
 pkg/sentry/fsimpl/fuse/fusefs.go       | 23 +++++++
 pkg/sentry/fsimpl/kernfs/filesystem.go |  6 +-
 pkg/sentry/fsimpl/kernfs/kernfs.go     | 31 ++++++++++
 test/fuse/BUILD                        |  5 ++
 test/fuse/linux/BUILD                  | 14 +++++
 test/fuse/linux/unlink_test.cc         | 83 ++++++++++++++++++++++++++
 7 files changed, 184 insertions(+), 1 deletion(-)
 create mode 100644 test/fuse/linux/unlink_test.cc

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index ca304af052..fdd22a13d4 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -846,3 +846,26 @@ type FUSESetAttrIn struct {
 
 	_ uint32
 }
+
+// FUSEUnlinkIn is the request sent by the kernel to the daemon
+// when trying to unlink a node.
+//
+// Dynamically-sized objects cannot be marshalled.
+type FUSEUnlinkIn struct {
+	marshal.StubMarshallable
+
+	// Name of the node to unlink.
+	Name string
+}
+
+// MarshalBytes serializes r.Name to the buf buffer, which should
+// have size len(r.Name) + 1 and last byte set to 0.
+func (r *FUSEUnlinkIn) MarshalBytes(buf []byte) {
+	copy(buf, r.Name)
+}
+
+// SizeBytes is the size of the memory representation of FUSEUnlinkIn.
+// 1 extra byte for null-terminated Name string.
+func (r *FUSEUnlinkIn) SizeBytes() int {
+	return len(r.Name) + 1
+}
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index f4ed73c128..8e749bdade 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -455,6 +455,29 @@ func (i *inode) NewSymlink(ctx context.Context, name, target string) (*vfs.Dentr
 	return i.newEntry(ctx, name, linux.S_IFLNK, linux.FUSE_SYMLINK, &in)
 }
 
+// Unlink implements kernfs.Inode.Unlink.
+func (i *inode) Unlink(ctx context.Context, name string, child *vfs.Dentry) error {
+	kernelTask := kernel.TaskFromContext(ctx)
+	if kernelTask == nil {
+		log.Warningf("fusefs.Inode.newEntry: couldn't get kernel task from context", i.NodeID)
+		return syserror.EINVAL
+	}
+	in := linux.FUSEUnlinkIn{Name: name}
+	req, err := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.NodeID, linux.FUSE_UNLINK, &in)
+	if err != nil {
+		return err
+	}
+	res, err := i.fs.conn.Call(kernelTask, req)
+	if err != nil {
+		return err
+	}
+	// only return error, discard res.
+	if err := res.Error(); err != nil {
+		return err
+	}
+	return i.dentry.RemoveChildLocked(name, child)
+}
+
 // NewDir implements kernfs.Inode.NewDir.
 func (i *inode) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*vfs.Dentry, error) {
 	in := linux.FUSEMkdirIn{
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index 2823c3b1a3..49f6a0f1d4 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -770,6 +770,10 @@ func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ
 func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error {
 	fs.mu.Lock()
 	defer fs.mu.Unlock()
+
+	// Store the name before walkExistingLocked as rp will be advanced past the
+	// name in the following call.
+	name := rp.Component()
 	vfsd, _, err := fs.walkExistingLocked(ctx, rp)
 	fs.processDeferredDecRefsLocked(ctx)
 	if err != nil {
@@ -795,7 +799,7 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
 	if err := virtfs.PrepareDeleteDentry(mntns, vfsd); err != nil {
 		return err
 	}
-	if err := parentDentry.inode.Unlink(ctx, rp.Component(), vfsd); err != nil {
+	if err := parentDentry.inode.Unlink(ctx, name, vfsd); err != nil {
 		virtfs.AbortDeleteDentry(vfsd)
 		return err
 	}
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index 61189af252..163f26ceb4 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -60,6 +60,7 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/sync"
+	"gvisor.dev/gvisor/pkg/syserror"
 )
 
 // Filesystem mostly implements vfs.FilesystemImpl for a generic in-memory
@@ -267,6 +268,36 @@ func (d *Dentry) InsertChildLocked(name string, child *Dentry) {
 	d.children[name] = child
 }
 
+// RemoveChild removes child from the vfs dentry cache. This does not update the
+// directory inode or modify the inode to be unlinked. So calling this on its own
+// isn't sufficient to remove a child from a directory.
+//
+// Precondition: d must represent a directory inode.
+func (d *Dentry) RemoveChild(name string, child *vfs.Dentry) error {
+	d.dirMu.Lock()
+	defer d.dirMu.Unlock()
+	return d.RemoveChildLocked(name, child)
+}
+
+// RemoveChildLocked is equivalent to RemoveChild, with additional
+// preconditions.
+//
+// Precondition: d.dirMu must be locked.
+func (d *Dentry) RemoveChildLocked(name string, child *vfs.Dentry) error {
+	if !d.isDir() {
+		panic(fmt.Sprintf("RemoveChild called on non-directory Dentry: %+v.", d))
+	}
+	c, ok := d.children[name]
+	if !ok {
+		return syserror.ENOENT
+	}
+	if &c.vfsd != child {
+		panic(fmt.Sprintf("Dentry hashed into inode doesn't match what vfs thinks! Child: %+v, vfs: %+v", c, child))
+	}
+	delete(d.children, name)
+	return nil
+}
+
 // Inode returns the dentry's inode.
 func (d *Dentry) Inode() Inode {
 	return d.inode
diff --git a/test/fuse/BUILD b/test/fuse/BUILD
index 29b9a9d93b..dacfe0175a 100644
--- a/test/fuse/BUILD
+++ b/test/fuse/BUILD
@@ -62,6 +62,11 @@ syscall_test(
     test = "//test/fuse/linux:create_test",
 )
 
+syscall_test(
+    fuse = "True",
+    test = "//test/fuse/linux:unlink_test",
+)
+
 syscall_test(
     fuse = "True",
     test = "//test/fuse/linux:setstat_test",
diff --git a/test/fuse/linux/BUILD b/test/fuse/linux/BUILD
index 7ecd6d8cb3..7673252ecd 100644
--- a/test/fuse/linux/BUILD
+++ b/test/fuse/linux/BUILD
@@ -214,3 +214,17 @@ cc_binary(
         "//test/util:test_util",
     ],
 )
+
+cc_binary(
+    name = "unlink_test",
+    testonly = 1,
+    srcs = ["unlink_test.cc"],
+    deps = [
+        gtest,
+        ":fuse_base",
+        "//test/util:fuse_util",
+        "//test/util:temp_umask",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
diff --git a/test/fuse/linux/unlink_test.cc b/test/fuse/linux/unlink_test.cc
new file mode 100644
index 0000000000..5702e9b328
--- /dev/null
+++ b/test/fuse/linux/unlink_test.cc
@@ -0,0 +1,83 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fuse.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/fuse/linux/fuse_base.h"
+#include "test/util/fuse_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class UnlinkTest : public FuseTest {
+ protected:
+  const std::string test_file_ = "test_file";
+};
+
+TEST_F(UnlinkTest, RegularFile) {
+  const std::string test_file_path =
+      JoinPath(mount_point_.path().c_str(), test_file_);
+  SetServerInodeLookup(test_file_, S_IFREG | S_IRWXU | S_IRWXG | S_IRWXO);
+
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header),
+  };
+  auto iov_out = FuseGenerateIovecs(out_header);
+  SetServerResponse(FUSE_UNLINK, iov_out);
+
+  ASSERT_THAT(unlink(test_file_path.c_str()), SyscallSucceeds());
+  struct fuse_in_header in_header;
+  std::vector<char> unlinked_file(test_file_.length() + 1);
+  auto iov_in = FuseGenerateIovecs(in_header, unlinked_file);
+  GetServerActualRequest(iov_in);
+
+  EXPECT_EQ(in_header.len, sizeof(in_header) + test_file_.length() + 1);
+  EXPECT_EQ(in_header.opcode, FUSE_UNLINK);
+  EXPECT_EQ(std::string(unlinked_file.data()), test_file_);
+}
+
+TEST_F(UnlinkTest, NoFile) {
+  const std::string test_file_path =
+      JoinPath(mount_point_.path().c_str(), test_file_);
+  SetServerInodeLookup(test_file_, S_IFREG | S_IRWXU | S_IRWXG | S_IRWXO);
+
+  struct fuse_out_header out_header = {
+      .len = sizeof(struct fuse_out_header),
+      .error = -ENOENT,
+  };
+  auto iov_out = FuseGenerateIovecs(out_header);
+  SetServerResponse(FUSE_UNLINK, iov_out);
+
+  ASSERT_THAT(unlink(test_file_path.c_str()), SyscallFailsWithErrno(ENOENT));
+  SkipServerActualRequest();
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor

From b8bee78d0c99a0c9423061b3dda10b9ef8c9719d Mon Sep 17 00:00:00 2001
From: Craig Chi <craigchi@google.com>
Date: Wed, 9 Sep 2020 16:30:25 -0700
Subject: [PATCH 203/211] Unexport fusefs.inode.nodeID

---
 pkg/sentry/fsimpl/fuse/connection.go |  2 +-
 pkg/sentry/fsimpl/fuse/directory.go  |  4 +--
 pkg/sentry/fsimpl/fuse/file.go       |  7 ++---
 pkg/sentry/fsimpl/fuse/fusefs.go     | 38 ++++++++++++++--------------
 pkg/sentry/fsimpl/fuse/read_write.go |  4 +--
 5 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/pkg/sentry/fsimpl/fuse/connection.go b/pkg/sentry/fsimpl/fuse/connection.go
index 6009cdf974..a7402c149e 100644
--- a/pkg/sentry/fsimpl/fuse/connection.go
+++ b/pkg/sentry/fsimpl/fuse/connection.go
@@ -46,7 +46,7 @@ const (
 type connection struct {
 	fd *DeviceFD
 
-	// mu protect access to struct memebers.
+	// mu protects access to struct members.
 	mu sync.Mutex
 
 	// attributeVersion is the version of connection's attributes.
diff --git a/pkg/sentry/fsimpl/fuse/directory.go b/pkg/sentry/fsimpl/fuse/directory.go
index a83357129f..798c4a6f3f 100644
--- a/pkg/sentry/fsimpl/fuse/directory.go
+++ b/pkg/sentry/fsimpl/fuse/directory.go
@@ -67,8 +67,8 @@ func (dir *directoryFD) IterDirents(ctx context.Context, callback vfs.IterDirent
 		Flags:  dir.statusFlags(),
 	}
 
-	/// TODO(gVisor.dev/issue/3404): Support FUSE_READDIRPLUS.
-	req, err := fusefs.conn.NewRequest(creds, uint32(task.ThreadID()), dir.inode().NodeID, linux.FUSE_READDIR, &in)
+	// TODO(gVisor.dev/issue/3404): Support FUSE_READDIRPLUS.
+	req, err := fusefs.conn.NewRequest(creds, uint32(task.ThreadID()), dir.inode().nodeID, linux.FUSE_READDIR, &in)
 	if err != nil {
 		return err
 	}
diff --git a/pkg/sentry/fsimpl/fuse/file.go b/pkg/sentry/fsimpl/fuse/file.go
index b98145ba27..991efcda4b 100644
--- a/pkg/sentry/fsimpl/fuse/file.go
+++ b/pkg/sentry/fsimpl/fuse/file.go
@@ -84,7 +84,7 @@ func (fd *fileDescription) Release(ctx context.Context) {
 	}
 	kernelTask := kernel.TaskFromContext(ctx)
 	// ignoring errors and FUSE server reply is analogous to Linux's behavior.
-	req, err := conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), fd.inode().NodeID, opcode, &in)
+	req, err := conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), fd.inode().nodeID, opcode, &in)
 	if err != nil {
 		// No way to invoke Call() with an errored request.
 		return
@@ -125,8 +125,9 @@ func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linu
 	return inode.Stat(ctx, fs, opts)
 }
 
-// SetStat implements FileDescriptionImpl.SetStat.
+// SetStat implements vfs.FileDescriptionImpl.SetStat.
 func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
+	fs := fd.filesystem()
 	creds := auth.CredentialsFromContext(ctx)
-	return fd.inode().setAttr(ctx, fd.inode().fs.VFSFilesystem(), creds, opts, true, fd.Fh)
+	return fd.inode().setAttr(ctx, fs, creds, opts, true, fd.Fh)
 }
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index 8e749bdade..402dabe5a7 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -254,7 +254,7 @@ type inode struct {
 	// metaDataMu protects the metadata of this inode.
 	metadataMu sync.Mutex
 
-	NodeID uint64
+	nodeID uint64
 
 	locks vfs.FileLocks
 
@@ -279,13 +279,13 @@ func (fs *filesystem) newRootInode(creds *auth.Credentials, mode linux.FileMode)
 	i.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, 1, linux.ModeDirectory|0755)
 	i.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
 	i.dentry.Init(i)
-	i.NodeID = 1
+	i.nodeID = 1
 
 	return &i.dentry
 }
 
 func (fs *filesystem) newInode(nodeID uint64, attr linux.FUSEAttr) *kernfs.Dentry {
-	i := &inode{fs: fs, NodeID: nodeID}
+	i := &inode{fs: fs, nodeID: nodeID}
 	creds := auth.Credentials{EffectiveKGID: auth.KGID(attr.UID), EffectiveKUID: auth.KUID(attr.UID)}
 	i.InodeAttrs.Init(&creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.FileMode(attr.Mode))
 	atomic.StoreUint64(&i.size, attr.Size)
@@ -342,7 +342,7 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr
 			in.Flags &= ^uint32(linux.O_TRUNC)
 		}
 
-		req, err := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.NodeID, opcode, &in)
+		req, err := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.nodeID, opcode, &in)
 		if err != nil {
 			return nil, err
 		}
@@ -405,13 +405,13 @@ func (i *inode) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) {
 	return i.newEntry(ctx, name, 0, linux.FUSE_LOOKUP, &in)
 }
 
-// IterDirents implements Inode.IterDirents.
-func (inode) IterDirents(ctx context.Context, callback vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
+// IterDirents implements kernfs.Inode.IterDirents.
+func (*inode) IterDirents(ctx context.Context, callback vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
 	return offset, nil
 }
 
-// Valid implements Inode.Valid.
-func (inode) Valid(ctx context.Context) bool {
+// Valid implements kernfs.Inode.Valid.
+func (*inode) Valid(ctx context.Context) bool {
 	return true
 }
 
@@ -419,7 +419,7 @@ func (inode) Valid(ctx context.Context) bool {
 func (i *inode) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*vfs.Dentry, error) {
 	kernelTask := kernel.TaskFromContext(ctx)
 	if kernelTask == nil {
-		log.Warningf("fusefs.Inode.NewFile: couldn't get kernel task from context", i.NodeID)
+		log.Warningf("fusefs.Inode.NewFile: couldn't get kernel task from context", i.nodeID)
 		return nil, syserror.EINVAL
 	}
 	in := linux.FUSECreateIn{
@@ -459,11 +459,11 @@ func (i *inode) NewSymlink(ctx context.Context, name, target string) (*vfs.Dentr
 func (i *inode) Unlink(ctx context.Context, name string, child *vfs.Dentry) error {
 	kernelTask := kernel.TaskFromContext(ctx)
 	if kernelTask == nil {
-		log.Warningf("fusefs.Inode.newEntry: couldn't get kernel task from context", i.NodeID)
+		log.Warningf("fusefs.Inode.newEntry: couldn't get kernel task from context", i.nodeID)
 		return syserror.EINVAL
 	}
 	in := linux.FUSEUnlinkIn{Name: name}
-	req, err := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.NodeID, linux.FUSE_UNLINK, &in)
+	req, err := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.nodeID, linux.FUSE_UNLINK, &in)
 	if err != nil {
 		return err
 	}
@@ -496,7 +496,7 @@ func (i *inode) RmDir(ctx context.Context, name string, child *vfs.Dentry) error
 	task, creds := kernel.TaskFromContext(ctx), auth.CredentialsFromContext(ctx)
 
 	in := linux.FUSERmDirIn{Name: name}
-	req, err := fusefs.conn.NewRequest(creds, uint32(task.ThreadID()), i.NodeID, linux.FUSE_RMDIR, &in)
+	req, err := fusefs.conn.NewRequest(creds, uint32(task.ThreadID()), i.nodeID, linux.FUSE_RMDIR, &in)
 	if err != nil {
 		return err
 	}
@@ -522,10 +522,10 @@ func (i *inode) RmDir(ctx context.Context, name string, child *vfs.Dentry) error
 func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMode, opcode linux.FUSEOpcode, payload marshal.Marshallable) (*vfs.Dentry, error) {
 	kernelTask := kernel.TaskFromContext(ctx)
 	if kernelTask == nil {
-		log.Warningf("fusefs.Inode.newEntry: couldn't get kernel task from context", i.NodeID)
+		log.Warningf("fusefs.Inode.newEntry: couldn't get kernel task from context", i.nodeID)
 		return nil, syserror.EINVAL
 	}
-	req, err := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.NodeID, opcode, payload)
+	req, err := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.nodeID, opcode, payload)
 	if err != nil {
 		return nil, err
 	}
@@ -552,7 +552,7 @@ func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMo
 	return child.VFSDentry(), nil
 }
 
-// Getlink implements Inode.Getlink.
+// Getlink implements kernfs.Inode.Getlink.
 func (i *inode) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDentry, string, error) {
 	path, err := i.Readlink(ctx, mnt)
 	return vfs.VirtualDentry{}, path, err
@@ -563,13 +563,13 @@ func (i *inode) Readlink(ctx context.Context, mnt *vfs.Mount) (string, error) {
 	if i.Mode().FileType()&linux.S_IFLNK == 0 {
 		return "", syserror.EINVAL
 	}
-	if i.link == "" {
+	if len(i.link) == 0 {
 		kernelTask := kernel.TaskFromContext(ctx)
 		if kernelTask == nil {
 			log.Warningf("fusefs.Inode.Readlink: couldn't get kernel task from context")
 			return "", syserror.EINVAL
 		}
-		req, err := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.NodeID, linux.FUSE_READLINK, &linux.FUSEEmptyIn{})
+		req, err := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.nodeID, linux.FUSE_READLINK, &linux.FUSEEmptyIn{})
 		if err != nil {
 			return "", err
 		}
@@ -675,7 +675,7 @@ func (i *inode) getAttr(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOp
 		GetAttrFlags: flags,
 		Fh:           fh,
 	}
-	req, err := i.fs.conn.NewRequest(creds, uint32(task.ThreadID()), i.NodeID, linux.FUSE_GETATTR, &in)
+	req, err := i.fs.conn.NewRequest(creds, uint32(task.ThreadID()), i.nodeID, linux.FUSE_GETATTR, &in)
 	if err != nil {
 		return linux.FUSEAttr{}, err
 	}
@@ -798,7 +798,7 @@ func (i *inode) setAttr(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre
 		UID:       opts.Stat.UID,
 		GID:       opts.Stat.GID,
 	}
-	req, err := conn.NewRequest(creds, uint32(task.ThreadID()), i.NodeID, linux.FUSE_SETATTR, &in)
+	req, err := conn.NewRequest(creds, uint32(task.ThreadID()), i.nodeID, linux.FUSE_SETATTR, &in)
 	if err != nil {
 		return err
 	}
diff --git a/pkg/sentry/fsimpl/fuse/read_write.go b/pkg/sentry/fsimpl/fuse/read_write.go
index 22a018e5e7..625d1547fe 100644
--- a/pkg/sentry/fsimpl/fuse/read_write.go
+++ b/pkg/sentry/fsimpl/fuse/read_write.go
@@ -79,7 +79,7 @@ func (fs *filesystem) ReadInPages(ctx context.Context, fd *regularFileFD, off ui
 		in.Offset = off + (uint64(pagesRead) << usermem.PageShift)
 		in.Size = pagesCanRead << usermem.PageShift
 
-		req, err := fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(t.ThreadID()), fd.inode().NodeID, linux.FUSE_READ, &in)
+		req, err := fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(t.ThreadID()), fd.inode().nodeID, linux.FUSE_READ, &in)
 		if err != nil {
 			return nil, 0, err
 		}
@@ -203,7 +203,7 @@ func (fs *filesystem) Write(ctx context.Context, fd *regularFileFD, off uint64,
 		in.Offset = off + uint64(written)
 		in.Size = toWrite
 
-		req, err := fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(t.ThreadID()), fd.inode().NodeID, linux.FUSE_WRITE, &in)
+		req, err := fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(t.ThreadID()), fd.inode().nodeID, linux.FUSE_WRITE, &in)
 		if err != nil {
 			return 0, err
 		}

From 8d0f76dda8df42f1701e6d82347ecf69b6271a13 Mon Sep 17 00:00:00 2001
From: Boyuan He <heboyuan@google.com>
Date: Fri, 11 Sep 2020 23:28:13 +0000
Subject: [PATCH 204/211] fuse_open: add padding to open out request

---
 pkg/abi/linux/fuse.go | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index fdd22a13d4..c2357a2ee8 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -156,9 +156,9 @@ const (
 
 // Constants relevant to FUSE operations.
 const (
-	FUSE_NAME_MAX       = 1024
-	FUSE_PAGE_SIZE      = 4096
-	FUSE_DIRENT_ALIGN   = 8
+	FUSE_NAME_MAX     = 1024
+	FUSE_PAGE_SIZE    = 4096
+	FUSE_DIRENT_ALIGN = 8
 )
 
 // FUSEInitIn is the request sent by the kernel to the daemon,
@@ -402,6 +402,8 @@ type FUSEOpenOut struct {
 
 	// OpenFlag for the opened file.
 	OpenFlag uint32
+
+	_ uint32
 }
 
 // FUSE_READ flags, consistent with the ones in include/uapi/linux/fuse.h.

From 5ecebda34332d0368794efe3f4e30e4fbbe523a0 Mon Sep 17 00:00:00 2001
From: gVisor bot <gvisor-bot@google.com>
Date: Thu, 17 Sep 2020 19:08:05 +0000
Subject: [PATCH 205/211] Merge release-20200907.0-123-gf0b1bd434 (automated)

---
 pkg/abi/linux/linux_abi_autogen_unsafe.go     | 48 +++++++++----------
 .../linux/linux_amd64_abi_autogen_unsafe.go   |  6 +--
 .../linux/linux_arm64_abi_autogen_unsafe.go   |  2 +-
 pkg/sentry/fsimpl/devpts/root_inode_refs.go   |  5 +-
 pkg/sentry/fsimpl/fuse/inode_refs.go          |  5 +-
 .../fsimpl/host/connected_endpoint_refs.go    |  5 +-
 pkg/sentry/fsimpl/host/inode_refs.go          |  5 +-
 pkg/sentry/fsimpl/kernfs/dentry_refs.go       |  5 +-
 .../fsimpl/kernfs/static_directory_refs.go    |  5 +-
 pkg/sentry/fsimpl/proc/fd_dir_inode_refs.go   |  5 +-
 .../fsimpl/proc/fd_info_dir_inode_refs.go     |  5 +-
 pkg/sentry/fsimpl/proc/subtasks_inode_refs.go |  5 +-
 pkg/sentry/fsimpl/proc/task_inode_refs.go     |  5 +-
 pkg/sentry/fsimpl/proc/tasks_inode_refs.go    |  5 +-
 pkg/sentry/fsimpl/sys/dir_refs.go             |  5 +-
 pkg/sentry/fsimpl/tmpfs/inode_refs.go         |  5 +-
 pkg/sentry/kernel/fd_table_refs.go            |  5 +-
 pkg/sentry/kernel/fs_context_refs.go          |  5 +-
 pkg/sentry/kernel/process_group_refs.go       |  5 +-
 ...seqatomic_taskgoroutineschedinfo_unsafe.go |  5 +-
 pkg/sentry/kernel/session_refs.go             |  5 +-
 pkg/sentry/kernel/shm/shm_refs.go             |  5 +-
 pkg/sentry/mm/aio_mappable_refs.go            |  5 +-
 pkg/sentry/mm/special_mappable_refs.go        |  5 +-
 pkg/sentry/platform/ring0/defs_impl_amd64.go  |  4 +-
 pkg/sentry/platform/ring0/defs_impl_arm64.go  |  6 +--
 pkg/sentry/socket/unix/socket_refs.go         |  5 +-
 .../socket/unix/transport/queue_refs.go       |  5 +-
 .../time/seqatomic_parameters_unsafe.go       |  5 +-
 pkg/sentry/vfs/file_description_refs.go       |  5 +-
 pkg/sentry/vfs/filesystem_refs.go             |  5 +-
 pkg/sentry/vfs/mount_namespace_refs.go        |  5 +-
 pkg/tcpip/link/tun/tun_endpoint_refs.go       |  5 +-
 runsc/cmd/gofer.go                            | 15 +++---
 runsc/container/container.go                  |  3 ++
 runsc/fsgofer/fsgofer.go                      |  3 ++
 36 files changed, 132 insertions(+), 95 deletions(-)

diff --git a/pkg/abi/linux/linux_abi_autogen_unsafe.go b/pkg/abi/linux/linux_abi_autogen_unsafe.go
index 0e4ef2f4d0..7762d50b22 100644
--- a/pkg/abi/linux/linux_abi_autogen_unsafe.go
+++ b/pkg/abi/linux/linux_abi_autogen_unsafe.go
@@ -1125,7 +1125,7 @@ func (s *Statx) UnmarshalUnsafe(src []byte) {
 // CopyOutN implements marshal.Marshallable.CopyOutN.
 //go:nosplit
 func (s *Statx) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) {
-    if !s.Ctime.Packed() && s.Mtime.Packed() && s.Atime.Packed() && s.Btime.Packed() {
+    if !s.Atime.Packed() && s.Btime.Packed() && s.Ctime.Packed() && s.Mtime.Packed() {
         // Type Statx doesn't have a packed layout in memory, fall back to MarshalBytes.
         buf := cc.CopyScratchBuffer(s.SizeBytes()) // escapes: okay.
         s.MarshalBytes(buf) // escapes: fallback.
@@ -1155,7 +1155,7 @@ func (s *Statx) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, error)
 // CopyIn implements marshal.Marshallable.CopyIn.
 //go:nosplit
 func (s *Statx) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) {
-    if !s.Mtime.Packed() && s.Atime.Packed() && s.Btime.Packed() && s.Ctime.Packed() {
+    if !s.Atime.Packed() && s.Btime.Packed() && s.Ctime.Packed() && s.Mtime.Packed() {
         // Type Statx doesn't have a packed layout in memory, fall back to UnmarshalBytes.
         buf := cc.CopyScratchBuffer(s.SizeBytes()) // escapes: okay.
         length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
@@ -1656,7 +1656,7 @@ func (f *FUSEHeaderIn) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, e
 
 // WriteTo implements io.WriterTo.WriteTo.
 func (f *FUSEHeaderIn) WriteTo(writer io.Writer) (int64, error) {
-    if !f.Opcode.Packed() && f.Unique.Packed() {
+    if !f.Unique.Packed() && f.Opcode.Packed() {
         // Type FUSEHeaderIn doesn't have a packed layout in memory, fall back to MarshalBytes.
         buf := make([]byte, f.SizeBytes())
         f.MarshalBytes(buf)
@@ -4609,7 +4609,7 @@ func (i *IPTIP) Packed() bool {
 
 // MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
 func (i *IPTIP) MarshalUnsafe(dst []byte) {
-    if i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() {
+    if i.SrcMask.Packed() && i.DstMask.Packed() && i.Src.Packed() && i.Dst.Packed() {
         safecopy.CopyIn(dst, unsafe.Pointer(i))
     } else {
         // Type IPTIP doesn't have a packed layout in memory, fallback to MarshalBytes.
@@ -4619,7 +4619,7 @@ func (i *IPTIP) MarshalUnsafe(dst []byte) {
 
 // UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe.
 func (i *IPTIP) UnmarshalUnsafe(src []byte) {
-    if i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() {
+    if i.DstMask.Packed() && i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() {
         safecopy.CopyOut(unsafe.Pointer(i), src)
     } else {
         // Type IPTIP doesn't have a packed layout in memory, fallback to UnmarshalBytes.
@@ -4686,7 +4686,7 @@ func (i *IPTIP) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) {
 
 // WriteTo implements io.WriterTo.WriteTo.
 func (i *IPTIP) WriteTo(writer io.Writer) (int64, error) {
-    if !i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() {
+    if !i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() && i.Src.Packed() {
         // Type IPTIP doesn't have a packed layout in memory, fall back to MarshalBytes.
         buf := make([]byte, i.SizeBytes())
         i.MarshalBytes(buf)
@@ -5395,7 +5395,7 @@ func (i *IP6TEntry) UnmarshalBytes(src []byte) {
 // Packed implements marshal.Marshallable.Packed.
 //go:nosplit
 func (i *IP6TEntry) Packed() bool {
-    return i.IPv6.Packed() && i.Counters.Packed()
+    return i.Counters.Packed() && i.IPv6.Packed()
 }
 
 // MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
@@ -5421,7 +5421,7 @@ func (i *IP6TEntry) UnmarshalUnsafe(src []byte) {
 // CopyOutN implements marshal.Marshallable.CopyOutN.
 //go:nosplit
 func (i *IP6TEntry) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) {
-    if !i.IPv6.Packed() && i.Counters.Packed() {
+    if !i.Counters.Packed() && i.IPv6.Packed() {
         // Type IP6TEntry doesn't have a packed layout in memory, fall back to MarshalBytes.
         buf := cc.CopyScratchBuffer(i.SizeBytes()) // escapes: okay.
         i.MarshalBytes(buf) // escapes: fallback.
@@ -5592,7 +5592,7 @@ func (i *IP6TIP) UnmarshalBytes(src []byte) {
 // Packed implements marshal.Marshallable.Packed.
 //go:nosplit
 func (i *IP6TIP) Packed() bool {
-    return i.DstMask.Packed() && i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed()
+    return i.SrcMask.Packed() && i.DstMask.Packed() && i.Src.Packed() && i.Dst.Packed()
 }
 
 // MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
@@ -5607,7 +5607,7 @@ func (i *IP6TIP) MarshalUnsafe(dst []byte) {
 
 // UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe.
 func (i *IP6TIP) UnmarshalUnsafe(src []byte) {
-    if i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() {
+    if i.SrcMask.Packed() && i.DstMask.Packed() && i.Src.Packed() && i.Dst.Packed() {
         safecopy.CopyOut(unsafe.Pointer(i), src)
     } else {
         // Type IP6TIP doesn't have a packed layout in memory, fallback to UnmarshalBytes.
@@ -5618,7 +5618,7 @@ func (i *IP6TIP) UnmarshalUnsafe(src []byte) {
 // CopyOutN implements marshal.Marshallable.CopyOutN.
 //go:nosplit
 func (i *IP6TIP) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) {
-    if !i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() {
+    if !i.DstMask.Packed() && i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() {
         // Type IP6TIP doesn't have a packed layout in memory, fall back to MarshalBytes.
         buf := cc.CopyScratchBuffer(i.SizeBytes()) // escapes: okay.
         i.MarshalBytes(buf) // escapes: fallback.
@@ -5648,7 +5648,7 @@ func (i *IP6TIP) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, error)
 // CopyIn implements marshal.Marshallable.CopyIn.
 //go:nosplit
 func (i *IP6TIP) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) {
-    if !i.DstMask.Packed() && i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() {
+    if !i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() {
         // Type IP6TIP doesn't have a packed layout in memory, fall back to UnmarshalBytes.
         buf := cc.CopyScratchBuffer(i.SizeBytes()) // escapes: okay.
         length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
@@ -5674,7 +5674,7 @@ func (i *IP6TIP) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error)
 
 // WriteTo implements io.WriterTo.WriteTo.
 func (i *IP6TIP) WriteTo(writer io.Writer) (int64, error) {
-    if !i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() {
+    if !i.DstMask.Packed() && i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() {
         // Type IP6TIP doesn't have a packed layout in memory, fall back to MarshalBytes.
         buf := make([]byte, i.SizeBytes())
         i.MarshalBytes(buf)
@@ -6319,7 +6319,7 @@ func (s *SemidDS) Packed() bool {
 
 // MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
 func (s *SemidDS) MarshalUnsafe(dst []byte) {
-    if s.SemOTime.Packed() && s.SemCTime.Packed() && s.SemPerm.Packed() {
+    if s.SemPerm.Packed() && s.SemOTime.Packed() && s.SemCTime.Packed() {
         safecopy.CopyIn(dst, unsafe.Pointer(s))
     } else {
         // Type SemidDS doesn't have a packed layout in memory, fallback to MarshalBytes.
@@ -6329,7 +6329,7 @@ func (s *SemidDS) MarshalUnsafe(dst []byte) {
 
 // UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe.
 func (s *SemidDS) UnmarshalUnsafe(src []byte) {
-    if s.SemPerm.Packed() && s.SemOTime.Packed() && s.SemCTime.Packed() {
+    if s.SemCTime.Packed() && s.SemPerm.Packed() && s.SemOTime.Packed() {
         safecopy.CopyOut(unsafe.Pointer(s), src)
     } else {
         // Type SemidDS doesn't have a packed layout in memory, fallback to UnmarshalBytes.
@@ -6340,7 +6340,7 @@ func (s *SemidDS) UnmarshalUnsafe(src []byte) {
 // CopyOutN implements marshal.Marshallable.CopyOutN.
 //go:nosplit
 func (s *SemidDS) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) {
-    if !s.SemPerm.Packed() && s.SemOTime.Packed() && s.SemCTime.Packed() {
+    if !s.SemOTime.Packed() && s.SemCTime.Packed() && s.SemPerm.Packed() {
         // Type SemidDS doesn't have a packed layout in memory, fall back to MarshalBytes.
         buf := cc.CopyScratchBuffer(s.SizeBytes()) // escapes: okay.
         s.MarshalBytes(buf) // escapes: fallback.
@@ -6666,7 +6666,7 @@ func (s *ShmidDS) Packed() bool {
 
 // MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
 func (s *ShmidDS) MarshalUnsafe(dst []byte) {
-    if s.ShmDtime.Packed() && s.ShmCtime.Packed() && s.ShmPerm.Packed() && s.ShmAtime.Packed() {
+    if s.ShmPerm.Packed() && s.ShmAtime.Packed() && s.ShmDtime.Packed() && s.ShmCtime.Packed() {
         safecopy.CopyIn(dst, unsafe.Pointer(s))
     } else {
         // Type ShmidDS doesn't have a packed layout in memory, fallback to MarshalBytes.
@@ -6687,7 +6687,7 @@ func (s *ShmidDS) UnmarshalUnsafe(src []byte) {
 // CopyOutN implements marshal.Marshallable.CopyOutN.
 //go:nosplit
 func (s *ShmidDS) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) {
-    if !s.ShmCtime.Packed() && s.ShmPerm.Packed() && s.ShmAtime.Packed() && s.ShmDtime.Packed() {
+    if !s.ShmPerm.Packed() && s.ShmAtime.Packed() && s.ShmDtime.Packed() && s.ShmCtime.Packed() {
         // Type ShmidDS doesn't have a packed layout in memory, fall back to MarshalBytes.
         buf := cc.CopyScratchBuffer(s.SizeBytes()) // escapes: okay.
         s.MarshalBytes(buf) // escapes: fallback.
@@ -6743,7 +6743,7 @@ func (s *ShmidDS) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error)
 
 // WriteTo implements io.WriterTo.WriteTo.
 func (s *ShmidDS) WriteTo(writer io.Writer) (int64, error) {
-    if !s.ShmPerm.Packed() && s.ShmAtime.Packed() && s.ShmDtime.Packed() && s.ShmCtime.Packed() {
+    if !s.ShmCtime.Packed() && s.ShmPerm.Packed() && s.ShmAtime.Packed() && s.ShmDtime.Packed() {
         // Type ShmidDS doesn't have a packed layout in memory, fall back to MarshalBytes.
         buf := make([]byte, s.SizeBytes())
         s.MarshalBytes(buf)
@@ -8774,7 +8774,7 @@ func (i *Itimerspec) Packed() bool {
 
 // MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
 func (i *Itimerspec) MarshalUnsafe(dst []byte) {
-    if i.Value.Packed() && i.Interval.Packed() {
+    if i.Interval.Packed() && i.Value.Packed() {
         safecopy.CopyIn(dst, unsafe.Pointer(i))
     } else {
         // Type Itimerspec doesn't have a packed layout in memory, fallback to MarshalBytes.
@@ -8795,7 +8795,7 @@ func (i *Itimerspec) UnmarshalUnsafe(src []byte) {
 // CopyOutN implements marshal.Marshallable.CopyOutN.
 //go:nosplit
 func (i *Itimerspec) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) {
-    if !i.Interval.Packed() && i.Value.Packed() {
+    if !i.Value.Packed() && i.Interval.Packed() {
         // Type Itimerspec doesn't have a packed layout in memory, fall back to MarshalBytes.
         buf := cc.CopyScratchBuffer(i.SizeBytes()) // escapes: okay.
         i.MarshalBytes(buf) // escapes: fallback.
@@ -8955,7 +8955,7 @@ func (i *ItimerVal) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, err
 // CopyIn implements marshal.Marshallable.CopyIn.
 //go:nosplit
 func (i *ItimerVal) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) {
-    if !i.Interval.Packed() && i.Value.Packed() {
+    if !i.Value.Packed() && i.Interval.Packed() {
         // Type ItimerVal doesn't have a packed layout in memory, fall back to UnmarshalBytes.
         buf := cc.CopyScratchBuffer(i.SizeBytes()) // escapes: okay.
         length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
@@ -9143,7 +9143,7 @@ func (t *Tms) MarshalUnsafe(dst []byte) {
 
 // UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe.
 func (t *Tms) UnmarshalUnsafe(src []byte) {
-    if t.UTime.Packed() && t.STime.Packed() && t.CUTime.Packed() && t.CSTime.Packed() {
+    if t.CSTime.Packed() && t.UTime.Packed() && t.STime.Packed() && t.CUTime.Packed() {
         safecopy.CopyOut(unsafe.Pointer(t), src)
     } else {
         // Type Tms doesn't have a packed layout in memory, fallback to UnmarshalBytes.
@@ -9210,7 +9210,7 @@ func (t *Tms) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) {
 
 // WriteTo implements io.WriterTo.WriteTo.
 func (t *Tms) WriteTo(writer io.Writer) (int64, error) {
-    if !t.UTime.Packed() && t.STime.Packed() && t.CUTime.Packed() && t.CSTime.Packed() {
+    if !t.CSTime.Packed() && t.UTime.Packed() && t.STime.Packed() && t.CUTime.Packed() {
         // Type Tms doesn't have a packed layout in memory, fall back to MarshalBytes.
         buf := make([]byte, t.SizeBytes())
         t.MarshalBytes(buf)
diff --git a/pkg/abi/linux/linux_amd64_abi_autogen_unsafe.go b/pkg/abi/linux/linux_amd64_abi_autogen_unsafe.go
index c4de4b9d24..4c33bb78f0 100644
--- a/pkg/abi/linux/linux_amd64_abi_autogen_unsafe.go
+++ b/pkg/abi/linux/linux_amd64_abi_autogen_unsafe.go
@@ -288,7 +288,7 @@ func (s *Stat) UnmarshalBytes(src []byte) {
 // Packed implements marshal.Marshallable.Packed.
 //go:nosplit
 func (s *Stat) Packed() bool {
-    return s.MTime.Packed() && s.CTime.Packed() && s.ATime.Packed()
+    return s.CTime.Packed() && s.ATime.Packed() && s.MTime.Packed()
 }
 
 // MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
@@ -303,7 +303,7 @@ func (s *Stat) MarshalUnsafe(dst []byte) {
 
 // UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe.
 func (s *Stat) UnmarshalUnsafe(src []byte) {
-    if s.ATime.Packed() && s.MTime.Packed() && s.CTime.Packed() {
+    if s.CTime.Packed() && s.ATime.Packed() && s.MTime.Packed() {
         safecopy.CopyOut(unsafe.Pointer(s), src)
     } else {
         // Type Stat doesn't have a packed layout in memory, fallback to UnmarshalBytes.
@@ -370,7 +370,7 @@ func (s *Stat) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) {
 
 // WriteTo implements io.WriterTo.WriteTo.
 func (s *Stat) WriteTo(writer io.Writer) (int64, error) {
-    if !s.MTime.Packed() && s.CTime.Packed() && s.ATime.Packed() {
+    if !s.ATime.Packed() && s.MTime.Packed() && s.CTime.Packed() {
         // Type Stat doesn't have a packed layout in memory, fall back to MarshalBytes.
         buf := make([]byte, s.SizeBytes())
         s.MarshalBytes(buf)
diff --git a/pkg/abi/linux/linux_arm64_abi_autogen_unsafe.go b/pkg/abi/linux/linux_arm64_abi_autogen_unsafe.go
index 2a2e4ec540..8856f4a8ee 100644
--- a/pkg/abi/linux/linux_arm64_abi_autogen_unsafe.go
+++ b/pkg/abi/linux/linux_arm64_abi_autogen_unsafe.go
@@ -377,7 +377,7 @@ func (s *Stat) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) {
 
 // WriteTo implements io.WriterTo.WriteTo.
 func (s *Stat) WriteTo(writer io.Writer) (int64, error) {
-    if !s.MTime.Packed() && s.CTime.Packed() && s.ATime.Packed() {
+    if !s.ATime.Packed() && s.MTime.Packed() && s.CTime.Packed() {
         // Type Stat doesn't have a packed layout in memory, fall back to MarshalBytes.
         buf := make([]byte, s.SizeBytes())
         s.MarshalBytes(buf)
diff --git a/pkg/sentry/fsimpl/devpts/root_inode_refs.go b/pkg/sentry/fsimpl/devpts/root_inode_refs.go
index 4abb664317..0518012027 100644
--- a/pkg/sentry/fsimpl/devpts/root_inode_refs.go
+++ b/pkg/sentry/fsimpl/devpts/root_inode_refs.go
@@ -2,10 +2,11 @@ package devpts
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/fsimpl/fuse/inode_refs.go b/pkg/sentry/fsimpl/fuse/inode_refs.go
index 4fb4d4da7e..6b9456e1d5 100644
--- a/pkg/sentry/fsimpl/fuse/inode_refs.go
+++ b/pkg/sentry/fsimpl/fuse/inode_refs.go
@@ -2,10 +2,11 @@ package fuse
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/fsimpl/host/connected_endpoint_refs.go b/pkg/sentry/fsimpl/host/connected_endpoint_refs.go
index 225f597823..babb3f664d 100644
--- a/pkg/sentry/fsimpl/host/connected_endpoint_refs.go
+++ b/pkg/sentry/fsimpl/host/connected_endpoint_refs.go
@@ -2,10 +2,11 @@ package host
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/fsimpl/host/inode_refs.go b/pkg/sentry/fsimpl/host/inode_refs.go
index 4075eae171..17f90ce4ad 100644
--- a/pkg/sentry/fsimpl/host/inode_refs.go
+++ b/pkg/sentry/fsimpl/host/inode_refs.go
@@ -2,10 +2,11 @@ package host
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/fsimpl/kernfs/dentry_refs.go b/pkg/sentry/fsimpl/kernfs/dentry_refs.go
index f99d4941a9..79863b3bcc 100644
--- a/pkg/sentry/fsimpl/kernfs/dentry_refs.go
+++ b/pkg/sentry/fsimpl/kernfs/dentry_refs.go
@@ -2,10 +2,11 @@ package kernfs
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/fsimpl/kernfs/static_directory_refs.go b/pkg/sentry/fsimpl/kernfs/static_directory_refs.go
index 2b258010ea..478b04bddf 100644
--- a/pkg/sentry/fsimpl/kernfs/static_directory_refs.go
+++ b/pkg/sentry/fsimpl/kernfs/static_directory_refs.go
@@ -2,10 +2,11 @@ package kernfs
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/fsimpl/proc/fd_dir_inode_refs.go b/pkg/sentry/fsimpl/proc/fd_dir_inode_refs.go
index 467c327526..9431c15066 100644
--- a/pkg/sentry/fsimpl/proc/fd_dir_inode_refs.go
+++ b/pkg/sentry/fsimpl/proc/fd_dir_inode_refs.go
@@ -2,10 +2,11 @@ package proc
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/fsimpl/proc/fd_info_dir_inode_refs.go b/pkg/sentry/fsimpl/proc/fd_info_dir_inode_refs.go
index 3fcda09484..872b20eb0b 100644
--- a/pkg/sentry/fsimpl/proc/fd_info_dir_inode_refs.go
+++ b/pkg/sentry/fsimpl/proc/fd_info_dir_inode_refs.go
@@ -2,10 +2,11 @@ package proc
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/fsimpl/proc/subtasks_inode_refs.go b/pkg/sentry/fsimpl/proc/subtasks_inode_refs.go
index 2da6801c29..c6d9b35221 100644
--- a/pkg/sentry/fsimpl/proc/subtasks_inode_refs.go
+++ b/pkg/sentry/fsimpl/proc/subtasks_inode_refs.go
@@ -2,10 +2,11 @@ package proc
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/fsimpl/proc/task_inode_refs.go b/pkg/sentry/fsimpl/proc/task_inode_refs.go
index b6e19844cc..7144884502 100644
--- a/pkg/sentry/fsimpl/proc/task_inode_refs.go
+++ b/pkg/sentry/fsimpl/proc/task_inode_refs.go
@@ -2,10 +2,11 @@ package proc
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/fsimpl/proc/tasks_inode_refs.go b/pkg/sentry/fsimpl/proc/tasks_inode_refs.go
index 6207364e44..22d9cc488d 100644
--- a/pkg/sentry/fsimpl/proc/tasks_inode_refs.go
+++ b/pkg/sentry/fsimpl/proc/tasks_inode_refs.go
@@ -2,10 +2,11 @@ package proc
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/fsimpl/sys/dir_refs.go b/pkg/sentry/fsimpl/sys/dir_refs.go
index 9d15d4c80f..89609b1984 100644
--- a/pkg/sentry/fsimpl/sys/dir_refs.go
+++ b/pkg/sentry/fsimpl/sys/dir_refs.go
@@ -2,10 +2,11 @@ package sys
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/fsimpl/tmpfs/inode_refs.go b/pkg/sentry/fsimpl/tmpfs/inode_refs.go
index ff5e99c52c..dbf0b27661 100644
--- a/pkg/sentry/fsimpl/tmpfs/inode_refs.go
+++ b/pkg/sentry/fsimpl/tmpfs/inode_refs.go
@@ -2,10 +2,11 @@ package tmpfs
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/kernel/fd_table_refs.go b/pkg/sentry/kernel/fd_table_refs.go
index a630289c9c..ecba138ac7 100644
--- a/pkg/sentry/kernel/fd_table_refs.go
+++ b/pkg/sentry/kernel/fd_table_refs.go
@@ -2,10 +2,11 @@ package kernel
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/kernel/fs_context_refs.go b/pkg/sentry/kernel/fs_context_refs.go
index e8bb1e6ee9..fb2fde971b 100644
--- a/pkg/sentry/kernel/fs_context_refs.go
+++ b/pkg/sentry/kernel/fs_context_refs.go
@@ -2,10 +2,11 @@ package kernel
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/kernel/process_group_refs.go b/pkg/sentry/kernel/process_group_refs.go
index 4b257d5487..4ed6e6458a 100644
--- a/pkg/sentry/kernel/process_group_refs.go
+++ b/pkg/sentry/kernel/process_group_refs.go
@@ -2,10 +2,11 @@ package kernel
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go
index a37f74a10f..4e10436e6f 100644
--- a/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go
+++ b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go
@@ -1,11 +1,12 @@
 package kernel
 
 import (
-	"fmt"
-	"gvisor.dev/gvisor/pkg/sync"
 	"reflect"
 	"strings"
 	"unsafe"
+
+	"fmt"
+	"gvisor.dev/gvisor/pkg/sync"
 )
 
 // SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race
diff --git a/pkg/sentry/kernel/session_refs.go b/pkg/sentry/kernel/session_refs.go
index 204fdd0607..f2e1bb7971 100644
--- a/pkg/sentry/kernel/session_refs.go
+++ b/pkg/sentry/kernel/session_refs.go
@@ -2,10 +2,11 @@ package kernel
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/kernel/shm/shm_refs.go b/pkg/sentry/kernel/shm/shm_refs.go
index 4bffdd0b3a..51e07d0b3d 100644
--- a/pkg/sentry/kernel/shm/shm_refs.go
+++ b/pkg/sentry/kernel/shm/shm_refs.go
@@ -2,10 +2,11 @@ package shm
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/mm/aio_mappable_refs.go b/pkg/sentry/mm/aio_mappable_refs.go
index 1417471370..b99909f077 100644
--- a/pkg/sentry/mm/aio_mappable_refs.go
+++ b/pkg/sentry/mm/aio_mappable_refs.go
@@ -2,10 +2,11 @@ package mm
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/mm/special_mappable_refs.go b/pkg/sentry/mm/special_mappable_refs.go
index 0921a5d180..035bbe6907 100644
--- a/pkg/sentry/mm/special_mappable_refs.go
+++ b/pkg/sentry/mm/special_mappable_refs.go
@@ -2,10 +2,11 @@ package mm
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/platform/ring0/defs_impl_amd64.go b/pkg/sentry/platform/ring0/defs_impl_amd64.go
index 50ac3040e3..029d699fe0 100644
--- a/pkg/sentry/platform/ring0/defs_impl_amd64.go
+++ b/pkg/sentry/platform/ring0/defs_impl_amd64.go
@@ -1,11 +1,11 @@
 package ring0
 
 import (
+	"gvisor.dev/gvisor/pkg/cpuid"
+	"gvisor.dev/gvisor/pkg/sentry/arch"
 	"gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
 
 	"fmt"
-	"gvisor.dev/gvisor/pkg/cpuid"
-	"gvisor.dev/gvisor/pkg/sentry/arch"
 	"gvisor.dev/gvisor/pkg/usermem"
 	"io"
 	"reflect"
diff --git a/pkg/sentry/platform/ring0/defs_impl_arm64.go b/pkg/sentry/platform/ring0/defs_impl_arm64.go
index f567ef868f..9a83ad409b 100644
--- a/pkg/sentry/platform/ring0/defs_impl_arm64.go
+++ b/pkg/sentry/platform/ring0/defs_impl_arm64.go
@@ -1,13 +1,13 @@
 package ring0
 
 import (
+	"gvisor.dev/gvisor/pkg/sentry/arch"
 	"gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
+	"io"
+	"reflect"
 
 	"fmt"
-	"gvisor.dev/gvisor/pkg/sentry/arch"
 	"gvisor.dev/gvisor/pkg/usermem"
-	"io"
-	"reflect"
 )
 
 // Useful bits.
diff --git a/pkg/sentry/socket/unix/socket_refs.go b/pkg/sentry/socket/unix/socket_refs.go
index 39aaedc7f4..dababb85f5 100644
--- a/pkg/sentry/socket/unix/socket_refs.go
+++ b/pkg/sentry/socket/unix/socket_refs.go
@@ -2,10 +2,11 @@ package unix
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/socket/unix/transport/queue_refs.go b/pkg/sentry/socket/unix/transport/queue_refs.go
index 4c3dcd13fd..0d4e34988d 100644
--- a/pkg/sentry/socket/unix/transport/queue_refs.go
+++ b/pkg/sentry/socket/unix/transport/queue_refs.go
@@ -2,10 +2,11 @@ package transport
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/time/seqatomic_parameters_unsafe.go b/pkg/sentry/time/seqatomic_parameters_unsafe.go
index 88d6b55696..5eb560f575 100644
--- a/pkg/sentry/time/seqatomic_parameters_unsafe.go
+++ b/pkg/sentry/time/seqatomic_parameters_unsafe.go
@@ -1,11 +1,12 @@
 package time
 
 import (
-	"fmt"
-	"gvisor.dev/gvisor/pkg/sync"
 	"reflect"
 	"strings"
 	"unsafe"
+
+	"fmt"
+	"gvisor.dev/gvisor/pkg/sync"
 )
 
 // SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race
diff --git a/pkg/sentry/vfs/file_description_refs.go b/pkg/sentry/vfs/file_description_refs.go
index 6c7747259a..bdd7e65548 100644
--- a/pkg/sentry/vfs/file_description_refs.go
+++ b/pkg/sentry/vfs/file_description_refs.go
@@ -2,10 +2,11 @@ package vfs
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/vfs/filesystem_refs.go b/pkg/sentry/vfs/filesystem_refs.go
index 96f6818315..38a9a986fd 100644
--- a/pkg/sentry/vfs/filesystem_refs.go
+++ b/pkg/sentry/vfs/filesystem_refs.go
@@ -2,10 +2,11 @@ package vfs
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/sentry/vfs/mount_namespace_refs.go b/pkg/sentry/vfs/mount_namespace_refs.go
index 4c422c81fc..63285fb8eb 100644
--- a/pkg/sentry/vfs/mount_namespace_refs.go
+++ b/pkg/sentry/vfs/mount_namespace_refs.go
@@ -2,10 +2,11 @@ package vfs
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/pkg/tcpip/link/tun/tun_endpoint_refs.go b/pkg/tcpip/link/tun/tun_endpoint_refs.go
index 9a38142f5e..e0595429c8 100644
--- a/pkg/tcpip/link/tun/tun_endpoint_refs.go
+++ b/pkg/tcpip/link/tun/tun_endpoint_refs.go
@@ -2,10 +2,11 @@ package tun
 
 import (
 	"fmt"
-	"gvisor.dev/gvisor/pkg/log"
-	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"runtime"
 	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 )
 
 // ownerType is used to customize logging. Note that we use a pointer to T so
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index 371fcc0ae9..bba00d5511 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -62,8 +62,9 @@ type Gofer struct {
 	applyCaps bool
 	setUpRoot bool
 
-	specFD   int
-	mountsFD int
+	panicOnWrite bool
+	specFD       int
+	mountsFD     int
 }
 
 // Name implements subcommands.Command.
@@ -86,6 +87,7 @@ func (g *Gofer) SetFlags(f *flag.FlagSet) {
 	f.StringVar(&g.bundleDir, "bundle", "", "path to the root of the bundle directory, defaults to the current directory")
 	f.Var(&g.ioFDs, "io-fds", "list of FDs to connect 9P servers. They must follow this order: root first, then mounts as defined in the spec")
 	f.BoolVar(&g.applyCaps, "apply-caps", true, "if true, apply capabilities to restrict what the Gofer process can do")
+	f.BoolVar(&g.panicOnWrite, "panic-on-write", false, "if true, panics on attempts to write to RO mounts. RW mounts are unnaffected")
 	f.BoolVar(&g.setUpRoot, "setup-root", true, "if true, set up an empty root for the process")
 	f.IntVar(&g.specFD, "spec-fd", -1, "required fd with the container spec")
 	f.IntVar(&g.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to write list of mounts after they have been resolved (direct paths, no symlinks).")
@@ -166,7 +168,8 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 	// Start with root mount, then add any other additional mount as needed.
 	ats := make([]p9.Attacher, 0, len(spec.Mounts)+1)
 	ap, err := fsgofer.NewAttachPoint("/", fsgofer.Config{
-		ROMount: spec.Root.Readonly || conf.Overlay,
+		ROMount:      spec.Root.Readonly || conf.Overlay,
+		PanicOnWrite: g.panicOnWrite,
 	})
 	if err != nil {
 		Fatalf("creating attach point: %v", err)
@@ -178,8 +181,9 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 	for _, m := range spec.Mounts {
 		if specutils.Is9PMount(m) {
 			cfg := fsgofer.Config{
-				ROMount: isReadonlyMount(m.Options) || conf.Overlay,
-				HostUDS: conf.FSGoferHostUDS,
+				ROMount:      isReadonlyMount(m.Options) || conf.Overlay,
+				PanicOnWrite: g.panicOnWrite,
+				HostUDS:      conf.FSGoferHostUDS,
 			}
 			ap, err := fsgofer.NewAttachPoint(m.Destination, cfg)
 			if err != nil {
@@ -312,7 +316,6 @@ func setupRootFS(spec *specs.Spec, conf *config.Config) error {
 		if err != nil {
 			return fmt.Errorf("resolving symlinks to %q: %v", spec.Process.Cwd, err)
 		}
-		log.Infof("Create working directory %q if needed", spec.Process.Cwd)
 		if err := os.MkdirAll(dst, 0755); err != nil {
 			return fmt.Errorf("creating working directory %q: %v", spec.Process.Cwd, err)
 		}
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 63478ba8c1..6e1d6a5680 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -902,6 +902,9 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu
 	}
 
 	args = append(args, "gofer", "--bundle", bundleDir)
+	if conf.Overlay {
+		args = append(args, "--panic-on-write=true")
+	}
 
 	// Open the spec file to donate to the sandbox.
 	specFile, err := specutils.OpenSpec(bundleDir)
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index 0b628c8ce6..4268d97a15 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -1181,6 +1181,9 @@ func extractErrno(err error) unix.Errno {
 
 func (l *localFile) checkROMount() error {
 	if conf := l.attachPoint.conf; conf.ROMount {
+		if conf.PanicOnWrite {
+			panic("attempt to write to RO mount")
+		}
 		return unix.EROFS
 	}
 	return nil

From d99063318481128c2151cd1901bff5b4aa684c3e Mon Sep 17 00:00:00 2001
From: gVisor bot <gvisor-bot@google.com>
Date: Mon, 28 Sep 2020 23:49:57 +0000
Subject: [PATCH 206/211] Merge release-20200921.0-57-g237b761f9 (automated)

---
 pkg/sentry/socket/hostinet/socket_vfs2.go  | 1 -
 pkg/sentry/socket/netlink/provider_vfs2.go | 1 -
 pkg/sentry/socket/unix/unix_vfs2.go        | 1 -
 3 files changed, 3 deletions(-)

diff --git a/pkg/sentry/socket/hostinet/socket_vfs2.go b/pkg/sentry/socket/hostinet/socket_vfs2.go
index 87b077e688..97bc6027f1 100644
--- a/pkg/sentry/socket/hostinet/socket_vfs2.go
+++ b/pkg/sentry/socket/hostinet/socket_vfs2.go
@@ -52,7 +52,6 @@ var _ = socket.SocketVFS2(&socketVFS2{})
 func newVFS2Socket(t *kernel.Task, family int, stype linux.SockType, protocol int, fd int, flags uint32) (*vfs.FileDescription, *syserr.Error) {
 	mnt := t.Kernel().SocketMount()
 	d := sockfs.NewDentry(t.Credentials(), mnt)
-	defer d.DecRef(t)
 
 	s := &socketVFS2{
 		socketOpsCommon: socketOpsCommon{
diff --git a/pkg/sentry/socket/netlink/provider_vfs2.go b/pkg/sentry/socket/netlink/provider_vfs2.go
index e8930f031f..bb205be0d2 100644
--- a/pkg/sentry/socket/netlink/provider_vfs2.go
+++ b/pkg/sentry/socket/netlink/provider_vfs2.go
@@ -52,7 +52,6 @@ func (*socketProviderVFS2) Socket(t *kernel.Task, stype linux.SockType, protocol
 	vfsfd := &s.vfsfd
 	mnt := t.Kernel().SocketMount()
 	d := sockfs.NewDentry(t.Credentials(), mnt)
-	defer d.DecRef(t)
 	if err := vfsfd.Init(s, linux.O_RDWR, mnt, d, &vfs.FileDescriptionOptions{
 		DenyPRead:         true,
 		DenyPWrite:        true,
diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go
index 8b1abd9223..b76c0e6fb7 100644
--- a/pkg/sentry/socket/unix/unix_vfs2.go
+++ b/pkg/sentry/socket/unix/unix_vfs2.go
@@ -55,7 +55,6 @@ var _ = socket.SocketVFS2(&SocketVFS2{})
 func NewSockfsFile(t *kernel.Task, ep transport.Endpoint, stype linux.SockType) (*vfs.FileDescription, *syserr.Error) {
 	mnt := t.Kernel().SocketMount()
 	d := sockfs.NewDentry(t.Credentials(), mnt)
-	defer d.DecRef(t)
 
 	fd, err := NewFileDescription(ep, stype, linux.O_RDWR, mnt, d, &vfs.FileLocks{})
 	if err != nil {

From 0452586453638d8faac1606dd236d45adf8424ca Mon Sep 17 00:00:00 2001
From: gVisor bot <gvisor-bot@google.com>
Date: Thu, 7 Jan 2021 19:30:09 +0000
Subject: [PATCH 207/211] Merge release-20201208.0-123-g4c5f36e7b (automated)


From bc8649720ff020c7c40825792c94ab9448053749 Mon Sep 17 00:00:00 2001
From: gVisor bot <gvisor-bot@google.com>
Date: Tue, 14 Sep 2021 23:07:30 +0000
Subject: [PATCH 208/211] Merge release-20210906.0-17-g2b46e2d19 (automated)

---
 pkg/tcpip/link/rawfile/blockingpoll_amd64.s |  2 +-
 pkg/tcpip/link/rawfile/blockingpoll_arm64.s |  2 +-
 pkg/tcpip/link/rawfile/rawfile_unsafe.go    | 16 +---------------
 3 files changed, 3 insertions(+), 17 deletions(-)

diff --git a/pkg/tcpip/link/rawfile/blockingpoll_amd64.s b/pkg/tcpip/link/rawfile/blockingpoll_amd64.s
index f2c2307209..298bad55db 100644
--- a/pkg/tcpip/link/rawfile/blockingpoll_amd64.s
+++ b/pkg/tcpip/link/rawfile/blockingpoll_amd64.s
@@ -27,7 +27,7 @@ TEXT ·BlockingPoll(SB),NOSPLIT,$0-40
 	MOVQ	$0x0, R10  // sigmask parameter which isn't used here
 	MOVQ	$0x10f, AX // SYS_PPOLL
 	SYSCALL
-	CMPQ	AX, $0xfffffffffffff002
+	CMPQ	AX, $0xfffffffffffff001
 	JLS	ok
 	MOVQ	$-1, n+24(FP)
 	NEGQ	AX
diff --git a/pkg/tcpip/link/rawfile/blockingpoll_arm64.s b/pkg/tcpip/link/rawfile/blockingpoll_arm64.s
index 8807586c72..b62888b93f 100644
--- a/pkg/tcpip/link/rawfile/blockingpoll_arm64.s
+++ b/pkg/tcpip/link/rawfile/blockingpoll_arm64.s
@@ -27,7 +27,7 @@ TEXT ·BlockingPoll(SB),NOSPLIT,$0-40
 	MOVD	$0x0, R3  // sigmask parameter which isn't used here
 	MOVD	$0x49, R8 // SYS_PPOLL
 	SVC
-	CMP	$0xfffffffffffff002, R0
+	CMP	$0xfffffffffffff001, R0
 	BLS	ok
 	MOVD	$-1, R1
 	MOVD	R1, n+24(FP)
diff --git a/pkg/tcpip/link/rawfile/rawfile_unsafe.go b/pkg/tcpip/link/rawfile/rawfile_unsafe.go
index 87a0b9a627..e76fc55b66 100644
--- a/pkg/tcpip/link/rawfile/rawfile_unsafe.go
+++ b/pkg/tcpip/link/rawfile/rawfile_unsafe.go
@@ -181,9 +181,7 @@ func BlockingReadvUntilStopped(efd int, fd int, iovecs []unix.Iovec) (int, tcpip
 		if e == 0 {
 			return int(n), nil
 		}
-		if e != 0 && e != unix.EWOULDBLOCK {
-			return 0, TranslateErrno(e)
-		}
+
 		stopped, e := BlockingPollUntilStopped(efd, fd, unix.POLLIN)
 		if stopped {
 			return -1, nil
@@ -206,10 +204,6 @@ func BlockingRecvMMsgUntilStopped(efd int, fd int, msgHdrs []MMsgHdr) (int, tcpi
 			return int(n), nil
 		}
 
-		if e != 0 && e != unix.EWOULDBLOCK {
-			return 0, TranslateErrno(e)
-		}
-
 		stopped, e := BlockingPollUntilStopped(efd, fd, unix.POLLIN)
 		if stopped {
 			return -1, nil
@@ -234,13 +228,5 @@ func BlockingPollUntilStopped(efd int, fd int, events int16) (bool, unix.Errno)
 		},
 	}
 	_, errno := BlockingPoll(&pevents[0], len(pevents), nil)
-	if errno != 0 {
-		return pevents[0].Revents&unix.POLLIN != 0, errno
-	}
-
-	if pevents[1].Revents&unix.POLLHUP != 0 || pevents[1].Revents&unix.POLLERR != 0 {
-		errno = unix.ECONNRESET
-	}
-
 	return pevents[0].Revents&unix.POLLIN != 0, errno
 }

From 22b3672d278b9ec11565122e783f1f5867e68c5a Mon Sep 17 00:00:00 2001
From: gVisor bot <gvisor-bot@google.com>
Date: Wed, 20 Jul 2022 21:53:44 +0000
Subject: [PATCH 209/211] Merge release-20220718.0-10-gbe38a15c1 (automated)

---
 pkg/tcpip/network/ipv6/ipv6.go | 781 ++++++++++++++++-----------------
 1 file changed, 374 insertions(+), 407 deletions(-)

diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index 6b3f2eb563..aca36a87c9 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -1322,84 +1322,14 @@ func (e *endpoint) deliverPacketLocally(h header.IPv6, pkt *stack.PacketBuffer,
 	_ = e.processExtensionHeaders(h, pkt, false /* forwarding */)
 }
 
-func (e *endpoint) processExtensionHeader(it *header.IPv6PayloadIterator, pkt **stack.PacketBuffer, h header.IPv6, routerAlert **header.IPv6RouterAlertOption, hasFragmentHeader *bool, forwarding bool) (bool, error) {
-	stats := e.stats.ip
-	dstAddr := h.DestinationAddress()
-	// Keep track of the start of the previous header so we can report the
-	// special case of a Hop by Hop at a location other than at the start.
-	previousHeaderStart := it.HeaderOffset()
-	extHdr, done, err := it.Next()
-	if err != nil {
-		stats.MalformedPacketsReceived.Increment()
-		return true, err
-	}
-	if done {
-		return true, nil
-	}
-
-	// As per RFC 8200, section 4:
-	//
-	//   Extension headers (except for the Hop-by-Hop Options header) are
-	//   not processed, inserted, or deleted by any node along a packet's
-	//   delivery path until the packet reaches the node identified in the
-	//   Destination Address field of the IPv6 header.
-	//
-	// Furthermore, as per RFC 8200 section 4.1, the Hop By Hop extension
-	// header is restricted to appear first in the list of extension headers.
-	//
-	// Therefore, we can immediately return once we hit any header other
-	// than the Hop-by-Hop header while forwarding a packet.
-	if forwarding {
-		if _, ok := extHdr.(header.IPv6HopByHopOptionsExtHdr); !ok {
-			return true, nil
-		}
-	}
-
-	switch extHdr := extHdr.(type) {
-	case header.IPv6HopByHopOptionsExtHdr:
-		if err := e.processIPv6HopByHopOptionsExtHdr(&extHdr, it, *pkt, dstAddr, routerAlert, previousHeaderStart, forwarding); err != nil {
-			return true, err
-		}
-	case header.IPv6RoutingExtHdr:
-		if err := e.processIPv6RoutingExtHeader(&extHdr, it, *pkt); err != nil {
-			return true, err
-		}
-	case header.IPv6FragmentExtHdr:
-		*hasFragmentHeader = true
-		if extHdr.IsAtomic() {
-			// This fragment extension header indicates that this packet is an
-			// atomic fragment. An atomic fragment is a fragment that contains
-			// all the data required to reassemble a full packet. As per RFC 6946,
-			// atomic fragments must not interfere with "normal" fragmented traffic
-			// so we skip processing the fragment instead of feeding it through the
-			// reassembly process below.
-			return false, nil
-		}
-
-		if err := e.processFragmentExtHdr(&extHdr, it, pkt, h); err != nil {
-			return true, err
-		}
-	case header.IPv6DestinationOptionsExtHdr:
-		if err := e.processIPv6DestinationOptionsExtHdr(&extHdr, it, *pkt, dstAddr); err != nil {
-			return true, err
-		}
-	case header.IPv6RawPayloadHeader:
-		if err := e.processIPv6RawPayloadHeader(&extHdr, it, *pkt, *routerAlert, previousHeaderStart, *hasFragmentHeader); err != nil {
-			return true, err
-		}
-	default:
-		// Since the iterator returns IPv6RawPayloadHeader for unknown Extension
-		// Header IDs this should never happen unless we missed a supported type
-		// here.
-		panic(fmt.Sprintf("unrecognized type from it.Next() = %T", extHdr))
-	}
-	return false, nil
-}
-
 // processExtensionHeaders processes the extension headers in the given packet.
 // Returns an error if the processing of a header failed or if the packet should
 // be discarded.
 func (e *endpoint) processExtensionHeaders(h header.IPv6, pkt *stack.PacketBuffer, forwarding bool) error {
+	stats := e.stats.ip
+	srcAddr := h.SourceAddress()
+	dstAddr := h.DestinationAddress()
+
 	// Create a VV to parse the packet. We don't plan to modify anything here.
 	// vv consists of:
 	//	- Any IPv6 header bytes after the first 40 (i.e. extensions).
@@ -1411,137 +1341,25 @@ func (e *endpoint) processExtensionHeaders(h header.IPv6, pkt *stack.PacketBuffe
 	buf.Merge(&dataBuf)
 	it := header.MakeIPv6PayloadIterator(header.IPv6ExtensionHeaderIdentifier(h.NextHeader()), buf)
 
-	// Add a reference to pkt because fragment header processing can replace this
-	// packet with a new one that has an extra reference. Adding a reference here
-	// keeps the two in parity so they can both be DecRef'd the same way.
-	pkt.IncRef()
-	defer func() {
-		pkt.DecRef()
-	}()
-
 	var (
 		hasFragmentHeader bool
 		routerAlert       *header.IPv6RouterAlertOption
+		// Create an extra packet buffer reference to keep track of the packet to
+		// DecRef so that we do not incur a memory allocation for deferring a DecRef
+		// within the loop.
+		resPktToDecRef *stack.PacketBuffer
 	)
-	for {
-		if done, err := e.processExtensionHeader(&it, &pkt, h, &routerAlert, &hasFragmentHeader, forwarding); err != nil || done {
-			return err
-		}
-	}
-}
-
-func (e *endpoint) processIPv6RawPayloadHeader(extHdr *header.IPv6RawPayloadHeader, it *header.IPv6PayloadIterator, pkt *stack.PacketBuffer, routerAlert *header.IPv6RouterAlertOption, previousHeaderStart uint32, hasFragmentHeader bool) error {
-	stats := e.stats.ip
-	// If the last header in the payload isn't a known IPv6 extension header,
-	// handle it as if it is transport layer data.å
-
-	// Calculate the number of octets parsed from data. We want to consume all
-	// the data except the unparsed portion located at the end, whose size is
-	// extHdr.Buf.Size().
-	trim := pkt.Data().Size() - int(extHdr.Buf.Size())
-
-	// For unfragmented packets, extHdr still contains the transport header.
-	// Consume that too.
-	//
-	// For reassembled fragments, pkt.TransportHeader is unset, so this is a
-	// no-op and pkt.Data begins with the transport header.
-	trim += len(pkt.TransportHeader().View())
-
-	if _, ok := pkt.Data().Consume(trim); !ok {
-		stats.MalformedPacketsReceived.Increment()
-		return fmt.Errorf("could not consume %d bytes", trim)
-	}
-
-	proto := tcpip.TransportProtocolNumber(extHdr.Identifier)
-	// If the packet was reassembled from a fragment, it will not have a
-	// transport header set yet.
-	if len(pkt.TransportHeader().View()) == 0 {
-		e.protocol.parseTransport(pkt, proto)
-	}
-
-	stats.PacketsDelivered.Increment()
-	if proto == header.ICMPv6ProtocolNumber {
-		e.handleICMP(pkt, hasFragmentHeader, routerAlert)
-		return nil
-	}
-	stats.PacketsDelivered.Increment()
-	switch res := e.dispatcher.DeliverTransportPacket(proto, pkt); res {
-	case stack.TransportPacketHandled:
-		return nil
-	case stack.TransportPacketDestinationPortUnreachable:
-		// As per RFC 4443 section 3.1:
-		//   A destination node SHOULD originate a Destination Unreachable
-		//   message with Code 4 in response to a packet for which the
-		//   transport protocol (e.g., UDP) has no listener, if that transport
-		//   protocol has no alternative means to inform the sender.
-		_ = e.protocol.returnError(&icmpReasonPortUnreachable{}, pkt, true /* deliveredLocally */)
-		return fmt.Errorf("destination port unreachable")
-	case stack.TransportPacketProtocolUnreachable:
-		// As per RFC 8200 section 4. (page 7):
-		//   Extension headers are numbered from IANA IP Protocol Numbers
-		//   [IANA-PN], the same values used for IPv4 and IPv6.  When
-		//   processing a sequence of Next Header values in a packet, the
-		//   first one that is not an extension header [IANA-EH] indicates
-		//   that the next item in the packet is the corresponding upper-layer
-		//   header.
-		// With more related information on page 8:
-		//   If, as a result of processing a header, the destination node is
-		//   required to proceed to the next header but the Next Header value
-		//   in the current header is unrecognized by the node, it should
-		//   discard the packet and send an ICMP Parameter Problem message to
-		//   the source of the packet, with an ICMP Code value of 1
-		//   ("unrecognized Next Header type encountered") and the ICMP
-		//   Pointer field containing the offset of the unrecognized value
-		//   within the original packet.
-		//
-		// Which when taken together indicate that an unknown protocol should
-		// be treated as an unrecognized next header value.
-		// The location of the Next Header field is in a different place in
-		// the initial IPv6 header than it is in the extension headers so
-		// treat it specially.
-		prevHdrIDOffset := uint32(header.IPv6NextHeaderOffset)
-		if previousHeaderStart != 0 {
-			prevHdrIDOffset = previousHeaderStart
+	defer func() {
+		if resPktToDecRef != nil {
+			resPktToDecRef.DecRef()
 		}
-		_ = e.protocol.returnError(&icmpReasonParameterProblem{
-			code:    header.ICMPv6UnknownHeader,
-			pointer: prevHdrIDOffset,
-		}, pkt, true /* deliveredLocally */)
-		return fmt.Errorf("transport protocol unreachable")
-	default:
-		panic(fmt.Sprintf("unrecognized result from DeliverTransportPacket = %d", res))
-	}
-}
-
-func (e *endpoint) processIPv6RoutingExtHeader(extHdr *header.IPv6RoutingExtHdr, it *header.IPv6PayloadIterator, pkt *stack.PacketBuffer) error {
-	// As per RFC 8200 section 4.4, if a node encounters a routing header with
-	// an unrecognized routing type value, with a non-zero Segments Left
-	// value, the node must discard the packet and send an ICMP Parameter
-	// Problem, Code 0 to the packet's Source Address, pointing to the
-	// unrecognized Routing Type.
-	//
-	// If the Segments Left is 0, the node must ignore the Routing extension
-	// header and process the next header in the packet.
-	//
-	// Note, the stack does not yet handle any type of routing extension
-	// header, so we just make sure Segments Left is zero before processing
-	// the next extension header.
-	if extHdr.SegmentsLeft() == 0 {
-		return nil
-	}
-	_ = e.protocol.returnError(&icmpReasonParameterProblem{
-		code:    header.ICMPv6ErroneousHeader,
-		pointer: it.ParseOffset(),
-	}, pkt, true /* deliveredLocally */)
-	return fmt.Errorf("found unrecognized routing type with non-zero segments left in header = %#v", extHdr)
-}
-
-func (e *endpoint) processIPv6DestinationOptionsExtHdr(extHdr *header.IPv6DestinationOptionsExtHdr, it *header.IPv6PayloadIterator, pkt *stack.PacketBuffer, dstAddr tcpip.Address) error {
-	stats := e.stats.ip
-	optsIt := extHdr.Iter()
+	}()
 
 	for {
-		opt, done, err := optsIt.Next()
+		// Keep track of the start of the previous header so we can report the
+		// special case of a Hop by Hop at a location other than at the start.
+		previousHeaderStart := it.HeaderOffset()
+		extHdr, done, err := it.Next()
 		if err != nil {
 			stats.MalformedPacketsReceived.Increment()
 			return err
@@ -1550,239 +1368,388 @@ func (e *endpoint) processIPv6DestinationOptionsExtHdr(extHdr *header.IPv6Destin
 			break
 		}
 
-		// We currently do not support any IPv6 Destination extension header
-		// options.
-		switch opt.UnknownAction() {
-		case header.IPv6OptionUnknownActionSkip:
-		case header.IPv6OptionUnknownActionDiscard:
-			return fmt.Errorf("found unknown destination header option = %#v with discard action", opt)
-		case header.IPv6OptionUnknownActionDiscardSendICMPNoMulticastDest:
-			if header.IsV6MulticastAddress(dstAddr) {
-				return fmt.Errorf("found unknown destination header option %#v with discard action", opt)
+		// As per RFC 8200, section 4:
+		//
+		//   Extension headers (except for the Hop-by-Hop Options header) are
+		//   not processed, inserted, or deleted by any node along a packet's
+		//   delivery path until the packet reaches the node identified in the
+		//   Destination Address field of the IPv6 header.
+		//
+		// Furthermore, as per RFC 8200 section 4.1, the Hop By Hop extension
+		// header is restricted to appear first in the list of extension headers.
+		//
+		// Therefore, we can immediately return once we hit any header other
+		// than the Hop-by-Hop header while forwarding a packet.
+		if forwarding {
+			if _, ok := extHdr.(header.IPv6HopByHopOptionsExtHdr); !ok {
+				return nil
 			}
-			fallthrough
-		case header.IPv6OptionUnknownActionDiscardSendICMP:
-			// This case satisfies a requirement of RFC 8200 section 4.2
-			// which states that an unknown option starting with bits [10] should:
-			//
-			//    discard the packet and, regardless of whether or not the
-			//    packet's Destination Address was a multicast address, send an
-			//    ICMP Parameter Problem, Code 2, message to the packet's
-			//    Source Address, pointing to the unrecognized Option Type.
-			//
-			_ = e.protocol.returnError(&icmpReasonParameterProblem{
-				code:               header.ICMPv6UnknownOption,
-				pointer:            it.ParseOffset() + optsIt.OptionOffset(),
-				respondToMulticast: true,
-			}, pkt, true /* deliveredLocally */)
-			return fmt.Errorf("found unknown destination header option %#v with discard action", opt)
-		default:
-			panic(fmt.Sprintf("unrecognized action for an unrecognized Destination extension header option = %#v", opt))
 		}
-	}
-	return nil
-}
 
-func (e *endpoint) processIPv6HopByHopOptionsExtHdr(extHdr *header.IPv6HopByHopOptionsExtHdr, it *header.IPv6PayloadIterator, pkt *stack.PacketBuffer, dstAddr tcpip.Address, routerAlert **header.IPv6RouterAlertOption, previousHeaderStart uint32, forwarding bool) error {
-	stats := e.stats.ip
-	// As per RFC 8200 section 4.1, the Hop By Hop extension header is
-	// restricted to appear immediately after an IPv6 fixed header.
-	if previousHeaderStart != 0 {
-		_ = e.protocol.returnError(&icmpReasonParameterProblem{
-			code:    header.ICMPv6UnknownHeader,
-			pointer: previousHeaderStart,
-		}, pkt, !forwarding /* deliveredLocally */)
-		return fmt.Errorf("found Hop-by-Hop header = %#v with non-zero previous header offset = %d", extHdr, previousHeaderStart)
-	}
+		switch extHdr := extHdr.(type) {
+		case header.IPv6HopByHopOptionsExtHdr:
+			// As per RFC 8200 section 4.1, the Hop By Hop extension header is
+			// restricted to appear immediately after an IPv6 fixed header.
+			if previousHeaderStart != 0 {
+				_ = e.protocol.returnError(&icmpReasonParameterProblem{
+					code:    header.ICMPv6UnknownHeader,
+					pointer: previousHeaderStart,
+				}, pkt, !forwarding /* deliveredLocally */)
+				return fmt.Errorf("found Hop-by-Hop header = %#v with non-zero previous header offset = %d", extHdr, previousHeaderStart)
+			}
 
-	optsIt := extHdr.Iter()
+			optsIt := extHdr.Iter()
 
-	for {
-		opt, done, err := optsIt.Next()
-		if err != nil {
-			stats.MalformedPacketsReceived.Increment()
-			return err
-		}
-		if done {
-			break
-		}
+			for {
+				opt, done, err := optsIt.Next()
+				if err != nil {
+					stats.MalformedPacketsReceived.Increment()
+					return err
+				}
+				if done {
+					break
+				}
 
-		switch opt := opt.(type) {
-		case *header.IPv6RouterAlertOption:
-			if *routerAlert != nil {
-				// As per RFC 2711 section 3, there should be at most one Router
-				// Alert option per packet.
-				//
-				//    There MUST only be one option of this type, regardless of
-				//    value, per Hop-by-Hop header.
-				stats.MalformedPacketsReceived.Increment()
-				return fmt.Errorf("found multiple Router Alert options (%#v, %#v)", opt, *routerAlert)
+				switch opt := opt.(type) {
+				case *header.IPv6RouterAlertOption:
+					if routerAlert != nil {
+						// As per RFC 2711 section 3, there should be at most one Router
+						// Alert option per packet.
+						//
+						//    There MUST only be one option of this type, regardless of
+						//    value, per Hop-by-Hop header.
+						stats.MalformedPacketsReceived.Increment()
+						return fmt.Errorf("found multiple Router Alert options (%#v, %#v)", opt, routerAlert)
+					}
+					routerAlert = opt
+					stats.OptionRouterAlertReceived.Increment()
+				default:
+					switch opt.UnknownAction() {
+					case header.IPv6OptionUnknownActionSkip:
+					case header.IPv6OptionUnknownActionDiscard:
+						return fmt.Errorf("found unknown Hop-by-Hop header option = %#v with discard action", opt)
+					case header.IPv6OptionUnknownActionDiscardSendICMPNoMulticastDest:
+						if header.IsV6MulticastAddress(dstAddr) {
+							return fmt.Errorf("found unknown hop-by-hop header option = %#v with discard action", opt)
+						}
+						fallthrough
+					case header.IPv6OptionUnknownActionDiscardSendICMP:
+						// This case satisfies a requirement of RFC 8200 section 4.2 which
+						// states that an unknown option starting with bits [10] should:
+						//
+						//    discard the packet and, regardless of whether or not the
+						//    packet's Destination Address was a multicast address, send an
+						//    ICMP Parameter Problem, Code 2, message to the packet's
+						//    Source Address, pointing to the unrecognized Option Type.
+						_ = e.protocol.returnError(&icmpReasonParameterProblem{
+							code:               header.ICMPv6UnknownOption,
+							pointer:            it.ParseOffset() + optsIt.OptionOffset(),
+							respondToMulticast: true,
+						}, pkt, !forwarding /* deliveredLocally */)
+						return fmt.Errorf("found unknown hop-by-hop header option = %#v with discard action", opt)
+					default:
+						panic(fmt.Sprintf("unrecognized action for an unrecognized Hop By Hop extension header option = %#v", opt))
+					}
+				}
 			}
-			*routerAlert = opt
-			stats.OptionRouterAlertReceived.Increment()
-		default:
-			switch opt.UnknownAction() {
-			case header.IPv6OptionUnknownActionSkip:
-			case header.IPv6OptionUnknownActionDiscard:
-				return fmt.Errorf("found unknown Hop-by-Hop header option = %#v with discard action", opt)
-			case header.IPv6OptionUnknownActionDiscardSendICMPNoMulticastDest:
-				if header.IsV6MulticastAddress(dstAddr) {
-					return fmt.Errorf("found unknown hop-by-hop header option = %#v with discard action", opt)
+
+		case header.IPv6RoutingExtHdr:
+			// As per RFC 8200 section 4.4, if a node encounters a routing header with
+			// an unrecognized routing type value, with a non-zero Segments Left
+			// value, the node must discard the packet and send an ICMP Parameter
+			// Problem, Code 0 to the packet's Source Address, pointing to the
+			// unrecognized Routing Type.
+			//
+			// If the Segments Left is 0, the node must ignore the Routing extension
+			// header and process the next header in the packet.
+			//
+			// Note, the stack does not yet handle any type of routing extension
+			// header, so we just make sure Segments Left is zero before processing
+			// the next extension header.
+			if extHdr.SegmentsLeft() != 0 {
+				_ = e.protocol.returnError(&icmpReasonParameterProblem{
+					code:    header.ICMPv6ErroneousHeader,
+					pointer: it.ParseOffset(),
+				}, pkt, true /* deliveredLocally */)
+				return fmt.Errorf("found unrecognized routing type with non-zero segments left in header = %#v", extHdr)
+			}
+
+		case header.IPv6FragmentExtHdr:
+			hasFragmentHeader = true
+
+			if extHdr.IsAtomic() {
+				// This fragment extension header indicates that this packet is an
+				// atomic fragment. An atomic fragment is a fragment that contains
+				// all the data required to reassemble a full packet. As per RFC 6946,
+				// atomic fragments must not interfere with "normal" fragmented traffic
+				// so we skip processing the fragment instead of feeding it through the
+				// reassembly process below.
+				continue
+			}
+
+			fragmentFieldOffset := it.ParseOffset()
+
+			// Don't consume the iterator if we have the first fragment because we
+			// will use it to validate that the first fragment holds the upper layer
+			// header.
+			rawPayload := it.AsRawHeader(extHdr.FragmentOffset() != 0 /* consume */)
+
+			if extHdr.FragmentOffset() == 0 {
+				// Check that the iterator ends with a raw payload as the first fragment
+				// should include all headers up to and including any upper layer
+				// headers, as per RFC 8200 section 4.5; only upper layer data
+				// (non-headers) should follow the fragment extension header.
+				var lastHdr header.IPv6PayloadHeader
+
+				for {
+					it, done, err := it.Next()
+					if err != nil {
+						stats.MalformedPacketsReceived.Increment()
+						stats.MalformedFragmentsReceived.Increment()
+						return err
+					}
+					if done {
+						break
+					}
+
+					lastHdr = it
 				}
-				fallthrough
-			case header.IPv6OptionUnknownActionDiscardSendICMP:
-				// This case satisfies a requirement of RFC 8200 section 4.2 which
-				// states that an unknown option starting with bits [10] should:
+
+				// If the last header is a raw header, then the last portion of the IPv6
+				// payload is not a known IPv6 extension header. Note, this does not
+				// mean that the last portion is an upper layer header or not an
+				// extension header because:
+				//  1) we do not yet support all extension headers
+				//  2) we do not validate the upper layer header before reassembling.
 				//
-				//    discard the packet and, regardless of whether or not the
-				//    packet's Destination Address was a multicast address, send an
-				//    ICMP Parameter Problem, Code 2, message to the packet's
-				//    Source Address, pointing to the unrecognized Option Type.
+				// This check makes sure that the header chain in the first fragment
+				// (fragment offset 0) does not end with a known IPv6 extension
+				// header.
+				//
+				// TODO(#2196): Support IPv6 Authentication and Encapsulated
+				// Security Payload extension headers.
+				// TODO(#2333): Validate that the upper layer header is valid.
+				switch lastHdr.(type) {
+				case header.IPv6RawPayloadHeader:
+				default:
+					stats.MalformedPacketsReceived.Increment()
+					stats.MalformedFragmentsReceived.Increment()
+					return fmt.Errorf("known extension header = %#v present after fragment header in a non-initial fragment", lastHdr)
+				}
+			}
+
+			fragmentPayloadLen := rawPayload.Buf.Size()
+			if fragmentPayloadLen == 0 {
+				// Drop the packet as it's marked as a fragment but has no payload.
+				stats.MalformedPacketsReceived.Increment()
+				stats.MalformedFragmentsReceived.Increment()
+				return fmt.Errorf("fragment has no payload")
+			}
+
+			// As per RFC 2460 Section 4.5:
+			//
+			//    If the length of a fragment, as derived from the fragment packet's
+			//    Payload Length field, is not a multiple of 8 octets and the M flag
+			//    of that fragment is 1, then that fragment must be discarded and an
+			//    ICMP Parameter Problem, Code 0, message should be sent to the source
+			//    of the fragment, pointing to the Payload Length field of the
+			//    fragment packet.
+			if extHdr.More() && fragmentPayloadLen%header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit != 0 {
+				stats.MalformedPacketsReceived.Increment()
+				stats.MalformedFragmentsReceived.Increment()
 				_ = e.protocol.returnError(&icmpReasonParameterProblem{
-					code:               header.ICMPv6UnknownOption,
-					pointer:            it.ParseOffset() + optsIt.OptionOffset(),
-					respondToMulticast: true,
-				}, pkt, !forwarding /* deliveredLocally */)
-				return fmt.Errorf("found unknown hop-by-hop header option = %#v with discard action", opt)
-			default:
-				panic(fmt.Sprintf("unrecognized action for an unrecognized Hop By Hop extension header option = %#v", opt))
+					code:    header.ICMPv6ErroneousHeader,
+					pointer: header.IPv6PayloadLenOffset,
+				}, pkt, true /* deliveredLocally */)
+				return fmt.Errorf("found fragment length = %d that is not a multiple of 8 octets", fragmentPayloadLen)
 			}
-		}
-	}
-	return nil
-}
 
-func (e *endpoint) processFragmentExtHdr(extHdr *header.IPv6FragmentExtHdr, it *header.IPv6PayloadIterator, pkt **stack.PacketBuffer, h header.IPv6) error {
-	stats := e.stats.ip
-	fragmentFieldOffset := it.ParseOffset()
-
-	// Don't consume the iterator if we have the first fragment because we
-	// will use it to validate that the first fragment holds the upper layer
-	// header.
-	rawPayload := it.AsRawHeader(extHdr.FragmentOffset() != 0 /* consume */)
-
-	if extHdr.FragmentOffset() == 0 {
-		// Check that the iterator ends with a raw payload as the first fragment
-		// should include all headers up to and including any upper layer
-		// headers, as per RFC 8200 section 4.5; only upper layer data
-		// (non-headers) should follow the fragment extension header.
-		var lastHdr header.IPv6PayloadHeader
-
-		for {
-			it, done, err := it.Next()
+			// The packet is a fragment, let's try to reassemble it.
+			start := extHdr.FragmentOffset() * header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit
+
+			// As per RFC 2460 Section 4.5:
+			//
+			//    If the length and offset of a fragment are such that the Payload
+			//    Length of the packet reassembled from that fragment would exceed
+			//    65,535 octets, then that fragment must be discarded and an ICMP
+			//    Parameter Problem, Code 0, message should be sent to the source of
+			//    the fragment, pointing to the Fragment Offset field of the fragment
+			//    packet.
+			lengthAfterReassembly := int(start) + int(fragmentPayloadLen)
+			if lengthAfterReassembly > header.IPv6MaximumPayloadSize {
+				stats.MalformedPacketsReceived.Increment()
+				stats.MalformedFragmentsReceived.Increment()
+				_ = e.protocol.returnError(&icmpReasonParameterProblem{
+					code:    header.ICMPv6ErroneousHeader,
+					pointer: fragmentFieldOffset,
+				}, pkt, true /* deliveredLocally */)
+				return fmt.Errorf("determined that reassembled packet length = %d would exceed allowed length = %d", lengthAfterReassembly, header.IPv6MaximumPayloadSize)
+			}
+
+			// Note that pkt doesn't have its transport header set after reassembly,
+			// and won't until DeliverNetworkPacket sets it.
+			resPkt, proto, ready, err := e.protocol.fragmentation.Process(
+				// IPv6 ignores the Protocol field since the ID only needs to be unique
+				// across source-destination pairs, as per RFC 8200 section 4.5.
+				fragmentation.FragmentID{
+					Source:      srcAddr,
+					Destination: dstAddr,
+					ID:          extHdr.ID(),
+				},
+				start,
+				start+uint16(fragmentPayloadLen)-1,
+				extHdr.More(),
+				uint8(rawPayload.Identifier),
+				pkt,
+			)
 			if err != nil {
 				stats.MalformedPacketsReceived.Increment()
 				stats.MalformedFragmentsReceived.Increment()
 				return err
 			}
-			if done {
-				break
+
+			if ready {
+				resPktToDecRef = resPkt
+				pkt = resPkt
+
+				// We create a new iterator with the reassembled packet because we could
+				// have more extension headers in the reassembled payload, as per RFC
+				// 8200 section 4.5. We also use the NextHeader value from the first
+				// fragment.
+				it = header.MakeIPv6PayloadIterator(header.IPv6ExtensionHeaderIdentifier(proto), pkt.Data().AsBuffer())
 			}
 
-			lastHdr = it
-		}
+		case header.IPv6DestinationOptionsExtHdr:
+			optsIt := extHdr.Iter()
+
+			for {
+				opt, done, err := optsIt.Next()
+				if err != nil {
+					stats.MalformedPacketsReceived.Increment()
+					return err
+				}
+				if done {
+					break
+				}
+
+				// We currently do not support any IPv6 Destination extension header
+				// options.
+				switch opt.UnknownAction() {
+				case header.IPv6OptionUnknownActionSkip:
+				case header.IPv6OptionUnknownActionDiscard:
+					return fmt.Errorf("found unknown destination header option = %#v with discard action", opt)
+				case header.IPv6OptionUnknownActionDiscardSendICMPNoMulticastDest:
+					if header.IsV6MulticastAddress(dstAddr) {
+						return fmt.Errorf("found unknown destination header option %#v with discard action", opt)
+					}
+					fallthrough
+				case header.IPv6OptionUnknownActionDiscardSendICMP:
+					// This case satisfies a requirement of RFC 8200 section 4.2
+					// which states that an unknown option starting with bits [10] should:
+					//
+					//    discard the packet and, regardless of whether or not the
+					//    packet's Destination Address was a multicast address, send an
+					//    ICMP Parameter Problem, Code 2, message to the packet's
+					//    Source Address, pointing to the unrecognized Option Type.
+					//
+					_ = e.protocol.returnError(&icmpReasonParameterProblem{
+						code:               header.ICMPv6UnknownOption,
+						pointer:            it.ParseOffset() + optsIt.OptionOffset(),
+						respondToMulticast: true,
+					}, pkt, true /* deliveredLocally */)
+					return fmt.Errorf("found unknown destination header option %#v with discard action", opt)
+				default:
+					panic(fmt.Sprintf("unrecognized action for an unrecognized Destination extension header option = %#v", opt))
+				}
+			}
 
-		// If the last header is a raw header, then the last portion of the IPv6
-		// payload is not a known IPv6 extension header. Note, this does not
-		// mean that the last portion is an upper layer header or not an
-		// extension header because:
-		//  1) we do not yet support all extension headers
-		//  2) we do not validate the upper layer header before reassembling.
-		//
-		// This check makes sure that a known IPv6 extension header is not
-		// present after the Fragment extension header in a non-initial
-		// fragment.
-		//
-		// TODO(#2196): Support IPv6 Authentication and Encapsulated
-		// Security Payload extension headers.
-		// TODO(#2333): Validate that the upper layer header is valid.
-		switch lastHdr.(type) {
 		case header.IPv6RawPayloadHeader:
-		default:
-			stats.MalformedPacketsReceived.Increment()
-			stats.MalformedFragmentsReceived.Increment()
-			return fmt.Errorf("known extension header = %#v present after fragment header in a non-initial fragment", lastHdr)
-		}
-	}
+			// If the last header in the payload isn't a known IPv6 extension header,
+			// handle it as if it is transport layer data.
 
-	fragmentPayloadLen := rawPayload.Buf.Size()
-	if fragmentPayloadLen == 0 {
-		// Drop the packet as it's marked as a fragment but has no payload.
-		stats.MalformedPacketsReceived.Increment()
-		stats.MalformedFragmentsReceived.Increment()
-		return fmt.Errorf("fragment has no payload")
-	}
+			// Calculate the number of octets parsed from data. We want to consume all
+			// the data except the unparsed portion located at the end, whose size is
+			// extHdr.Buf.Size().
+			trim := pkt.Data().Size() - int(extHdr.Buf.Size())
 
-	// As per RFC 2460 Section 4.5:
-	//
-	//    If the length of a fragment, as derived from the fragment packet's
-	//    Payload Length field, is not a multiple of 8 octets and the M flag
-	//    of that fragment is 1, then that fragment must be discarded and an
-	//    ICMP Parameter Problem, Code 0, message should be sent to the source
-	//    of the fragment, pointing to the Payload Length field of the
-	//    fragment packet.
-	if extHdr.More() && fragmentPayloadLen%header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit != 0 {
-		stats.MalformedPacketsReceived.Increment()
-		stats.MalformedFragmentsReceived.Increment()
-		_ = e.protocol.returnError(&icmpReasonParameterProblem{
-			code:    header.ICMPv6ErroneousHeader,
-			pointer: header.IPv6PayloadLenOffset,
-		}, *pkt, true /* deliveredLocally */)
-		return fmt.Errorf("found fragment length = %d that is not a multiple of 8 octets", fragmentPayloadLen)
-	}
+			// For unfragmented packets, extHdr still contains the transport header.
+			// Consume that too.
+			//
+			// For reassembled fragments, pkt.TransportHeader is unset, so this is a
+			// no-op and pkt.Data begins with the transport header.
+			trim += len(pkt.TransportHeader().View())
 
-	// The packet is a fragment, let's try to reassemble it.
-	start := extHdr.FragmentOffset() * header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit
+			if _, ok := pkt.Data().Consume(trim); !ok {
+				stats.MalformedPacketsReceived.Increment()
+				return fmt.Errorf("could not consume %d bytes", trim)
+			}
 
-	// As per RFC 2460 Section 4.5:
-	//
-	//    If the length and offset of a fragment are such that the Payload
-	//    Length of the packet reassembled from that fragment would exceed
-	//    65,535 octets, then that fragment must be discarded and an ICMP
-	//    Parameter Problem, Code 0, message should be sent to the source of
-	//    the fragment, pointing to the Fragment Offset field of the fragment
-	//    packet.
-	lengthAfterReassembly := int(start) + int(fragmentPayloadLen)
-	if lengthAfterReassembly > header.IPv6MaximumPayloadSize {
-		stats.MalformedPacketsReceived.Increment()
-		stats.MalformedFragmentsReceived.Increment()
-		_ = e.protocol.returnError(&icmpReasonParameterProblem{
-			code:    header.ICMPv6ErroneousHeader,
-			pointer: fragmentFieldOffset,
-		}, *pkt, true /* deliveredLocally */)
-		return fmt.Errorf("determined that reassembled packet length = %d would exceed allowed length = %d", lengthAfterReassembly, header.IPv6MaximumPayloadSize)
-	}
-
-	// Note that pkt doesn't have its transport header set after reassembly,
-	// and won't until DeliverNetworkPacket sets it.
-	resPkt, proto, ready, err := e.protocol.fragmentation.Process(
-		// IPv6 ignores the Protocol field since the ID only needs to be unique
-		// across source-destination pairs, as per RFC 8200 section 4.5.
-		fragmentation.FragmentID{
-			Source:      h.SourceAddress(),
-			Destination: h.DestinationAddress(),
-			ID:          extHdr.ID(),
-		},
-		start,
-		start+uint16(fragmentPayloadLen)-1,
-		extHdr.More(),
-		uint8(rawPayload.Identifier),
-		*pkt,
-	)
-	if err != nil {
-		stats.MalformedPacketsReceived.Increment()
-		stats.MalformedFragmentsReceived.Increment()
-		return err
-	}
+			proto := tcpip.TransportProtocolNumber(extHdr.Identifier)
+			// If the packet was reassembled from a fragment, it will not have a
+			// transport header set yet.
+			if len(pkt.TransportHeader().View()) == 0 {
+				e.protocol.parseTransport(pkt, proto)
+			}
+
+			stats.PacketsDelivered.Increment()
+			if proto == header.ICMPv6ProtocolNumber {
+				e.handleICMP(pkt, hasFragmentHeader, routerAlert)
+			} else {
+				stats.PacketsDelivered.Increment()
+				switch res := e.dispatcher.DeliverTransportPacket(proto, pkt); res {
+				case stack.TransportPacketHandled:
+				case stack.TransportPacketDestinationPortUnreachable:
+					// As per RFC 4443 section 3.1:
+					//   A destination node SHOULD originate a Destination Unreachable
+					//   message with Code 4 in response to a packet for which the
+					//   transport protocol (e.g., UDP) has no listener, if that transport
+					//   protocol has no alternative means to inform the sender.
+					_ = e.protocol.returnError(&icmpReasonPortUnreachable{}, pkt, true /* deliveredLocally */)
+					return fmt.Errorf("destination port unreachable")
+				case stack.TransportPacketProtocolUnreachable:
+					// As per RFC 8200 section 4. (page 7):
+					//   Extension headers are numbered from IANA IP Protocol Numbers
+					//   [IANA-PN], the same values used for IPv4 and IPv6.  When
+					//   processing a sequence of Next Header values in a packet, the
+					//   first one that is not an extension header [IANA-EH] indicates
+					//   that the next item in the packet is the corresponding upper-layer
+					//   header.
+					// With more related information on page 8:
+					//   If, as a result of processing a header, the destination node is
+					//   required to proceed to the next header but the Next Header value
+					//   in the current header is unrecognized by the node, it should
+					//   discard the packet and send an ICMP Parameter Problem message to
+					//   the source of the packet, with an ICMP Code value of 1
+					//   ("unrecognized Next Header type encountered") and the ICMP
+					//   Pointer field containing the offset of the unrecognized value
+					//   within the original packet.
+					//
+					// Which when taken together indicate that an unknown protocol should
+					// be treated as an unrecognized next header value.
+					// The location of the Next Header field is in a different place in
+					// the initial IPv6 header than it is in the extension headers so
+					// treat it specially.
+					prevHdrIDOffset := uint32(header.IPv6NextHeaderOffset)
+					if previousHeaderStart != 0 {
+						prevHdrIDOffset = previousHeaderStart
+					}
+					_ = e.protocol.returnError(&icmpReasonParameterProblem{
+						code:    header.ICMPv6UnknownHeader,
+						pointer: prevHdrIDOffset,
+					}, pkt, true /* deliveredLocally */)
+					return fmt.Errorf("transport protocol unreachable")
+				default:
+					panic(fmt.Sprintf("unrecognized result from DeliverTransportPacket = %d", res))
+				}
+			}
 
-	if ready {
-		// We create a new iterator with the reassembled packet because we could
-		// have more extension headers in the reassembled payload, as per RFC
-		// 8200 section 4.5. We also use the NextHeader value from the first
-		// fragment.
-		*it = header.MakeIPv6PayloadIterator(header.IPv6ExtensionHeaderIdentifier(proto), resPkt.Data().AsBuffer())
-		(*pkt).DecRef()
-		*pkt = resPkt
+		default:
+			// Since the iterator returns IPv6RawPayloadHeader for unknown Extension
+			// Header IDs this should never happen unless we missed a supported type
+			// here.
+			panic(fmt.Sprintf("unrecognized type from it.Next() = %T", extHdr))
+
+		}
 	}
 	return nil
 }
@@ -2220,8 +2187,8 @@ func (p *protocol) MinimumPacketSize() int {
 }
 
 // ParseAddresses implements stack.NetworkProtocol.
-func (*protocol) ParseAddresses(b []byte) (src, dst tcpip.Address) {
-	h := header.IPv6(b)
+func (*protocol) ParseAddresses(v []byte) (src, dst tcpip.Address) {
+	h := header.IPv6(v)
 	return h.SourceAddress(), h.DestinationAddress()
 }
 

From f9c7f99425b67ad41a4d25b92eab790909d549dc Mon Sep 17 00:00:00 2001
From: Nayana Bidari <nybidari@google.com>
Date: Wed, 31 May 2023 19:11:49 -0700
Subject: [PATCH 210/211] Set minimum value for RTT in TCP.

The smoothed RTT should never be zero, so clamp it to a minimum of 1ms. A zero
RTT is rare; it was identified while investigating
http://gvisor.dev/issues/6113.

Updates #6113

PiperOrigin-RevId: 536885961
---
 pkg/tcpip/transport/tcp/snd.go | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index d762c410e7..9c73069119 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -34,6 +34,9 @@ const (
 	// MaxRTO is the maximum allowed value for the retransmit timeout.
 	MaxRTO = 120 * time.Second
 
+	// MinSRTT is the minimum allowed value for smoothed RTT.
+	MinSRTT = 1 * time.Millisecond
+
 	// InitialCwnd is the initial congestion window.
 	InitialCwnd = 10
 
@@ -384,6 +387,10 @@ func (s *sender) updateRTO(rtt time.Duration) {
 		}
 	}
 
+	if s.rtt.TCPRTTState.SRTT < MinSRTT {
+		s.rtt.TCPRTTState.SRTT = MinSRTT
+	}
+
 	s.RTO = s.rtt.TCPRTTState.SRTT + 4*s.rtt.TCPRTTState.RTTVar
 	s.rtt.Unlock()
 	if s.RTO < s.minRTO {

From be2e4ac102c3cce2e82ff9c3622f329420a33ebf Mon Sep 17 00:00:00 2001
From: Kevin Krakauer <krakauer@google.com>
Date: Thu, 13 Jul 2023 15:13:41 -0700
Subject: [PATCH 211/211] bump maximum out-of-order TCP recv buf use from 25%
 to 75%

The code comment already stated that 75% of the receive buffer may be used for
out-of-order segments, but the check accidentally capped usage at 25%
(rcvBufSize>>2).

Fixes #9153.

PiperOrigin-RevId: 547937499
---
 pkg/tcpip/transport/tcp/rcv.go | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go
index 98e4d12fb7..5b65d8d5f4 100644
--- a/pkg/tcpip/transport/tcp/rcv.go
+++ b/pkg/tcpip/transport/tcp/rcv.go
@@ -480,13 +480,26 @@ func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err tcpip.Error) {
 	// Defer segment processing if it can't be consumed now.
 	if !r.consumeSegment(s, segSeq, segLen) {
 		if segLen > 0 || s.flags.Contains(header.TCPFlagFin) {
-			// We only store the segment if it's within our buffer size limit.
+			// We only store the segment if it's within our buffer
+			// size limit.
 			//
-			// Only use 75% of the receive buffer queue for out-of-order
-			// segments. This ensures that we always leave some space for the inorder
-			// segments to arrive allowing pending segments to be processed and
+			// Only use 75% of the receive buffer queue for
+			// out-of-order segments. This ensures that we always
+			// leave some space for the inorder segments to arrive
+			// allowing pending segments to be processed and
 			// delivered to the user.
-			if rcvBufSize := r.ep.ops.GetReceiveBufferSize(); rcvBufSize > 0 && (r.PendingBufUsed+int(segLen)) < int(rcvBufSize)>>2 {
+			//
+			// The ratio must be at least 50% (the size of rwnd) to
+			// leave space for retransmitted dropped packets. 51%
+			// would make recovery slow when there are multiple
+			// drops by necessitating multiple round trips. 100%
+			// would enable the buffer to be totally full of
+			// out-of-order data and stall the connection.
+			//
+			// An ideal solution is to ensure that there are at
+			// least N bytes free when N bytes are missing, but we
+			// don't have that computed at this point in the stack.
+			if rcvBufSize := r.ep.ops.GetReceiveBufferSize(); rcvBufSize > 0 && (r.PendingBufUsed+int(segLen)) < int(rcvBufSize-rcvBufSize/4) {
 				r.ep.rcvQueueMu.Lock()
 				r.PendingBufUsed += s.segMemSize()
 				r.ep.rcvQueueMu.Unlock()