summary refs log tree commit diff
diff options
context:
space:
mode:
author Bernie Innocenti <codewiz@google.com> 2018-05-14 20:40:49 +0900
committer Bernie Innocenti <codewiz@google.com> 2018-05-16 14:05:15 +0900
commit 7ac4a30be5d8cbfafc740f9f86d30f37d15f4483 (patch)
tree abe9305227662edad00cb78d44258d3d7c42da4b
parent cdc003dc8a8ffb418f33518ac545016e6991534a (diff)
download android-clat-pie-qpr2-release.tar.gz
In the event the packet socket has an error, it is likely not cleared and clatd can spin in a tight loop. This may happen when a network is going down (ENETDOWN, ENETUNREACH, ...). Ordinarily the ConnectivityService/Nat464Xlat/ClatdController control plane would tear down clatd in this case. However, if the control plane is deadlocked, clatd will chew up CPU indefinitely.

This fix consists of detecting when poll() exits due to a socket error and trying to clear the error by issuing a no-op recv() on the packet socket and a 0-byte read() for the /dev/tun socket.

Test: manually, on a marlin device (kernel 3.18.70)
Bug: 78602493
Change-Id: Ic23f999712a674df11e981a4314ad371e8d5fb6d
(cherry picked from commit 69dc60de5d53488a3c45737ec17b242d703d682b)
-rw-r--r-- clatd.c 21
1 files changed, 15 insertions, 6 deletions
diff --git a/clatd.c b/clatd.c
index c579df0..168cc69 100644
--- a/clatd.c
+++ b/clatd.c
@@ -382,19 +382,28 @@ void event_loop(struct tun_data *tunnel) {
last_interface_poll = time(NULL);
while(running) {
- if(poll(wait_fd, 2, NO_TRAFFIC_INTERFACE_POLL_FREQUENCY*1000) == -1) {
- if(errno != EINTR) {
- logmsg(ANDROID_LOG_WARN,"event_loop/poll returned an error: %s",strerror(errno));
+ if (poll(wait_fd, ARRAY_SIZE(wait_fd),
+ NO_TRAFFIC_INTERFACE_POLL_FREQUENCY * 1000) == -1) {
+ if (errno != EINTR) {
+ logmsg(ANDROID_LOG_WARN,"event_loop/poll returned an error: %s", strerror(errno));
}
} else {
+ if (wait_fd[0].revents & POLLIN) {
+ ring_read(&tunnel->ring, tunnel->fd4, 0 /* to_ipv6 */);
+ }
+ // If any other bit is set, assume it's due to an error (i.e. POLLERR).
+ if (wait_fd[0].revents & ~POLLIN) {
+ // ring_read doesn't clear the error indication on the socket.
+ recv(tunnel->read_fd6, NULL, 0, MSG_PEEK);
+ logmsg(ANDROID_LOG_WARN, "event_loop: clearing error on read_fd6: %s",
+ strerror(errno));
+ }
+
// Call read_packet if the socket has data to be read, but also if an
// error is waiting. If we don't call read() after getting POLLERR, a
// subsequent poll() will return immediately with POLLERR again,
// causing this code to spin in a loop. Calling read() will clear the
// socket error flag instead.
- if (wait_fd[0].revents) {
- ring_read(&tunnel->ring, tunnel->fd4, 0 /* to_ipv6 */);
- }
if (wait_fd[1].revents) {
read_packet(tunnel->fd4, tunnel->write_fd6, 1 /* to_ipv6 */);
}