Network Performance Analysis: Latency and Bandwidth

The performance of most networks can be approximated as:

   send_time = latency + message_length / bandwidth;

The latency is often ignored (networks are advertised and sold based on bandwidth), yet latency dominates performance for short messages.  For example, a typical gigabit ethernet network has a bandwidth of 1 billion bits per second: 125 million bytes per second in theory, call it 100 million bytes per second, or 100 bytes per microsecond.  Yet message latency is often 100 microseconds or more, which is the time to send 10,000 bytes!
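
To see how badly that fixed latency hurts small messages, here's a tiny model calculation.  It just plugs numbers into the formula above; the 100 microseconds and 100 MB/sec are the rough gigabit figures quoted above, not measurements.

#include <stdio.h>

/* Rough model parameters: the approximate gigabit ethernet figures above. */
const double latency = 100.0e-6;   /* seconds of fixed overhead per message */
const double bandwidth = 100.0e6;  /* bytes per second */

int main(void) {
	for (double bytes=1; bytes<=1000000; bytes*=10) {
		double send_time = latency + bytes/bandwidth; /* the formula above */
		double effective = bytes/send_time;           /* bandwidth actually delivered */
		printf("%9.0f bytes: %7.3f ms to send, %8.3f MB/sec effective\n",
			bytes, send_time*1.0e3, effective*1.0e-6);
	}
	return 0;
}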

The result is that short messages deliver terrible bandwidth, often in the kilobytes per second range.  Here are some measured performance numbers for gigabit ethernet, where the roundtrip count is the number of data round trips made after connecting (see the benchmark program at the end): note how slowly the time increases until we're sending kilobytes or more of data.  The link isn't well utilized unless you're sending 100KB or more of data at once!

                        TOTAL TIME (ms)                          BANDWIDTH (MB/sec)
MESSAGE SIZE   0 roundtrip 1 roundtrip 2 roundtrip       0 roundtrip 1 roundtrip 2 roundtrip
1 byte               0.338       0.250       0.247                 0       0.004       0.008
10 bytes             0.092       0.189       0.236                 0       0.053       0.085
100 bytes            0.147       0.382       0.428                 0       0.262       0.467
1000 bytes           0.165       0.487       0.657                 0       2.054       3.044
10000 bytes          0.170       0.638       0.707                 0      15.674      28.283
100000 bytes         0.213       1.129       2.045                 0      88.562      97.804
1000000 bytes        0.723       9.269      17.442                 0     107.887     114.666

In this benchmark, we make a separate TCP connection for each run, then send the data in a series of request/acknowledge round trips.  This connect, exchange, disconnect pattern is typical of real workloads such as HTTP requests or database queries.
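
Under the model above, the total time for one of these runs should be roughly

   total_time = connect_time + roundtrips * latency + total_bytes / bandwidth;

treating connection setup as its own fixed cost.  That's also why the 0 roundtrip columns in these tables are nearly flat: with no data round trips, the time is almost all connection setup and teardown.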


Here's the same benchmark over the increasingly misnamed "fast" ethernet, which runs at 100 megabits per second: 1/10 the bandwidth of gigabit, but only slightly higher latency.  With this slower network, we do reasonably well even sending 10KB messages.

                        TOTAL TIME (ms)                          BANDWIDTH (MB/sec)
MESSAGE SIZE   0 roundtrip 1 roundtrip 2 roundtrip       0 roundtrip 1 roundtrip 2 roundtrip
1 byte               0.445       1.687       0.446                 0       0.001       0.004
10 bytes             0.162       0.327       0.413                 0       0.031       0.048
100 bytes            0.148       0.534       0.807                 0       0.187       0.248
1000 bytes           0.155       0.686       1.147                 0       1.458       1.744
10000 bytes          0.161       1.451       2.734                 0       6.892       7.315
100000 bytes         0.307       9.142      18.202                 0      10.939      10.988
1000000 bytes        1.861      86.854     171.804                 0      11.514      11.641


On the University of Alaska's eduroam wireless, bandwidth is similar to fast ethernet, but latency is much higher.


                        TOTAL TIME (ms)                          BANDWIDTH (MB/sec)
MESSAGE SIZE   0 roundtrip 1 roundtrip 2 roundtrip       0 roundtrip 1 roundtrip 2 roundtrip
1 byte               2.635       6.808       8.663                 0           0           0
10 bytes             1.951       7.299       9.129                 0       0.001       0.002
100 bytes            1.938       6.935       8.985                 0       0.014       0.022
1000 bytes           2.868       6.382      10.188                 0       0.157       0.196
10000 bytes          2.287       8.207      11.275                 0       1.218       1.774
100000 bytes         2.196      18.609      31.730                 0       5.374       6.303
1000000 bytes        3.242     105.635     195.803                 0       9.467      10.214


This one is truly amazing: even when connecting to the same machine, with no physical network involved, latency still matters.  Bandwidth is great, over 1 gigabyte per second, but latency is still about 100 microseconds, probably mostly OS network stack and process scheduling overhead.

                        TOTAL TIME (ms)                          BANDWIDTH (MB/sec)
MESSAGE SIZE   0 roundtrip 1 roundtrip 2 roundtrip       0 roundtrip 1 roundtrip 2 roundtrip
1 byte               0.135       0.130       0.131                 0       0.008       0.015
10 bytes             0.056       0.102       0.120                 0       0.098       0.167
100 bytes            0.053       0.101       0.122                 0       0.989       1.638
1000 bytes           0.052       0.098       0.121                 0      10.205      16.513
10000 bytes          0.060       0.104       0.131                 0      96.200     152.798
100000 bytes         0.132       0.220       0.306                 0     454.421     653.828
1000000 bytes        0.903       1.525       1.954                 0     655.770    1023.500


The bottom line?  You can't send millions of tiny messages and get decent network performance.  You need to send big blocks of data!
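
If your application naturally generates lots of tiny messages, the usual fix is to coalesce them into one buffer and hand the whole block to a single send.  Here's a minimal sketch of that idea; the BatchedSender class and the 64KB threshold are made up for illustration, and it reuses the skt_sendN helper from the listing below.

#include <vector>
#include "osl/socket.h"   /* Big32, skt_sendN: same headers as the listing below */
#include "osl/socket.cpp"

/* Sketch: queue up small messages and send them as one big block,
   so the per-message latency is paid once per block instead of once per message. */
class BatchedSender {
	int s;                      // an already-connected TCP socket
	std::vector<char> pending;  // bytes queued but not yet on the wire
public:
	BatchedSender(int socket) :s(socket) { pending.reserve(64*1024); }
	
	void queue(const void *data,unsigned int len) {
		const char *p=(const char *)data;
		pending.insert(pending.end(),p,p+len);
		if (pending.size()>=64*1024) flush(); // big enough to use the link well
	}
	void flush(void) { // call this before waiting on a reply!
		if (pending.empty()) return;
		skt_sendN(s,&pending[0],pending.size()); // one big send, not many tiny ones
		pending.clear();
	}
};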


Here's the full sockets benchmark program.  The server's address is hardcoded in the client, and that machine is only accessible from on campus.  There are several pitfalls here.  One is that "recv" is allowed to return early, as soon as any part of the data has arrived; I've given up and called my skt_recvN function to handle this case.  Another is that doing two back-to-back "send" operations on the client triggers Nagle's algorithm, adding up to milliseconds of latency; I've hand-combined them into a single "struct setup_message" to avoid this.
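
For reference, the heart of a recvN-style helper is just a loop that keeps calling recv until every requested byte has arrived.  Here's a minimal sketch of the idea (the real skt_recvN presumably adds more careful error handling):

#include <sys/types.h>
#include <sys/socket.h>

/* Sketch: receive exactly len bytes, looping because recv may return
   as soon as *any* data is available.  Returns 0 on success, -1 on error or EOF. */
int recvN_sketch(int s,void *dest,size_t len)
{
	char *p=(char *)dest;
	while (len>0) {
		ssize_t got=recv(s,p,len,0);
		if (got<=0) return -1; /* error, or connection closed before we got it all */
		p+=got;
		len-=got;
	}
	return 0;
}

The Nagle delay could also be avoided by setting the TCP_NODELAY socket option on the client socket; combining the two sends into one struct, as this program does, sidesteps the problem without any extra socket options.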

#include "osl/socket.h" /* for Big32 class */
#include "osl/socket.cpp" /* for skt_recvN / skt_sendN functions */
#include <sys/wait.h>
#include <stdio.h>  /* printf, perror */
#include <stdlib.h> /* exit */
#include <string.h> /* memcpy, memset */
#include <vector>   /* std::vector */

#ifdef _WIN32
# include <winsock.h> /* windows sockets */
# pragma comment (lib, "wsock32.lib")  /* link with winsock library */
#else /* non-Windows: Berkeley sockets */
# include <sys/types.h> 
# include <sys/socket.h> /* socket */
# include <arpa/inet.h> /* AF_INET and sockaddr_in */
# include <unistd.h> /* close */
#endif

#define errcheck(code) if (code<0) { perror(#code); exit(1); }


struct sockaddr build_addr(int port,unsigned char ip3,unsigned char ip2,unsigned char ip1,unsigned char ip0) 
{
	struct sockaddr_in addr;
	memset(&addr,0,sizeof(addr)); // clear padding (sin_zero) before filling in fields
	addr.sin_family=AF_INET;
	addr.sin_port=htons(port); // port number in network byte order
	unsigned char bytes4[4]={ip3,ip2,ip1,ip0}; // IP address
	memcpy(&addr.sin_addr,bytes4,4);
	return *(struct sockaddr *)&addr;
}

int build_socket(void)
{
	int s=socket(AF_INET,SOCK_STREAM,0);
	// Allow us to re-open a port within 3 minutes
	int on = 1; /* for setsockopt */
	setsockopt(s,SOL_SOCKET, SO_REUSEADDR,&on,sizeof(on));
	return s;
}

enum {port=1025};

struct setup_message {
	Big32 len;
	Big32 count;
};

int server_comm(void)
{
// Create a socket
	int server=build_socket();
	
	/* Prevents 3-minute socket reuse timeout after a server crash. */
	int on = 1;
	setsockopt(server, SOL_SOCKET, SO_REUSEADDR, (const char *)&on, sizeof(on));
 

// Make listening socket
	struct sockaddr addr=build_addr(port, 0,0,0,0);
	errcheck(bind(server,&addr,sizeof(addr)));
	errcheck(listen(server,10));

	while (1) {
	// Wait for client to connect to me
		int s=accept(server,0,0);
		
	// Receive binary command length from client
		setup_message msg;
		skt_recvN(s,&msg,sizeof(msg));
		
	// Repeatedly receive bytes from client
		std::vector<char> target(msg.len,(char)0);
		int count=msg.count;
		while (count-->0) {
			skt_recvN(s,&target[0],target.size());
			Big32 OK=msg.len;
			skt_sendN(s,&OK,sizeof(OK));
		}
		
		close(s);
	}
}

int client_comm_len=0; // bytes to send
int client_comm_count=0; // repetitions
int client_comm(void)
{
	//double start=time_in_seconds();
// Create a socket
	int s=build_socket();

// Connect to server above
	struct sockaddr addr=build_addr(port, 137,229,25,241); /* viz1 (on campus only) */
	errcheck(connect(s,&addr,sizeof(addr)));
	//printf("	Connected at t=%.3f ms\n",1.0e3*(time_in_seconds()-start));

// Send binary request:
	setup_message msg;
	msg.len=client_comm_len;
	msg.count=client_comm_count;
	skt_sendN(s,&msg,sizeof(msg));

	std::vector<char> source(client_comm_len,'!');
	for (int i=0;i<client_comm_count;i++) {
		skt_sendN(s,&source[0],client_comm_len);
		Big32 OK;
		skt_recvN(s,&OK,sizeof(OK));
		if (OK!=msg.len) printf("Error: server len mismatch %d\n",(int)OK);
	}
	
	//printf("	Done at t=%.3f ms\n",1.0e3*(time_in_seconds()-start));
	close(s);
	return 0;
}

void foo(void) {
/*
  if (fork()) // be a server
  {
	server_comm();
  }
  else  // be a client
*/
  {
	for (client_comm_count=0;client_comm_count<3;client_comm_count++)
	for (client_comm_len=1;client_comm_len<=1000*1000;client_comm_len*=10)
	{
		double start=time_in_seconds();
		client_comm();
		double t=time_in_seconds()-start;
		printf("%d roundtrip\t%d bytes\t%.3f ms_total\t%.3f MB/sec\n",
			client_comm_count,client_comm_len,1.0e3*t,
			client_comm_len*client_comm_count*1.0e-6/t);
	}
  }
}

(Try this in NetRun now!)
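
If you want to try both ends on one machine outside NetRun, one option (a sketch, not how the numbers above were collected) is to restore the commented-out fork logic and point the client's build_addr call at 127.0.0.1, the loopback address:

#include <unistd.h> /* fork, sleep */

/* Sketch: run the server and the client benchmark as two local processes.
   Assumes client_comm's build_addr call has been changed to
   build_addr(port, 127,0,0,1) so it connects to this machine. */
int main(void) {
	if (fork()) { // parent: be the server (loops forever; kill it when done)
		server_comm();
	} else {      // child: be the client
		sleep(1); // crude: give the server a moment to bind and listen
		foo();    // run the benchmark sweep above
	}
	return 0;
}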


CS 441 Lecture Note, 2016, Dr. Orion Lawlor, UAF Computer Science Department.