/* Measure Linux (or whatever) system call overhead: minimally about
 * 300 ns on this laptop [clarification needed].  Copying 8192 zero
 * bytes into userspace seems to take about 1400 ns more, or 171 ps
 * per byte.
 *
 * On this Ryzen 5 3500U, mostly running at 3.667GHz, I get:
 *
 *     $ \time ./syscallovh 10000000 1
 *     0.81user 5.02system 0:05.84elapsed 99%CPU (0avgtext+0avgdata 1152maxresident)k
 *     0inputs+0outputs (0major+72minor)pagefaults 0swaps
 *     $ \time ./syscallovh 10000000 1
 *     0.86user 4.97system 0:05.83elapsed 99%CPU (0avgtext+0avgdata 1316maxresident)k
 *     0inputs+0outputs (0major+73minor)pagefaults 0swaps
 *     $ \time ./syscallovh 10000000 1
 *     0.85user 5.08system 0:05.94elapsed 99%CPU (0avgtext+0avgdata 1240maxresident)k
 *     0inputs+0outputs (0major+74minor)pagefaults 0swaps
 *     $ \time ./syscallovh 10000000 1024
 *     0.79user 5.50system 0:06.29elapsed 99%CPU (0avgtext+0avgdata 1248maxresident)k
 *     0inputs+0outputs (0major+75minor)pagefaults 0swaps
 *     $ \time ./syscallovh 10000000 1024
 *     0.84user 5.56system 0:06.40elapsed 99%CPU (0avgtext+0avgdata 1328maxresident)k
 *     0inputs+0outputs (0major+75minor)pagefaults 0swaps
 *     $ \time ./syscallovh 10000000 1024
 *     0.83user 5.43system 0:06.26elapsed 100%CPU (0avgtext+0avgdata 1268maxresident)k
 *     0inputs+0outputs (0major+74minor)pagefaults 0swaps
 *     $ \time ./syscallovh 10000000 8192
 *     0.92user 7.50system 0:08.43elapsed 99%CPU (0avgtext+0avgdata 1256maxresident)k
 *     0inputs+0outputs (0major+76minor)pagefaults 0swaps
 *     $ \time ./syscallovh 10000000 8192
 *     0.85user 7.46system 0:08.31elapsed 99%CPU (0avgtext+0avgdata 1152maxresident)k
 *     0inputs+0outputs (0major+72minor)pagefaults 0swaps
 *     $ \time ./syscallovh 10000000 8192
 *     0.84user 7.52system 0:08.37elapsed 99%CPU (0avgtext+0avgdata 1148maxresident)k
 *     0inputs+0outputs (0major+75minor)pagefaults 0swaps
 *     $ \time ./syscallovh 10000 8192000
 *     0.00user 9.56system 0:09.57elapsed 99%CPU (0avgtext+0avgdata 9144maxresident)k
 *     0inputs+0outputs (0major+539minor)pagefaults 0swaps
 *     $ \time ./syscallovh 10000 8192000
 *     0.00user 9.79system 0:09.79elapsed 99%CPU (0avgtext+0avgdata 9328maxresident)k
 *     0inputs+0outputs (0major+542minor)pagefaults 0swaps
 *     $ \time ./syscallovh 10000 8192000
 *     0.01user 9.65system 0:09.66elapsed 99%CPU (0avgtext+0avgdata 9264maxresident)k
 *     0inputs+0outputs (0major+540minor)pagefaults 0swaps
 *
 * So:
 *
 * - the loop reading 1 byte took 583–594 nanoseconds per read;
 * - 1024 bytes took 626–640ns per read, an additional 32–57ns, or
 *   31–56ps per byte; and
 * - 8192 bytes took 831–843ns per read, an additional 191–217ns over
 *   the 1024-byte number, or 26–30ps per byte.
 * - 8192000 bytes took 957–979μs per read, which is 116–120ps per
 *   byte.
 */

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Device that is read repeatedly; also used as the perror() prefix. */
static const char devzero[] = "/dev/zero";

/*
 * Parse `text` as a non-negative decimal integer no larger than `max`.
 *
 * Returns 0 and stores the value in *out on success.  Returns -1 (leaving
 * *out untouched) if the text is empty, carries a sign or leading
 * whitespace, has trailing characters, or is out of range.
 */
static int parse_count(const char *text, unsigned long long max,
                       unsigned long long *out)
{
    /* strtoull() skips whitespace and accepts a sign (negating the
     * result modulo 2^N), so insist on a leading digit ourselves. */
    if (text[0] < '0' || text[0] > '9') {
        return -1;
    }

    char *end = NULL;
    errno = 0;
    unsigned long long value = strtoull(text, &end, 10);
    if (errno == ERANGE || *end != '\0' || value > max) {
        return -1;
    }

    *out = value;
    return 0;
}

/*
 * Usage: syscallovh COUNT SIZE
 *
 * Issues COUNT read() calls of SIZE bytes each against /dev/zero so the
 * per-call cost can be measured externally (e.g. with time(1)).
 *
 * Returns 0 on success and 1 on a usage error, an invalid argument, an
 * allocation failure, or a failing open()/read().
 */
int main(int argc, char **argv)
{
    const char *prog = (argc > 0 && argv[0] != NULL) ? argv[0] : "syscallovh";

    if (argc != 3) {
        fprintf(stderr,
                "%s: usage: %s 102 4096\n"
                "Reads 4096 bytes from /dev/zero 102 times.\n",
                prog, prog);
        return 1;
    }

    unsigned long long count = 0;
    if (parse_count(argv[1], ULLONG_MAX, &count) != 0) {
        fprintf(stderr, "%s: invalid repetition count '%s'\n", prog, argv[1]);
        return 1;
    }

    /* read() reports its result as a signed size, so cap the request at
     * half the size_t range to keep the return value representable. */
    unsigned long long size_arg = 0;
    if (parse_count(argv[2], SIZE_MAX / 2, &size_arg) != 0) {
        fprintf(stderr, "%s: invalid read size '%s'\n", prog, argv[2]);
        return 1;
    }
    size_t size = (size_t)size_arg;

    /* Heap allocation instead of a stack VLA: a user-chosen size could
     * otherwise overflow the stack, and a zero-length VLA is undefined.
     * At least one byte is requested so a SIZE of 0 (pure syscall
     * overhead) still gets a valid pointer. */
    char *buf = malloc(size != 0 ? size : 1);
    if (buf == NULL) {
        perror("malloc");
        return 1;
    }

    int fd = open(devzero, O_RDONLY);
    if (fd < 0) {
        perror(devzero);
        free(buf);
        return 1;
    }

    int status = 0;
    for (unsigned long long i = 0; i < count; i++) {
        /* A short read is not an error here; only a failing call would
         * make the timing meaningless, so stop and report it. */
        if (read(fd, buf, size) < 0) {
            perror("read");
            status = 1;
            break;
        }
    }

    close(fd);
    free(buf);
    return status;
}