| Start/ | End/ | |||
| True | False | - | Line | Source |
| 1 | /* | |||
| 2 | * arch/x86_64/lib/csum-partial.c | |||
| 3 | * | |||
| 4 | * This file contains network checksum routines that are better done | |||
| 5 | * in an architecture-specific manner due to speed. | |||
| 6 | */ | |||
| 7 | ||||
| 8 | #include <linux/compiler.h> | |||
| 9 | #include <linux/module.h> | |||
| 10 | #include <asm/checksum.h> | |||
| 11 | ||||
| 12 | #define __force_inline inline __attribute__((always_inline)) | |||
| 13 | ||||
| 0 | 0 | - | 14 | static inline unsigned short from32to16(unsigned a) |
| 15 | { | |||
| 16 | unsigned short b = a >> 16; | |||
| 17 | asm("addw %w2,%w0\n\t" | |||
| 18 | "adcw $0,%w0\n" | |||
| 19 | : "=r" (b) | |||
| 20 | : "0" (b), "r" (a)); | |||
| 0 | - | 21 | return b; | |
| 22 | } | |||
| 23 | ||||
| 24 | /* | |||
| 25 | * Do a 64-bit checksum on an arbitrary memory area. | |||
| 26 | * Returns a 32-bit checksum. | |||
| 27 | * | |||
| 28 | * This isn't as time critical as it used to be because many NICs | |||
| 29 | * do hardware checksumming these days. | |||
| 30 | * | |||
| 31 | * Things tried and found to not make it faster: | |||
| 32 | * Manual Prefetching | |||
| 33 | * Unrolling to a 128-byte inner loop. | |||
| 34 | * Using interleaving with more registers to break the carry chains. | |||
| 35 | */ | |||
| 522583 | 0 | 36 | static __force_inline unsigned do_csum(const unsigned char *buff, unsigned len) | |
| 37 | { | |||
| 38 | unsigned odd, count; | |||
| 39 | unsigned long result = 0; | |||
| 40 | ||||
| 0 | 522583 | - | 41 | if (unlikely(len == 0)) |
| 0 | - | 42 | return result; | |
| 43 | odd = 1 & (unsigned long) buff; | |||
| 0 | 522583 | - | 44 | if (unlikely(odd)) { |
| 45 | result = *buff << 8; | |||
| 46 | len--; | |||
| 47 | buff++; | |||
| 48 | } | |||
| 49 | count = len >> 1; /* nr of 16-bit words.. */ | |||
| 522583 | 0 | - | 50 | if (count) { |
| 0 | 522583 | - | 51 | if (2 & (unsigned long) buff) { |
| 52 | result += *(unsigned short *)buff; | |||
| 53 | count--; | |||
| 54 | len -= 2; | |||
| 55 | buff += 2; | |||
| 56 | } | |||
| 57 | count >>= 1; /* nr of 32-bit words.. */ | |||
| 522583 | 0 | - | 58 | if (count) { |
| 59 | unsigned long zero; | |||
| 60 | unsigned count64; | |||
| 124642 | 397941 | 61 | if (4 & (unsigned long) buff) { | |
| 62 | result += *(unsigned int *) buff; | |||
| 63 | count--; | |||
| 64 | len -= 4; | |||
| 65 | buff += 4; | |||
| 66 | } | |||
| 67 | count >>= 1; /* nr of 64-bit words.. */ | |||
| 68 | ||||
| 69 | /* main loop using 64-byte blocks */ | |||
| 70 | zero = 0; | |||
| 71 | count64 = count >> 3; | |||
| 59828 | 522583 | 72 | while (count64) { | |
| 73 | asm("addq 0*8(%[src]),%[res]\n\t" | |||
| 74 | "adcq 1*8(%[src]),%[res]\n\t" | |||
| 75 | "adcq 2*8(%[src]),%[res]\n\t" | |||
| 76 | "adcq 3*8(%[src]),%[res]\n\t" | |||
| 77 | "adcq 4*8(%[src]),%[res]\n\t" | |||
| 78 | "adcq 5*8(%[src]),%[res]\n\t" | |||
| 79 | "adcq 6*8(%[src]),%[res]\n\t" | |||
| 80 | "adcq 7*8(%[src]),%[res]\n\t" | |||
| 81 | "adcq %[zero],%[res]" | |||
| 82 | : [res] "=r" (result) | |||
| 83 | : [src] "r" (buff), [zero] "r" (zero), | |||
| 84 | "[res]" (result)); | |||
| 85 | buff += 64; | |||
| 86 | count64--; | |||
| 87 | } | |||
| 88 | ||||
| 89 | /* last up to 7 8-byte blocks */ | |||
| 90 | count %= 8; | |||
| 1951E3 | 522583 | 91 | while (count) { | |
| 92 | asm("addq %1,%0\n\t" | |||
| 93 | "adcq %2,%0\n" | |||
| 94 | : "=r" (result) | |||
| 95 | : "m" (*(unsigned long *)buff), | |||
| 96 | "r" (zero), "0" (result)); | |||
| 97 | --count; | |||
| 98 | buff += 8; | |||
| 99 | } | |||
| 100 | result = add32_with_carry(result>>32, | |||
| 101 | result&0xffffffff); | |||
| 102 | ||||
| 5461 | 517122 | 103 | if (len & 4) { | |
| 104 | result += *(unsigned int *) buff; | |||
| 105 | buff += 4; | |||
| 106 | } | |||
| 107 | } | |||
| 1025 | 521558 | 108 | if (len & 2) { | |
| 109 | result += *(unsigned short *) buff; | |||
| 110 | buff += 2; | |||
| 111 | } | |||
| 112 | } | |||
| 1149 | 521434 | 113 | if (len & 1) | |
| 114 | result += *buff; | |||
| 115 | result = add32_with_carry(result>>32, result & 0xffffffff); | |||
| 0 | 522583 | - | 116 | if (unlikely(odd)) { |
| 117 | result = from32to16(result); | |||
| 118 | result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); | |||
| 119 | } | |||
| 522583 | 120 | return result; | ||
| 121 | } | |||
| 122 | ||||
| 123 | /* | |||
| 124 | * computes the checksum of a memory block at buff, length len, | |||
| 125 | * and adds in "sum" (32-bit) | |||
| 126 | * | |||
| 127 | * returns a 32-bit number suitable for feeding into itself | |||
| 128 | * or csum_tcpudp_magic | |||
| 129 | * | |||
| 130 | * this function must be called with even lengths, except | |||
| 131 | * for the last fragment, which may be odd | |||
| 132 | * | |||
| 133 | * it's best to have buff aligned on a 64-bit boundary | |||
| 134 | */ | |||
| 522583 | 0 | 135 | unsigned csum_partial(const unsigned char *buff, unsigned len, unsigned sum) | |
| 136 | { | |||
| 522583 | 137 | return add32_with_carry(do_csum(buff, len), sum); | ||
| 138 | } | |||
| 139 | ||||
| 140 | EXPORT_SYMBOL(csum_partial); | |||
| 141 | ||||
| 142 | /* | |||
| 143 | * this routine is used for miscellaneous IP-like checksums, mainly | |||
| 144 | * in icmp.c | |||
| 145 | */ | |||
| 0 | 0 | - | 146 | unsigned short ip_compute_csum(unsigned char * buff, int len) |
| 147 | { | |||
| 0 | - | 148 | return csum_fold(csum_partial(buff,len,0)); | |
| 149 | } | |||
| 150 | ||||
| ***TER 67% (22/33) of SOURCE FILE csum-partial.c | ||||