Line data Source code
1 : /* inffast.c -- fast decoding 2 : * Copyright (C) 1995-2004 Mark Adler 3 : * For conditions of distribution and use, see copyright notice in zlib.h 4 : */ 5 : 6 : #include <linux/zutil.h> 7 : #include "inftrees.h" 8 : #include "inflate.h" 9 : #include "inffast.h" 10 : 11 : #ifndef ASMINF 12 : 13 : union uu { 14 : unsigned short us; 15 : unsigned char b[2]; 16 : }; 17 : 18 : /* Endian independed version */ 19 : static inline unsigned short 20 : get_unaligned16(const unsigned short *p) 21 : { 22 : union uu mm; 23 : unsigned char *b = (unsigned char *)p; 24 : 25 : mm.b[0] = b[0]; 26 : mm.b[1] = b[1]; 27 : return mm.us; 28 : } 29 : 30 : /* 31 : Decode literal, length, and distance codes and write out the resulting 32 : literal and match bytes until either not enough input or output is 33 : available, an end-of-block is encountered, or a data error is encountered. 34 : When large enough input and output buffers are supplied to inflate(), for 35 : example, a 16K input buffer and a 64K output buffer, more than 95% of the 36 : inflate execution time is spent in this routine. 37 : 38 : Entry assumptions: 39 : 40 : state->mode == LEN 41 : strm->avail_in >= 6 42 : strm->avail_out >= 258 43 : start >= strm->avail_out 44 : state->bits < 8 45 : 46 : On return, state->mode is one of: 47 : 48 : LEN -- ran out of enough output space or enough available input 49 : TYPE -- reached end of block code, inflate() to interpret next block 50 : BAD -- error in block data 51 : 52 : Notes: 53 : 54 : - The maximum input bits used by a length/distance pair is 15 bits for the 55 : length code, 5 bits for the length extra, 15 bits for the distance code, 56 : and 13 bits for the distance extra. This totals 48 bits, or six bytes. 57 : Therefore if strm->avail_in >= 6, then there is enough input to avoid 58 : checking for available input while decoding. 59 : 60 : - The maximum bytes that a single length/distance pair can output is 258 61 : bytes, which is the maximum length that can be coded. inflate_fast() 62 : requires strm->avail_out >= 258 for each loop to avoid checking for 63 : output space. 64 : 65 : - @start: inflate()'s starting value for strm->avail_out 66 : */ 67 0 : void inflate_fast(z_streamp strm, unsigned start) 68 : { 69 0 : struct inflate_state *state; 70 0 : const unsigned char *in; /* local strm->next_in */ 71 0 : const unsigned char *last; /* while in < last, enough input available */ 72 0 : unsigned char *out; /* local strm->next_out */ 73 0 : unsigned char *beg; /* inflate()'s initial strm->next_out */ 74 0 : unsigned char *end; /* while out < end, enough space available */ 75 : #ifdef INFLATE_STRICT 76 : unsigned dmax; /* maximum distance from zlib header */ 77 : #endif 78 0 : unsigned wsize; /* window size or zero if not using window */ 79 0 : unsigned whave; /* valid bytes in the window */ 80 0 : unsigned write; /* window write index */ 81 0 : unsigned char *window; /* allocated sliding window, if wsize != 0 */ 82 0 : unsigned long hold; /* local strm->hold */ 83 0 : unsigned bits; /* local strm->bits */ 84 0 : code const *lcode; /* local strm->lencode */ 85 0 : code const *dcode; /* local strm->distcode */ 86 0 : unsigned lmask; /* mask for first level of length codes */ 87 0 : unsigned dmask; /* mask for first level of distance codes */ 88 0 : code this; /* retrieved table entry */ 89 0 : unsigned op; /* code bits, operation, extra bits, or */ 90 : /* window position, window bytes to copy */ 91 0 : unsigned len; /* match length, unused bytes */ 92 0 : unsigned dist; /* match distance */ 93 0 : unsigned char *from; /* where to copy match from */ 94 : 95 : /* copy state to local variables */ 96 0 : state = (struct inflate_state *)strm->state; 97 0 : in = strm->next_in; 98 0 : last = in + (strm->avail_in - 5); 99 0 : out = strm->next_out; 100 0 : beg = out - (start - strm->avail_out); 101 0 : end = out + (strm->avail_out - 257); 102 : #ifdef INFLATE_STRICT 103 : dmax = state->dmax; 104 : #endif 105 0 : wsize = state->wsize; 106 0 : whave = state->whave; 107 0 : write = state->write; 108 0 : window = state->window; 109 0 : hold = state->hold; 110 0 : bits = state->bits; 111 0 : lcode = state->lencode; 112 0 : dcode = state->distcode; 113 0 : lmask = (1U << state->lenbits) - 1; 114 0 : dmask = (1U << state->distbits) - 1; 115 : 116 : /* decode literals and length/distances until end-of-block or not enough 117 : input data or output space */ 118 0 : do { 119 0 : if (bits < 15) { 120 0 : hold += (unsigned long)(*in++) << bits; 121 0 : bits += 8; 122 0 : hold += (unsigned long)(*in++) << bits; 123 0 : bits += 8; 124 : } 125 0 : this = lcode[hold & lmask]; 126 0 : dolen: 127 0 : op = (unsigned)(this.bits); 128 0 : hold >>= op; 129 0 : bits -= op; 130 0 : op = (unsigned)(this.op); 131 0 : if (op == 0) { /* literal */ 132 0 : *out++ = (unsigned char)(this.val); 133 : } 134 0 : else if (op & 16) { /* length base */ 135 0 : len = (unsigned)(this.val); 136 0 : op &= 15; /* number of extra bits */ 137 0 : if (op) { 138 0 : if (bits < op) { 139 0 : hold += (unsigned long)(*in++) << bits; 140 0 : bits += 8; 141 : } 142 0 : len += (unsigned)hold & ((1U << op) - 1); 143 0 : hold >>= op; 144 0 : bits -= op; 145 : } 146 0 : if (bits < 15) { 147 0 : hold += (unsigned long)(*in++) << bits; 148 0 : bits += 8; 149 0 : hold += (unsigned long)(*in++) << bits; 150 0 : bits += 8; 151 : } 152 0 : this = dcode[hold & dmask]; 153 0 : dodist: 154 0 : op = (unsigned)(this.bits); 155 0 : hold >>= op; 156 0 : bits -= op; 157 0 : op = (unsigned)(this.op); 158 0 : if (op & 16) { /* distance base */ 159 0 : dist = (unsigned)(this.val); 160 0 : op &= 15; /* number of extra bits */ 161 0 : if (bits < op) { 162 0 : hold += (unsigned long)(*in++) << bits; 163 0 : bits += 8; 164 0 : if (bits < op) { 165 0 : hold += (unsigned long)(*in++) << bits; 166 0 : bits += 8; 167 : } 168 : } 169 0 : dist += (unsigned)hold & ((1U << op) - 1); 170 : #ifdef INFLATE_STRICT 171 : if (dist > dmax) { 172 : strm->msg = (char *)"invalid distance too far back"; 173 : state->mode = BAD; 174 : break; 175 : } 176 : #endif 177 0 : hold >>= op; 178 0 : bits -= op; 179 0 : op = (unsigned)(out - beg); /* max distance in output */ 180 0 : if (dist > op) { /* see if copy from window */ 181 0 : op = dist - op; /* distance back in window */ 182 0 : if (op > whave) { 183 0 : strm->msg = (char *)"invalid distance too far back"; 184 0 : state->mode = BAD; 185 0 : break; 186 : } 187 0 : from = window; 188 0 : if (write == 0) { /* very common case */ 189 0 : from += wsize - op; 190 0 : if (op < len) { /* some from window */ 191 0 : len -= op; 192 0 : do { 193 0 : *out++ = *from++; 194 0 : } while (--op); 195 0 : from = out - dist; /* rest from output */ 196 : } 197 : } 198 0 : else if (write < op) { /* wrap around window */ 199 0 : from += wsize + write - op; 200 0 : op -= write; 201 0 : if (op < len) { /* some from end of window */ 202 0 : len -= op; 203 0 : do { 204 0 : *out++ = *from++; 205 0 : } while (--op); 206 0 : from = window; 207 0 : if (write < len) { /* some from start of window */ 208 0 : op = write; 209 0 : len -= op; 210 0 : do { 211 0 : *out++ = *from++; 212 0 : } while (--op); 213 0 : from = out - dist; /* rest from output */ 214 : } 215 : } 216 : } 217 : else { /* contiguous in window */ 218 0 : from += write - op; 219 0 : if (op < len) { /* some from window */ 220 0 : len -= op; 221 0 : do { 222 0 : *out++ = *from++; 223 0 : } while (--op); 224 0 : from = out - dist; /* rest from output */ 225 : } 226 : } 227 0 : while (len > 2) { 228 0 : *out++ = *from++; 229 0 : *out++ = *from++; 230 0 : *out++ = *from++; 231 0 : len -= 3; 232 : } 233 0 : if (len) { 234 0 : *out++ = *from++; 235 0 : if (len > 1) 236 0 : *out++ = *from++; 237 : } 238 : } 239 : else { 240 0 : unsigned short *sout; 241 0 : unsigned long loops; 242 : 243 0 : from = out - dist; /* copy direct from output */ 244 : /* minimum length is three */ 245 : /* Align out addr */ 246 0 : if (!((long)(out - 1) & 1)) { 247 0 : *out++ = *from++; 248 0 : len--; 249 : } 250 0 : sout = (unsigned short *)(out); 251 0 : if (dist > 2) { 252 0 : unsigned short *sfrom; 253 : 254 0 : sfrom = (unsigned short *)(from); 255 0 : loops = len >> 1; 256 0 : do 257 : #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 258 0 : *sout++ = *sfrom++; 259 : #else 260 : *sout++ = get_unaligned16(sfrom++); 261 : #endif 262 0 : while (--loops); 263 : out = (unsigned char *)sout; 264 : from = (unsigned char *)sfrom; 265 : } else { /* dist == 1 or dist == 2 */ 266 0 : unsigned short pat16; 267 : 268 0 : pat16 = *(sout-1); 269 0 : if (dist == 1) { 270 0 : union uu mm; 271 : /* copy one char pattern to both bytes */ 272 0 : mm.us = pat16; 273 0 : mm.b[0] = mm.b[1]; 274 0 : pat16 = mm.us; 275 : } 276 0 : loops = len >> 1; 277 0 : do 278 0 : *sout++ = pat16; 279 0 : while (--loops); 280 : out = (unsigned char *)sout; 281 : } 282 0 : if (len & 1) 283 0 : *out++ = *from++; 284 : } 285 : } 286 0 : else if ((op & 64) == 0) { /* 2nd level distance code */ 287 0 : this = dcode[this.val + (hold & ((1U << op) - 1))]; 288 0 : goto dodist; 289 : } 290 : else { 291 0 : strm->msg = (char *)"invalid distance code"; 292 0 : state->mode = BAD; 293 0 : break; 294 : } 295 : } 296 0 : else if ((op & 64) == 0) { /* 2nd level length code */ 297 0 : this = lcode[this.val + (hold & ((1U << op) - 1))]; 298 0 : goto dolen; 299 : } 300 0 : else if (op & 32) { /* end-of-block */ 301 0 : state->mode = TYPE; 302 0 : break; 303 : } 304 : else { 305 0 : strm->msg = (char *)"invalid literal/length code"; 306 0 : state->mode = BAD; 307 0 : break; 308 : } 309 0 : } while (in < last && out < end); 310 : 311 : /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ 312 0 : len = bits >> 3; 313 0 : in -= len; 314 0 : bits -= len << 3; 315 0 : hold &= (1U << bits) - 1; 316 : 317 : /* update state and return */ 318 0 : strm->next_in = in; 319 0 : strm->next_out = out; 320 0 : strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); 321 0 : strm->avail_out = (unsigned)(out < end ? 322 0 : 257 + (end - out) : 257 - (out - end)); 323 0 : state->hold = hold; 324 0 : state->bits = bits; 325 0 : return; 326 : } 327 : 328 : /* 329 : inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): 330 : - Using bit fields for code structure 331 : - Different op definition to avoid & for extra bits (do & for table bits) 332 : - Three separate decoding do-loops for direct, window, and write == 0 333 : - Special case for distance > 1 copies to do overlapped load and store copy 334 : - Explicit branch predictions (based on measured branch probabilities) 335 : - Deferring match copy and interspersed it with decoding subsequent codes 336 : - Swapping literal/length else 337 : - Swapping window/direct else 338 : - Larger unrolled copy loops (three is about right) 339 : - Moving len -= 3 statement into middle of loop 340 : */ 341 : 342 : #endif /* !ASMINF */