1 // Baseline JPEG decoder
2 
3 module imageformats.jpeg;
4 
5 import std.math         : ceil;
6 import std.bitmanip     : bigEndianToNative;
7 import std.stdio        : File, SEEK_SET, SEEK_CUR;
8 import std.typecons     : scoped;
9 import core.stdc.stdlib : alloca;
10 import imageformats;
11 
12 private:
13 
14 public bool detect_jpeg(Reader stream) {
15     try {
16         int w, h, c;
17         read_jpeg_info(stream, w, h, c);
18         return true;
19     } catch (Throwable) {
20         return false;
21     } finally {
22         stream.seek(0, SEEK_SET);
23     }
24 }
25 
26 ///
27 public IFImage read_jpeg(in char[] filename, long req_chans = 0) {
28     auto reader = scoped!FileReader(filename);
29     return read_jpeg(reader, req_chans);
30 }
31 
32 ///
33 public IFImage read_jpeg_from_mem(in ubyte[] source, long req_chans = 0) {
34     auto reader = scoped!MemReader(source);
35     return read_jpeg(reader, req_chans);
36 }
37 
38 ///
39 public void read_jpeg_info(in char[] filename, out int w, out int h, out int chans) {
40     auto reader = scoped!FileReader(filename);
41     return read_jpeg_info(reader, w, h, chans);
42 }
43 
44 ///
45 public void read_jpeg_info_from_mem(in ubyte[] source, out int w, out int h, out int chans) {
46     auto reader = scoped!MemReader(source);
47     return read_jpeg_info(reader, w, h, chans);
48 }
49 
50 package IFImage read_jpeg(Reader stream, long req_chans = 0) {
51     if (req_chans < 0 || 4 < req_chans)
52         throw new ImageIOException("come on...");
53 
54     // SOI
55     ubyte[2] tmp = void;
56     stream.readExact(tmp, tmp.length);
57     if (tmp[0..2] != jpeg_soi_marker)
58         throw new ImageIOException("not JPEG");
59 
60     JPEG_Decoder dc = { stream: stream };
61 
62     read_markers(dc);   // reads until first scan header or eoi
63     if (dc.eoi_reached)
64         throw new ImageIOException("no image data");
65 
66     dc.tgt_chans = (req_chans == 0) ? dc.num_comps : cast(int) req_chans;
67 
68     IFImage result = {
69         w      : dc.width,
70         h      : dc.height,
71         c      : cast(ColFmt) dc.tgt_chans,
72         pixels : decode_jpeg(dc),
73     };
74     return result;
75 }
76 
77 struct JPEG_Decoder {
78     Reader stream;
79 
80     bool has_frame_header = false;
81     bool eoi_reached = false;
82 
83     ubyte[64][4] qtables;
84     HuffTab[2] ac_tables;
85     HuffTab[2] dc_tables;
86 
87     ubyte cb;  // current byte (next bit always at MSB)
88     int bits_left;   // num of unused bits in cb
89 
90     bool correct_comp_ids;
91     Component[3] comps;
92     ubyte num_comps;
93     int tgt_chans;
94 
95     int width, height;
96 
97     int hmax, vmax;
98 
99     ushort restart_interval;    // number of MCUs in restart interval
100 
101     // image component
102     struct Component {
103         ubyte sfx, sfy;   // sampling factors, aka. h and v
104         size_t x, y;       // total num of samples, without fill samples
105         ubyte qtable;
106         ubyte ac_table;
107         ubyte dc_table;
108         int pred;                // dc prediction
109         ubyte[] data;   // reconstructed samples
110     }
111 
112     int num_mcu_x;
113     int num_mcu_y;
114 }
115 
116 struct HuffTab {
117     ubyte[256] values;
118     ubyte[257] sizes;
119     short[16] mincode, maxcode;
120     short[16] valptr;
121 }
122 
123 enum Marker : ubyte {
124     SOI = 0xd8,     // start of image
125     SOF0 = 0xc0,    // start of frame / baseline DCT
126     //SOF1 = 0xc1,    // start of frame / extended seq.
127     //SOF2 = 0xc2,    // start of frame / progressive DCT
128     SOF3 = 0xc3,    // start of frame / lossless
129     SOF9 = 0xc9,    // start of frame / extended seq., arithmetic
130     SOF11 = 0xcb,    // start of frame / lossless, arithmetic
131     DHT = 0xc4,     // define huffman tables
132     DQT = 0xdb,     // define quantization tables
133     DRI = 0xdd,     // define restart interval
134     SOS = 0xda,     // start of scan
135     DNL = 0xdc,     // define number of lines
136     RST0 = 0xd0,    // restart entropy coded data
137     // ...
138     RST7 = 0xd7,    // restart entropy coded data
139     APP0 = 0xe0,    // application 0 segment
140     // ...
141     APPf = 0xef,    // application f segment
142     //DAC = 0xcc,     // define arithmetic conditioning table
143     COM = 0xfe,     // comment
144     EOI = 0xd9,     // end of image
145 }
146 
147 void read_markers(ref JPEG_Decoder dc) {
148     bool has_next_scan_header = false;
149     while (!has_next_scan_header && !dc.eoi_reached) {
150         ubyte[2] marker;
151         dc.stream.readExact(marker, 2);
152 
153         if (marker[0] != 0xff)
154             throw new ImageIOException("no marker");
155         while (marker[1] == 0xff)
156             dc.stream.readExact(marker[1..$], 1);
157 
158         debug(DebugJPEG) writefln("marker: %s (%1$x)\t", cast(Marker) marker[1]);
159         switch (marker[1]) with (Marker) {
160             case DHT: dc.read_huffman_tables(); break;
161             case DQT: dc.read_quantization_tables(); break;
162             case SOF0:
163                 if (dc.has_frame_header)
164                     throw new ImageIOException("extra frame header");
165                 debug(DebugJPEG) writeln();
166                 dc.read_frame_header();
167                 dc.has_frame_header = true;
168                 break;
169             case SOS:
170                 if (!dc.has_frame_header)
171                     throw new ImageIOException("no frame header");
172                 dc.read_scan_header();
173                 has_next_scan_header = true;
174                 break;
175             case DRI: dc.read_restart_interval(); break;
176             case EOI: dc.eoi_reached = true; break;
177             case APP0: .. case APPf: goto case;
178             case COM:
179                 debug(DebugJPEG) writefln("-> skipping segment");
180                 ubyte[2] lenbuf = void;
181                 dc.stream.readExact(lenbuf, lenbuf.length);
182                 int len = bigEndianToNative!ushort(lenbuf) - 2;
183                 dc.stream.seek(len, SEEK_CUR);
184                 break;
185             default: throw new ImageIOException("invalid / unsupported marker");
186         }
187     }
188 }
189 
190 // DHT -- define huffman tables
191 void read_huffman_tables(ref JPEG_Decoder dc) {
192     ubyte[19] tmp = void;
193     dc.stream.readExact(tmp, 2);
194     int len = bigEndianToNative!ushort(tmp[0..2]);
195     len -= 2;
196 
197     while (0 < len) {
198         dc.stream.readExact(tmp, 17);   // info byte & the BITS
199         ubyte table_slot = tmp[0] & 0xf; // must be 0 or 1 for baseline
200         ubyte table_class = tmp[0] >> 4;  // 0 = dc table, 1 = ac table
201         if (1 < table_slot || 1 < table_class)
202             throw new ImageIOException("invalid / not supported");
203 
204         // compute total number of huffman codes
205         int mt = 0;
206         foreach (i; 1..17)
207             mt += tmp[i];
208         if (256 < mt)   // TODO where in the spec?
209             throw new ImageIOException("invalid / not supported");
210 
211         if (table_class == 0) {
212             dc.stream.readExact(dc.dc_tables[table_slot].values, mt);
213             derive_table(dc.dc_tables[table_slot], tmp[1..17]);
214         } else {
215             dc.stream.readExact(dc.ac_tables[table_slot].values, mt);
216             derive_table(dc.ac_tables[table_slot], tmp[1..17]);
217         }
218 
219         len -= 17 + mt;
220     }
221 }
222 
223 // num_values is the BITS
224 void derive_table(ref HuffTab table, in ref ubyte[16] num_values) {
225     short[256] codes;
226 
227     int k = 0;
228     foreach (i; 0..16) {
229         foreach (j; 0..num_values[i]) {
230             table.sizes[k] = cast(ubyte) (i + 1);
231             ++k;
232         }
233     }
234     table.sizes[k] = 0;
235 
236     k = 0;
237     short code = 0;
238     ubyte si = table.sizes[k];
239     while (true) {
240         do {
241             codes[k] = code;
242             ++code;
243             ++k;
244         } while (si == table.sizes[k]);
245 
246         if (table.sizes[k] == 0)
247             break;
248 
249         debug(DebugJPEG) assert(si < table.sizes[k]);
250         do {
251             code <<= 1;
252             ++si;
253         } while (si != table.sizes[k]);
254     }
255 
256     derive_mincode_maxcode_valptr(
257         table.mincode, table.maxcode, table.valptr,
258         codes, num_values
259     );
260 }
261 
262 // F.15
263 void derive_mincode_maxcode_valptr(
264         ref short[16] mincode, ref short[16] maxcode, ref short[16] valptr,
265         in ref short[256] codes, in ref ubyte[16] num_values) pure
266 {
267     mincode[] = -1;
268     maxcode[] = -1;
269     valptr[] = -1;
270 
271     int j = 0;
272     foreach (i; 0..16) {
273         if (num_values[i] != 0) {
274             valptr[i] = cast(short) j;
275             mincode[i] = codes[j];
276             j += num_values[i] - 1;
277             maxcode[i] = codes[j];
278             j += 1;
279         }
280     }
281 }
282 
283 // DQT -- define quantization tables
284 void read_quantization_tables(ref JPEG_Decoder dc) {
285     ubyte[2] tmp = void;
286     dc.stream.readExact(tmp, 2);
287     int len = bigEndianToNative!ushort(tmp[0..2]);
288     if (len % 65 != 2)
289         throw new ImageIOException("invalid / not supported");
290     len -= 2;
291     while (0 < len) {
292         dc.stream.readExact(tmp, 1);
293         ubyte table_info = tmp[0];
294         ubyte table_slot = table_info & 0xf;
295         ubyte precision = table_info >> 4;  // 0 = 8 bit, 1 = 16 bit
296         if (3 < table_slot || precision != 0)    // only 8 bit for baseline
297             throw new ImageIOException("invalid / not supported");
298 
299         dc.stream.readExact(dc.qtables[table_slot], 64);
300         len -= 1 + 64;
301     }
302 }
303 
304 // SOF0 -- start of frame
305 void read_frame_header(ref JPEG_Decoder dc) {
306     ubyte[9] tmp = void;
307     dc.stream.readExact(tmp, 8);
308     int len = bigEndianToNative!ushort(tmp[0..2]);  // 8 + num_comps*3
309     ubyte precision = tmp[2];
310     dc.height = bigEndianToNative!ushort(tmp[3..5]);
311     dc.width = bigEndianToNative!ushort(tmp[5..7]);
312     dc.num_comps = tmp[7];
313 
314     if ( precision != 8 ||
315          (dc.num_comps != 1 && dc.num_comps != 3) ||
316          len != 8 + dc.num_comps*3 )
317         throw new ImageIOException("invalid / not supported");
318 
319     dc.hmax = 0;
320     dc.vmax = 0;
321     int mcu_du = 0; // data units in one mcu
322     dc.stream.readExact(tmp, dc.num_comps*3);
323     foreach (i; 0..dc.num_comps) {
324         ubyte ci = tmp[i*3];
325         // JFIF says ci should be i+1, but there are images where ci is i. Normalize ids
326         // so that ci == i, always. So much for standards...
327         if (i == 0) { dc.correct_comp_ids = ci == i+1; }
328         if ((dc.correct_comp_ids && ci != i+1)
329         || (!dc.correct_comp_ids && ci != i))
330             throw new ImageIOException("invalid component id");
331 
332         auto comp = &dc.comps[i];
333         ubyte sampling_factors = tmp[i*3 + 1];
334         comp.sfx = sampling_factors >> 4;
335         comp.sfy = sampling_factors & 0xf;
336         comp.qtable = tmp[i*3 + 2];
337         if ( comp.sfy < 1 || 4 < comp.sfy ||
338              comp.sfx < 1 || 4 < comp.sfx ||
339              3 < comp.qtable )
340             throw new ImageIOException("invalid / not supported");
341 
342         if (dc.hmax < comp.sfx) dc.hmax = comp.sfx;
343         if (dc.vmax < comp.sfy) dc.vmax = comp.sfy;
344 
345         mcu_du += comp.sfx * comp.sfy;
346     }
347     if (10 < mcu_du)
348         throw new ImageIOException("invalid / not supported");
349 
350     foreach (i; 0..dc.num_comps) {
351         dc.comps[i].x = cast(size_t) ceil(dc.width * (cast(double) dc.comps[i].sfx / dc.hmax));
352         dc.comps[i].y = cast(size_t) ceil(dc.height * (cast(double) dc.comps[i].sfy / dc.vmax));
353 
354         debug(DebugJPEG) writefln("%d comp %d sfx/sfy: %d/%d", i, dc.comps[i].id,
355                                                                   dc.comps[i].sfx,
356                                                                   dc.comps[i].sfy);
357     }
358 
359     size_t mcu_w = dc.hmax * 8;
360     size_t mcu_h = dc.vmax * 8;
361     dc.num_mcu_x = cast(int) ((dc.width + mcu_w-1) / mcu_w);
362     dc.num_mcu_y = cast(int) ((dc.height + mcu_h-1) / mcu_h);
363 
364     debug(DebugJPEG) {
365         writefln("\tlen: %s", len);
366         writefln("\tprecision: %s", precision);
367         writefln("\tdimensions: %s x %s", dc.width, dc.height);
368         writefln("\tnum_comps: %s", dc.num_comps);
369         writefln("\tnum_mcu_x: %s", dc.num_mcu_x);
370         writefln("\tnum_mcu_y: %s", dc.num_mcu_y);
371     }
372 
373 }
374 
375 // SOS -- start of scan
376 void read_scan_header(ref JPEG_Decoder dc) {
377     ubyte[3] tmp = void;
378     dc.stream.readExact(tmp, tmp.length);
379     ushort len = bigEndianToNative!ushort(tmp[0..2]);
380     ubyte num_scan_comps = tmp[2];
381 
382     if ( num_scan_comps != dc.num_comps ||
383          len != (6+num_scan_comps*2) )
384         throw new ImageIOException("invalid / not supported");
385 
386     auto buf = (cast(ubyte*) alloca((len-3) * ubyte.sizeof))[0..len-3];
387     dc.stream.readExact(buf, buf.length);
388 
389     foreach (i; 0..num_scan_comps) {
390         uint ci = buf[i*2] - ((dc.correct_comp_ids) ? 1 : 0);
391         if (ci >= dc.num_comps)
392             throw new ImageIOException("invalid component id");
393 
394         ubyte tables = buf[i*2+1];
395         dc.comps[ci].dc_table = tables >> 4;
396         dc.comps[ci].ac_table = tables & 0xf;
397         if ( 1 < dc.comps[ci].dc_table ||
398              1 < dc.comps[ci].ac_table )
399             throw new ImageIOException("invalid / not supported");
400     }
401 
402     // ignore these
403     //ubyte spectral_start = buf[$-3];
404     //ubyte spectral_end = buf[$-2];
405     //ubyte approx = buf[$-1];
406 }
407 
408 void read_restart_interval(ref JPEG_Decoder dc) {
409     ubyte[4] tmp = void;
410     dc.stream.readExact(tmp, tmp.length);
411     ushort len = bigEndianToNative!ushort(tmp[0..2]);
412     if (len != 4)
413         throw new ImageIOException("invalid / not supported");
414     dc.restart_interval = bigEndianToNative!ushort(tmp[2..4]);
415     debug(DebugJPEG) writeln("restart interval set to: ", dc.restart_interval);
416 }
417 
418 // reads data after the SOS segment
419 ubyte[] decode_jpeg(ref JPEG_Decoder dc) {
420     foreach (ref comp; dc.comps[0..dc.num_comps])
421         comp.data = new ubyte[dc.num_mcu_x*comp.sfx*8*dc.num_mcu_y*comp.sfy*8];
422 
423     // E.7 -- Multiple scans are for progressive images which are not supported
424     //while (!dc.eoi_reached) {
425         decode_scan(dc);    // E.2.3
426         //read_markers(dc);   // reads until next scan header or eoi
427     //}
428 
429     // throw away fill samples and convert to target format
430     return dc.reconstruct();
431 }
432 
433 // E.2.3 and E.8 and E.9
434 void decode_scan(ref JPEG_Decoder dc) {
435     debug(DebugJPEG) writeln("decode scan...");
436 
437     int intervals, mcus;
438     if (0 < dc.restart_interval) {
439         int total_mcus = dc.num_mcu_x * dc.num_mcu_y;
440         intervals = (total_mcus + dc.restart_interval-1) / dc.restart_interval;
441         mcus = dc.restart_interval;
442     } else {
443         intervals = 1;
444         mcus = dc.num_mcu_x * dc.num_mcu_y;
445     }
446     debug(DebugJPEG) writeln("intervals: ", intervals);
447 
448     foreach (mcu_j; 0 .. dc.num_mcu_y) {
449         foreach (mcu_i; 0 .. dc.num_mcu_x) {
450 
451             // decode mcu
452             foreach (c; 0..dc.num_comps) {
453                 auto comp = &dc.comps[c];
454                 foreach (du_j; 0 .. comp.sfy) {
455                     foreach (du_i; 0 .. comp.sfx) {
456                         // decode entropy, dequantize & dezigzag
457                         short[64] data = decode_block(dc, *comp, dc.qtables[comp.qtable]);
458                         // idct & level-shift
459                         int outx = (mcu_i * comp.sfx + du_i) * 8;
460                         int outy = (mcu_j * comp.sfy + du_j) * 8;
461                         int dst_stride = dc.num_mcu_x * comp.sfx*8;
462                         ubyte* dst = comp.data.ptr + outy*dst_stride + outx;
463                         stbi__idct_block(dst, dst_stride, data);
464                     }
465                 }
466             }
467 
468             --mcus;
469 
470             if (!mcus) {
471                 --intervals;
472                 if (!intervals)
473                     return;
474 
475                 read_restart(dc.stream);    // RSTx marker
476 
477                 if (intervals == 1) {
478                     // last interval, may have fewer MCUs than defined by DRI
479                     mcus = (dc.num_mcu_y - mcu_j - 1) * dc.num_mcu_x + dc.num_mcu_x - mcu_i - 1;
480                 } else {
481                     mcus = dc.restart_interval;
482                 }
483 
484                 // reset decoder
485                 dc.cb = 0;
486                 dc.bits_left = 0;
487                 foreach (k; 0..dc.num_comps)
488                     dc.comps[k].pred = 0;
489             }
490 
491         }
492     }
493 }
494 
495 // RST0-RST7
496 void read_restart(Reader stream) {
497     ubyte[2] tmp = void;
498     stream.readExact(tmp, tmp.length);
499     if (tmp[0] != 0xff || tmp[1] < Marker.RST0 || Marker.RST7 < tmp[1])
500         throw new ImageIOException("reset marker missing");
501     // the markers should cycle 0 through 7, could check that here...
502 }
503 
504 immutable ubyte[64] dezigzag = [
505      0,  1,  8, 16,  9,  2,  3, 10,
506     17, 24, 32, 25, 18, 11,  4,  5,
507     12, 19, 26, 33, 40, 48, 41, 34,
508     27, 20, 13,  6,  7, 14, 21, 28,
509     35, 42, 49, 56, 57, 50, 43, 36,
510     29, 22, 15, 23, 30, 37, 44, 51,
511     58, 59, 52, 45, 38, 31, 39, 46,
512     53, 60, 61, 54, 47, 55, 62, 63,
513 ];
514 
515 // decode entropy, dequantize & dezigzag (see section F.2)
516 short[64] decode_block(ref JPEG_Decoder dc, ref JPEG_Decoder.Component comp,
517                                                     in ref ubyte[64] qtable)
518 {
519     short[64] res = 0;
520 
521     ubyte t = decode_huff(dc, dc.dc_tables[comp.dc_table]);
522     int diff = t ? dc.receive_and_extend(t) : 0;
523 
524     comp.pred = comp.pred + diff;
525     res[0] = cast(short) (comp.pred * qtable[0]);
526 
527     int k = 1;
528     do {
529         ubyte rs = decode_huff(dc, dc.ac_tables[comp.ac_table]);
530         ubyte rrrr = rs >> 4;
531         ubyte ssss = rs & 0xf;
532 
533         if (ssss == 0) {
534             if (rrrr != 0xf)
535                 break;      // end of block
536             k += 16;    // run length is 16
537             continue;
538         }
539 
540         k += rrrr;
541 
542         if (63 < k)
543             throw new ImageIOException("corrupt block");
544         res[dezigzag[k]] = cast(short) (dc.receive_and_extend(ssss) * qtable[k]);
545         k += 1;
546     } while (k < 64);
547 
548     return res;
549 }
550 
551 int receive_and_extend(ref JPEG_Decoder dc, ubyte s) {
552     // receive
553     int symbol = 0;
554     foreach (_; 0..s)
555         symbol = (symbol << 1) + nextbit(dc);
556     // extend
557     int vt = 1 << (s-1);
558     if (symbol < vt)
559         return symbol + (-1 << s) + 1;
560     return symbol;
561 }
562 
563 // F.16 -- the DECODE
564 ubyte decode_huff(ref JPEG_Decoder dc, in ref HuffTab tab) {
565     short code = nextbit(dc);
566 
567     int i = 0;
568     while (tab.maxcode[i] < code) {
569         code = cast(short) ((code << 1) + nextbit(dc));
570         i += 1;
571         if (tab.maxcode.length <= i)
572             throw new ImageIOException("corrupt huffman coding");
573     }
574     int j = tab.valptr[i] + code - tab.mincode[i];
575     if (tab.values.length <= cast(uint) j)
576         throw new ImageIOException("corrupt huffman coding");
577     return tab.values[j];
578 }
579 
580 // F.2.2.5 and F.18
581 ubyte nextbit(ref JPEG_Decoder dc) {
582     if (!dc.bits_left) {
583         ubyte[1] bytebuf;
584         dc.stream.readExact(bytebuf, 1);
585         dc.cb = bytebuf[0];
586         dc.bits_left = 8;
587 
588         if (dc.cb == 0xff) {
589             dc.stream.readExact(bytebuf, 1);
590             if (bytebuf[0] != 0x0) {
591                 throw new ImageIOException("unexpected marker");
592             }
593         }
594     }
595 
596     ubyte r = dc.cb >> 7;
597     dc.cb <<= 1;
598     dc.bits_left -= 1;
599     return r;
600 }
601 
602 ubyte[] reconstruct(in ref JPEG_Decoder dc) {
603     auto result = new ubyte[dc.width * dc.height * dc.tgt_chans];
604 
605     switch (dc.num_comps * 10 + dc.tgt_chans) {
606         case 34, 33:
607             // Use specialized bilinear filtering functions for the frequent cases where
608             // Cb & Cr channels have half resolution.
609             if ((dc.comps[0].sfx <= 2 && dc.comps[0].sfy <= 2)
610             && (dc.comps[0].sfx + dc.comps[0].sfy >= 3)
611             && dc.comps[1].sfx == 1 && dc.comps[1].sfy == 1
612             && dc.comps[2].sfx == 1 && dc.comps[2].sfy == 1) {
613                 void function(in ubyte[], in ubyte[], ubyte[]) resample;
614                 switch (dc.comps[0].sfx * 10 + dc.comps[0].sfy) {
615                     case 22: resample = &upsample_h2_v2; break;
616                     case 21: resample = &upsample_h2_v1; break;
617                     case 12: resample = &upsample_h1_v2; break;
618                     default: throw new ImageIOException("bug");
619                 }
620 
621                 auto comp1 = new ubyte[](dc.width);
622                 auto comp2 = new ubyte[](dc.width);
623 
624                 size_t s = 0;
625                 size_t di = 0;
626                 foreach (j; 0 .. dc.height) {
627                     size_t mi = j / dc.comps[0].sfy;
628                     size_t si = (mi == 0 || mi >= (dc.height-1)/dc.comps[0].sfy)
629                               ? mi : mi - 1 + s * 2;
630                     s = s ^ 1;
631 
632                     size_t cs = dc.num_mcu_x * dc.comps[1].sfx * 8;
633                     size_t cl0 = mi * cs;
634                     size_t cl1 = si * cs;
635                     resample(dc.comps[1].data[cl0 .. cl0 + dc.comps[1].x],
636                              dc.comps[1].data[cl1 .. cl1 + dc.comps[1].x],
637                              comp1[]);
638                     resample(dc.comps[2].data[cl0 .. cl0 + dc.comps[2].x],
639                              dc.comps[2].data[cl1 .. cl1 + dc.comps[2].x],
640                              comp2[]);
641 
642                     foreach (i; 0 .. dc.width) {
643                         result[di .. di+3] = ycbcr_to_rgb(
644                             dc.comps[0].data[j * dc.num_mcu_x * dc.comps[0].sfx * 8 + i],
645                             comp1[i],
646                             comp2[i],
647                         );
648                         if (dc.tgt_chans == 4)
649                             result[di+3] = 255;
650                         di += dc.tgt_chans;
651                     }
652                 }
653 
654                 return result;
655             }
656 
657             foreach (const ref comp; dc.comps[0..dc.num_comps]) {
658                 if (comp.sfx != dc.hmax || comp.sfy != dc.vmax)
659                     return dc.upsample(result);
660             }
661 
662             size_t si, di;
663             foreach (j; 0 .. dc.height) {
664                 foreach (i; 0 .. dc.width) {
665                     result[di .. di+3] = ycbcr_to_rgb(
666                         dc.comps[0].data[si+i],
667                         dc.comps[1].data[si+i],
668                         dc.comps[2].data[si+i],
669                     );
670                     if (dc.tgt_chans == 4)
671                         result[di+3] = 255;
672                     di += dc.tgt_chans;
673                 }
674                 si += dc.num_mcu_x * dc.comps[0].sfx * 8;
675             }
676             return result;
677         case 32, 12, 31, 11:
678             const comp = &dc.comps[0];
679             if (comp.sfx == dc.hmax && comp.sfy == dc.vmax) {
680                 size_t si, di;
681                 if (dc.tgt_chans == 2) {
682                     foreach (j; 0 .. dc.height) {
683                         foreach (i; 0 .. dc.width) {
684                             result[di++] = comp.data[si+i];
685                             result[di++] = 255;
686                         }
687                         si += dc.num_mcu_x * comp.sfx * 8;
688                     }
689                 } else {
690                     foreach (j; 0 .. dc.height) {
691                         result[di .. di+dc.width] = comp.data[si .. si+dc.width];
692                         si += dc.num_mcu_x * comp.sfx * 8;
693                         di += dc.width;
694                     }
695                 }
696                 return result;
697             } else {
698                 // need to resample (haven't tested this...)
699                 return dc.upsample_luma(result);
700             }
701         case 14, 13:
702             const comp = &dc.comps[0];
703             size_t si, di;
704             foreach (j; 0 .. dc.height) {
705                 foreach (i; 0 .. dc.width) {
706                     result[di .. di+3] = comp.data[si+i];
707                     if (dc.tgt_chans == 4)
708                         result[di+3] = 255;
709                     di += dc.tgt_chans;
710                 }
711                 si += dc.num_mcu_x * comp.sfx * 8;
712             }
713             return result;
714         default: assert(0);
715     }
716 }
717 
718 void upsample_h2_v2(in ubyte[] line0, in ubyte[] line1, ubyte[] result) {
719     ubyte mix(ubyte mm, ubyte ms, ubyte sm, ubyte ss) {
720        return cast(ubyte) (( cast(uint) mm * 3 * 3
721                            + cast(uint) ms * 3 * 1
722                            + cast(uint) sm * 1 * 3
723                            + cast(uint) ss * 1 * 1
724                            + 8) / 16);
725     }
726 
727     result[0] = cast(ubyte) (( cast(uint) line0[0] * 3
728                              + cast(uint) line1[0] * 1
729                              + 2) / 4);
730     if (line0.length == 1) return;
731     result[1] = mix(line0[0], line0[1], line1[0], line1[1]);
732 
733     size_t di = 2;
734     foreach (i; 1 .. line0.length) {
735         result[di] = mix(line0[i], line0[i-1], line1[i], line1[i-1]);
736         di += 1;
737         if (i == line0.length-1) {
738             if (di < result.length) {
739                 result[di] = cast(ubyte) (( cast(uint) line0[i] * 3
740                                           + cast(uint) line1[i] * 1
741                                           + 2) / 4);
742             }
743             return;
744         }
745         result[di] = mix(line0[i], line0[i+1], line1[i], line1[i+1]);
746         di += 1;
747     }
748 }
749 
750 void upsample_h2_v1(in ubyte[] line0, in ubyte[] _line1, ubyte[] result) {
751     result[0] = line0[0];
752     if (line0.length == 1) return;
753     result[1] = cast(ubyte) (( cast(uint) line0[0] * 3
754                              + cast(uint) line0[1] * 1
755                              + 2) / 4);
756     size_t di = 2;
757     foreach (i; 1 .. line0.length) {
758         result[di] = cast(ubyte) (( cast(uint) line0[i-1] * 1
759                                   + cast(uint) line0[i+0] * 3
760                                   + 2) / 4);
761         di += 1;
762         if (i == line0.length-1) {
763             if (di < result.length) result[di] = line0[i];
764             return;
765         }
766         result[di] = cast(ubyte) (( cast(uint) line0[i+0] * 3
767                                   + cast(uint) line0[i+1] * 1
768                                   + 2) / 4);
769         di += 1;
770     }
771 }
772 
773 void upsample_h1_v2(in ubyte[] line0, in ubyte[] line1, ubyte[] result) {
774     foreach (i; 0 .. result.length) {
775         result[i] = cast(ubyte) (( cast(uint) line0[i] * 3
776                                  + cast(uint) line1[i] * 1
777                                  + 2) / 4);
778     }
779 }
780 
781 // Nearest neighbor
782 ubyte[] upsample_luma(in ref JPEG_Decoder dc, ubyte[] result) {
783     const size_t stride0 = dc.num_mcu_x * dc.comps[0].sfx * 8;
784     const y_step0 = cast(float) dc.comps[0].sfy / cast(float) dc.vmax;
785     const x_step0 = cast(float) dc.comps[0].sfx / cast(float) dc.hmax;
786 
787     float y0 = y_step0 * 0.5f;
788     size_t y0i = 0;
789 
790     size_t di;
791 
792     foreach (j; 0 .. dc.height) {
793         float x0 = x_step0 * 0.5f;
794         size_t x0i = 0;
795         foreach (i; 0 .. dc.width) {
796             result[di] = dc.comps[0].data[y0i + x0i];
797             if (dc.tgt_chans == 2)
798                 result[di+1] = 255;
799             di += dc.tgt_chans;
800             x0 += x_step0;
801             if (x0 >= 1.0f) { x0 -= 1.0f; x0i += 1; }
802         }
803         y0 += y_step0;
804         if (y0 >= 1.0f) { y0 -= 1.0f; y0i += stride0; }
805     }
806     return result;
807 }
808 
809 // Nearest neighbor
810 ubyte[] upsample(in ref JPEG_Decoder dc, ubyte[] result) {
811     const size_t stride0 = dc.num_mcu_x * dc.comps[0].sfx * 8;
812     const size_t stride1 = dc.num_mcu_x * dc.comps[1].sfx * 8;
813     const size_t stride2 = dc.num_mcu_x * dc.comps[2].sfx * 8;
814 
815     const y_step0 = cast(float) dc.comps[0].sfy / cast(float) dc.vmax;
816     const y_step1 = cast(float) dc.comps[1].sfy / cast(float) dc.vmax;
817     const y_step2 = cast(float) dc.comps[2].sfy / cast(float) dc.vmax;
818     const x_step0 = cast(float) dc.comps[0].sfx / cast(float) dc.hmax;
819     const x_step1 = cast(float) dc.comps[1].sfx / cast(float) dc.hmax;
820     const x_step2 = cast(float) dc.comps[2].sfx / cast(float) dc.hmax;
821 
822     float y0 = y_step0 * 0.5f;
823     float y1 = y_step1 * 0.5f;
824     float y2 = y_step2 * 0.5f;
825     size_t y0i = 0;
826     size_t y1i = 0;
827     size_t y2i = 0;
828 
829     size_t di;
830 
831     foreach (_j; 0 .. dc.height) {
832         float x0 = x_step0 * 0.5f;
833         float x1 = x_step1 * 0.5f;
834         float x2 = x_step2 * 0.5f;
835         size_t x0i = 0;
836         size_t x1i = 0;
837         size_t x2i = 0;
838         foreach (i; 0 .. dc.width) {
839             result[di .. di+3] = ycbcr_to_rgb(
840                 dc.comps[0].data[y0i + x0i],
841                 dc.comps[1].data[y1i + x1i],
842                 dc.comps[2].data[y2i + x2i],
843             );
844             if (dc.tgt_chans == 4)
845                 result[di+3] = 255;
846             di += dc.tgt_chans;
847             x0 += x_step0;
848             x1 += x_step1;
849             x2 += x_step2;
850             if (x0 >= 1.0) { x0 -= 1.0f; x0i += 1; }
851             if (x1 >= 1.0) { x1 -= 1.0f; x1i += 1; }
852             if (x2 >= 1.0) { x2 -= 1.0f; x2i += 1; }
853         }
854         y0 += y_step0;
855         y1 += y_step1;
856         y2 += y_step2;
857         if (y0 >= 1.0) { y0 -= 1.0f; y0i += stride0; }
858         if (y1 >= 1.0) { y1 -= 1.0f; y1i += stride1; }
859         if (y2 >= 1.0) { y2 -= 1.0f; y2i += stride2; }
860     }
861     return result;
862 }
863 
864 ubyte[3] ycbcr_to_rgb(ubyte y, ubyte cb, ubyte cr) pure {
865     ubyte[3] rgb = void;
866     rgb[0] = clamp(y + 1.402*(cr-128));
867     rgb[1] = clamp(y - 0.34414*(cb-128) - 0.71414*(cr-128));
868     rgb[2] = clamp(y + 1.772*(cb-128));
869     return rgb;
870 }
871 
872 ubyte clamp(float x) pure {
873     if (x < 0) return 0;
874     if (255 < x) return 255;
875     return cast(ubyte) x;
876 }
877 
878 // ------------------------------------------------------------
879 // The IDCT stuff here (to the next dashed line) is copied and adapted from
880 // stb_image which is released under public domain.  Many thanks to stb_image
881 // author, Sean Barrett.
882 // Link: https://github.com/nothings/stb/blob/master/stb_image.h
883 
884 pure int f2f(float x) { return cast(int) (x * 4096 + 0.5); }
885 pure int fsh(int x) { return x << 12; }
886 
887 // from stb_image, derived from jidctint -- DCT_ISLOW
888 pure void STBI__IDCT_1D(ref int t0, ref int t1, ref int t2, ref int t3,
889                         ref int x0, ref int x1, ref int x2, ref int x3,
890         int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7)
891 {
892    int p1,p2,p3,p4,p5;
893    //int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3;
894    p2 = s2;
895    p3 = s6;
896    p1 = (p2+p3) * f2f(0.5411961f);
897    t2 = p1 + p3 * f2f(-1.847759065f);
898    t3 = p1 + p2 * f2f( 0.765366865f);
899    p2 = s0;
900    p3 = s4;
901    t0 = fsh(p2+p3);
902    t1 = fsh(p2-p3);
903    x0 = t0+t3;
904    x3 = t0-t3;
905    x1 = t1+t2;
906    x2 = t1-t2;
907    t0 = s7;
908    t1 = s5;
909    t2 = s3;
910    t3 = s1;
911    p3 = t0+t2;
912    p4 = t1+t3;
913    p1 = t0+t3;
914    p2 = t1+t2;
915    p5 = (p3+p4)*f2f( 1.175875602f);
916    t0 = t0*f2f( 0.298631336f);
917    t1 = t1*f2f( 2.053119869f);
918    t2 = t2*f2f( 3.072711026f);
919    t3 = t3*f2f( 1.501321110f);
920    p1 = p5 + p1*f2f(-0.899976223f);
921    p2 = p5 + p2*f2f(-2.562915447f);
922    p3 = p3*f2f(-1.961570560f);
923    p4 = p4*f2f(-0.390180644f);
924    t3 += p1+p4;
925    t2 += p2+p3;
926    t1 += p2+p4;
927    t0 += p1+p3;
928 }
929 
930 // idct and level-shift
931 pure void stbi__idct_block(ubyte* dst, int dst_stride, in ref short[64] data) {
932    int i;
933    int[64] val;
934    int* v = val.ptr;
935    const(short)* d = data.ptr;
936 
937    // columns
938    for (i=0; i < 8; ++i,++d, ++v) {
939       // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
940       if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
941            && d[40]==0 && d[48]==0 && d[56]==0) {
942          //    no shortcut                 0     seconds
943          //    (1|2|3|4|5|6|7)==0          0     seconds
944          //    all separate               -0.047 seconds
945          //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
946          int dcterm = d[0] << 2;
947          v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
948       } else {
949          int t0,t1,t2,t3,x0,x1,x2,x3;
950          STBI__IDCT_1D(
951              t0, t1, t2, t3,
952              x0, x1, x2, x3,
953              d[ 0], d[ 8], d[16], d[24],
954              d[32], d[40], d[48], d[56]
955          );
956          // constants scaled things up by 1<<12; let's bring them back
957          // down, but keep 2 extra bits of precision
958          x0 += 512; x1 += 512; x2 += 512; x3 += 512;
959          v[ 0] = (x0+t3) >> 10;
960          v[56] = (x0-t3) >> 10;
961          v[ 8] = (x1+t2) >> 10;
962          v[48] = (x1-t2) >> 10;
963          v[16] = (x2+t1) >> 10;
964          v[40] = (x2-t1) >> 10;
965          v[24] = (x3+t0) >> 10;
966          v[32] = (x3-t0) >> 10;
967       }
968    }
969 
970    ubyte* o = dst;
971    for (i=0, v=val.ptr; i < 8; ++i,v+=8,o+=dst_stride) {
972       // no fast case since the first 1D IDCT spread components out
973       int t0,t1,t2,t3,x0,x1,x2,x3;
974       STBI__IDCT_1D(
975           t0, t1, t2, t3,
976           x0, x1, x2, x3,
977           v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]
978       );
979       // constants scaled things up by 1<<12, plus we had 1<<2 from first
980       // loop, plus horizontal and vertical each scale by sqrt(8) so together
981       // we've got an extra 1<<3, so 1<<17 total we need to remove.
982       // so we want to round that, which means adding 0.5 * 1<<17,
983       // aka 65536. Also, we'll end up with -128 to 127 that we want
984       // to encode as 0-255 by adding 128, so we'll add that before the shift
985       x0 += 65536 + (128<<17);
986       x1 += 65536 + (128<<17);
987       x2 += 65536 + (128<<17);
988       x3 += 65536 + (128<<17);
989       // tried computing the shifts into temps, or'ing the temps to see
990       // if any were out of range, but that was slower
991       o[0] = stbi__clamp((x0+t3) >> 17);
992       o[7] = stbi__clamp((x0-t3) >> 17);
993       o[1] = stbi__clamp((x1+t2) >> 17);
994       o[6] = stbi__clamp((x1-t2) >> 17);
995       o[2] = stbi__clamp((x2+t1) >> 17);
996       o[5] = stbi__clamp((x2-t1) >> 17);
997       o[3] = stbi__clamp((x3+t0) >> 17);
998       o[4] = stbi__clamp((x3-t0) >> 17);
999    }
1000 }
1001 
1002 // clamp to 0-255
1003 pure ubyte stbi__clamp(int x) {
1004    if (cast(uint) x > 255) {
1005       if (x < 0) return 0;
1006       if (x > 255) return 255;
1007    }
1008    return cast(ubyte) x;
1009 }
1010 
1011 static immutable ubyte[2] jpeg_soi_marker = [0xff, 0xd8];
1012 
1013 // the above is adapted from stb_image
1014 // ------------------------------------------------------------
1015 
1016 package void read_jpeg_info(Reader stream, out int w, out int h, out int chans) {
1017     ubyte[2] marker = void;
1018     stream.readExact(marker, 2);
1019 
1020     // SOI
1021     if (marker[0..2] != jpeg_soi_marker)
1022         throw new ImageIOException("not JPEG");
1023 
1024     while (true) {
1025         stream.readExact(marker, 2);
1026 
1027         if (marker[0] != 0xff)
1028             throw new ImageIOException("no frame header");
1029         while (marker[1] == 0xff)
1030             stream.readExact(marker[1..$], 1);
1031 
1032         enum SKIP = 0xff;
1033         switch (marker[1]) with (Marker) {
1034             case SOF0: .. case SOF3: goto case;
1035             case SOF9: .. case SOF11:
1036                 ubyte[8] tmp;
1037                 stream.readExact(tmp[0..8], 8);
1038                 //int len = bigEndianToNative!ushort(tmp[0..2]);
1039                 w = bigEndianToNative!ushort(tmp[5..7]);
1040                 h = bigEndianToNative!ushort(tmp[3..5]);
1041                 chans = tmp[7];
1042                 return;
1043             case SOS, EOI: throw new ImageIOException("no frame header");
1044             case DRI, DHT, DQT, COM: goto case SKIP;
1045             case APP0: .. case APPf: goto case SKIP;
1046             case SKIP:
1047                 ubyte[2] lenbuf = void;
1048                 stream.readExact(lenbuf, 2);
1049                 int skiplen = bigEndianToNative!ushort(lenbuf) - 2;
1050                 stream.seek(skiplen, SEEK_CUR);
1051                 break;
1052             default: throw new ImageIOException("unsupported marker");
1053         }
1054     }
1055     assert(0);
1056 }