1 // Baseline JPEG decoder
2 
3 module imageformats.jpeg;
4 
5 import std.math;    // floor, ceil
6 import std.bitmanip;   // endianness stuff
7 import std.stdio;    // File
8 import core.stdc.stdlib : alloca;
9 import imageformats;
10 
11 private:
12 
13 public bool detect_jpeg(Reader stream) {
14     try {
15         int w, h, c;
16         read_jpeg_info(stream, w, h, c);
17         return true;
18     } catch (Throwable) {
19         return false;
20     } finally {
21         stream.seek(0, SEEK_SET);
22     }
23 }
24 
25 ///
26 public IFImage read_jpeg(in char[] filename, long req_chans = 0) {
27     scope reader = new FileReader(filename);
28     return read_jpeg(reader, req_chans);
29 }
30 
31 ///
32 public IFImage read_jpeg_from_mem(in ubyte[] source, long req_chans = 0) {
33     scope reader = new MemReader(source);
34     return read_jpeg(reader, req_chans);
35 }
36 
37 ///
38 public void read_jpeg_info(in char[] filename, out int w, out int h, out int chans) {
39     scope reader = new FileReader(filename);
40     return read_jpeg_info(reader, w, h, chans);
41 }
42 
43 ///
44 public void read_jpeg_info_from_mem(in ubyte[] source, out int w, out int h, out int chans) {
45     scope reader = new MemReader(source);
46     return read_jpeg_info(reader, w, h, chans);
47 }
48 
49 package IFImage read_jpeg(Reader stream, long req_chans = 0) {
50     if (req_chans < 0 || 4 < req_chans)
51         throw new ImageIOException("come on...");
52 
53     // SOI
54     ubyte[2] tmp = void;
55     stream.readExact(tmp, tmp.length);
56     if (tmp[0..2] != jpeg_soi_marker)
57         throw new ImageIOException("not JPEG");
58 
59     JPEG_Decoder dc = { stream: stream };
60 
61     read_markers(dc);   // reads until first scan header or eoi
62     if (dc.eoi_reached)
63         throw new ImageIOException("no image data");
64 
65     dc.tgt_chans = (req_chans == 0) ? dc.num_comps : cast(int) req_chans;
66 
67     IFImage result = {
68         w      : dc.width,
69         h      : dc.height,
70         c      : cast(ColFmt) dc.tgt_chans,
71         pixels : decode_jpeg(dc),
72     };
73     return result;
74 }
75 
76 struct JPEG_Decoder {
77     Reader stream;
78 
79     bool has_frame_header = false;
80     bool eoi_reached = false;
81 
82     ubyte[64][4] qtables;
83     HuffTab[2] ac_tables;
84     HuffTab[2] dc_tables;
85 
86     ubyte cb;  // current byte (next bit always at MSB)
87     int bits_left;   // num of unused bits in cb
88 
89     bool correct_comp_ids;
90     Component[3] comps;
91     ubyte num_comps;
92     int tgt_chans;
93 
94     int width, height;
95 
96     int hmax, vmax;
97 
98     ushort restart_interval;    // number of MCUs in restart interval
99 
100     // image component
101     struct Component {
102         ubyte sfx, sfy;   // sampling factors, aka. h and v
103         size_t x, y;       // total num of samples, without fill samples
104         ubyte qtable;
105         ubyte ac_table;
106         ubyte dc_table;
107         int pred;                // dc prediction
108         ubyte[] data;   // reconstructed samples
109     }
110 
111     int num_mcu_x;
112     int num_mcu_y;
113 }
114 
115 struct HuffTab {
116     ubyte[256] values;
117     ubyte[257] sizes;
118     short[16] mincode, maxcode;
119     short[16] valptr;
120 }
121 
122 enum Marker : ubyte {
123     SOI = 0xd8,     // start of image
124     SOF0 = 0xc0,    // start of frame / baseline DCT
125     //SOF1 = 0xc1,    // start of frame / extended seq.
126     //SOF2 = 0xc2,    // start of frame / progressive DCT
127     SOF3 = 0xc3,    // start of frame / lossless
128     SOF9 = 0xc9,    // start of frame / extended seq., arithmetic
129     SOF11 = 0xcb,    // start of frame / lossless, arithmetic
130     DHT = 0xc4,     // define huffman tables
131     DQT = 0xdb,     // define quantization tables
132     DRI = 0xdd,     // define restart interval
133     SOS = 0xda,     // start of scan
134     DNL = 0xdc,     // define number of lines
135     RST0 = 0xd0,    // restart entropy coded data
136     // ...
137     RST7 = 0xd7,    // restart entropy coded data
138     APP0 = 0xe0,    // application 0 segment
139     // ...
140     APPf = 0xef,    // application f segment
141     //DAC = 0xcc,     // define arithmetic conditioning table
142     COM = 0xfe,     // comment
143     EOI = 0xd9,     // end of image
144 }
145 
146 void read_markers(ref JPEG_Decoder dc) {
147     bool has_next_scan_header = false;
148     while (!has_next_scan_header && !dc.eoi_reached) {
149         ubyte[2] marker;
150         dc.stream.readExact(marker, 2);
151 
152         if (marker[0] != 0xff)
153             throw new ImageIOException("no marker");
154         while (marker[1] == 0xff)
155             dc.stream.readExact(marker[1..$], 1);
156 
157         debug(DebugJPEG) writefln("marker: %s (%1$x)\t", cast(Marker) marker[1]);
158         switch (marker[1]) with (Marker) {
159             case DHT: dc.read_huffman_tables(); break;
160             case DQT: dc.read_quantization_tables(); break;
161             case SOF0:
162                 if (dc.has_frame_header)
163                     throw new ImageIOException("extra frame header");
164                 debug(DebugJPEG) writeln();
165                 dc.read_frame_header();
166                 dc.has_frame_header = true;
167                 break;
168             case SOS:
169                 if (!dc.has_frame_header)
170                     throw new ImageIOException("no frame header");
171                 dc.read_scan_header();
172                 has_next_scan_header = true;
173                 break;
174             case DRI: dc.read_restart_interval(); break;
175             case EOI: dc.eoi_reached = true; break;
176             case APP0: .. case APPf: goto case;
177             case COM:
178                 debug(DebugJPEG) writefln("-> skipping segment");
179                 ubyte[2] lenbuf = void;
180                 dc.stream.readExact(lenbuf, lenbuf.length);
181                 int len = bigEndianToNative!ushort(lenbuf) - 2;
182                 dc.stream.seek(len, SEEK_CUR);
183                 break;
184             default: throw new ImageIOException("invalid / unsupported marker");
185         }
186     }
187 }
188 
189 // DHT -- define huffman tables
190 void read_huffman_tables(ref JPEG_Decoder dc) {
191     ubyte[19] tmp = void;
192     dc.stream.readExact(tmp, 2);
193     int len = bigEndianToNative!ushort(tmp[0..2]);
194     len -= 2;
195 
196     while (0 < len) {
197         dc.stream.readExact(tmp, 17);   // info byte & the BITS
198         ubyte table_slot = tmp[0] & 0xf; // must be 0 or 1 for baseline
199         ubyte table_class = tmp[0] >> 4;  // 0 = dc table, 1 = ac table
200         if (1 < table_slot || 1 < table_class)
201             throw new ImageIOException("invalid / not supported");
202 
203         // compute total number of huffman codes
204         int mt = 0;
205         foreach (i; 1..17)
206             mt += tmp[i];
207         if (256 < mt)   // TODO where in the spec?
208             throw new ImageIOException("invalid / not supported");
209 
210         if (table_class == 0) {
211             dc.stream.readExact(dc.dc_tables[table_slot].values, mt);
212             derive_table(dc.dc_tables[table_slot], tmp[1..17]);
213         } else {
214             dc.stream.readExact(dc.ac_tables[table_slot].values, mt);
215             derive_table(dc.ac_tables[table_slot], tmp[1..17]);
216         }
217 
218         len -= 17 + mt;
219     }
220 }
221 
222 // num_values is the BITS
223 void derive_table(ref HuffTab table, in ref ubyte[16] num_values) {
224     short[256] codes;
225 
226     int k = 0;
227     foreach (i; 0..16) {
228         foreach (j; 0..num_values[i]) {
229             table.sizes[k] = cast(ubyte) (i + 1);
230             ++k;
231         }
232     }
233     table.sizes[k] = 0;
234 
235     k = 0;
236     short code = 0;
237     ubyte si = table.sizes[k];
238     while (true) {
239         do {
240             codes[k] = code;
241             ++code;
242             ++k;
243         } while (si == table.sizes[k]);
244 
245         if (table.sizes[k] == 0)
246             break;
247 
248         debug(DebugJPEG) assert(si < table.sizes[k]);
249         do {
250             code <<= 1;
251             ++si;
252         } while (si != table.sizes[k]);
253     }
254 
255     derive_mincode_maxcode_valptr(
256         table.mincode, table.maxcode, table.valptr,
257         codes, num_values
258     );
259 }
260 
261 // F.15
262 void derive_mincode_maxcode_valptr(
263         ref short[16] mincode, ref short[16] maxcode, ref short[16] valptr,
264         in ref short[256] codes, in ref ubyte[16] num_values) pure
265 {
266     mincode[] = -1;
267     maxcode[] = -1;
268     valptr[] = -1;
269 
270     int j = 0;
271     foreach (i; 0..16) {
272         if (num_values[i] != 0) {
273             valptr[i] = cast(short) j;
274             mincode[i] = codes[j];
275             j += num_values[i] - 1;
276             maxcode[i] = codes[j];
277             j += 1;
278         }
279     }
280 }
281 
282 // DQT -- define quantization tables
283 void read_quantization_tables(ref JPEG_Decoder dc) {
284     ubyte[2] tmp = void;
285     dc.stream.readExact(tmp, 2);
286     int len = bigEndianToNative!ushort(tmp[0..2]);
287     if (len % 65 != 2)
288         throw new ImageIOException("invalid / not supported");
289     len -= 2;
290     while (0 < len) {
291         dc.stream.readExact(tmp, 1);
292         ubyte table_info = tmp[0];
293         ubyte table_slot = table_info & 0xf;
294         ubyte precision = table_info >> 4;  // 0 = 8 bit, 1 = 16 bit
295         if (3 < table_slot || precision != 0)    // only 8 bit for baseline
296             throw new ImageIOException("invalid / not supported");
297 
298         dc.stream.readExact(dc.qtables[table_slot], 64);
299         len -= 1 + 64;
300     }
301 }
302 
303 // SOF0 -- start of frame
304 void read_frame_header(ref JPEG_Decoder dc) {
305     ubyte[9] tmp = void;
306     dc.stream.readExact(tmp, 8);
307     int len = bigEndianToNative!ushort(tmp[0..2]);  // 8 + num_comps*3
308     ubyte precision = tmp[2];
309     dc.height = bigEndianToNative!ushort(tmp[3..5]);
310     dc.width = bigEndianToNative!ushort(tmp[5..7]);
311     dc.num_comps = tmp[7];
312 
313     if ( precision != 8 ||
314          (dc.num_comps != 1 && dc.num_comps != 3) ||
315          len != 8 + dc.num_comps*3 )
316         throw new ImageIOException("invalid / not supported");
317 
318     dc.hmax = 0;
319     dc.vmax = 0;
320     int mcu_du = 0; // data units in one mcu
321     dc.stream.readExact(tmp, dc.num_comps*3);
322     foreach (i; 0..dc.num_comps) {
323         ubyte ci = tmp[i*3];
324         // JFIF says ci should be i+1, but there are images where ci is i. Normalize ids
325         // so that ci == i, always. So much for standards...
326         if (i == 0) { dc.correct_comp_ids = ci == i+1; }
327         if ((dc.correct_comp_ids && ci != i+1)
328         || (!dc.correct_comp_ids && ci != i))
329             throw new ImageIOException("invalid component id");
330 
331         auto comp = &dc.comps[i];
332         ubyte sampling_factors = tmp[i*3 + 1];
333         comp.sfx = sampling_factors >> 4;
334         comp.sfy = sampling_factors & 0xf;
335         comp.qtable = tmp[i*3 + 2];
336         if ( comp.sfy < 1 || 4 < comp.sfy ||
337              comp.sfx < 1 || 4 < comp.sfx ||
338              3 < comp.qtable )
339             throw new ImageIOException("invalid / not supported");
340 
341         if (dc.hmax < comp.sfx) dc.hmax = comp.sfx;
342         if (dc.vmax < comp.sfy) dc.vmax = comp.sfy;
343 
344         mcu_du += comp.sfx * comp.sfy;
345     }
346     if (10 < mcu_du)
347         throw new ImageIOException("invalid / not supported");
348 
349     foreach (i; 0..dc.num_comps) {
350         dc.comps[i].x = cast(size_t) ceil(dc.width * (cast(double) dc.comps[i].sfx / dc.hmax));
351         dc.comps[i].y = cast(size_t) ceil(dc.height * (cast(double) dc.comps[i].sfy / dc.vmax));
352 
353         debug(DebugJPEG) writefln("%d comp %d sfx/sfy: %d/%d", i, dc.comps[i].id,
354                                                                   dc.comps[i].sfx,
355                                                                   dc.comps[i].sfy);
356     }
357 
358     size_t mcu_w = dc.hmax * 8;
359     size_t mcu_h = dc.vmax * 8;
360     dc.num_mcu_x = cast(int) ((dc.width + mcu_w-1) / mcu_w);
361     dc.num_mcu_y = cast(int) ((dc.height + mcu_h-1) / mcu_h);
362 
363     debug(DebugJPEG) {
364         writefln("\tlen: %s", len);
365         writefln("\tprecision: %s", precision);
366         writefln("\tdimensions: %s x %s", dc.width, dc.height);
367         writefln("\tnum_comps: %s", dc.num_comps);
368         writefln("\tnum_mcu_x: %s", dc.num_mcu_x);
369         writefln("\tnum_mcu_y: %s", dc.num_mcu_y);
370     }
371 
372 }
373 
374 // SOS -- start of scan
375 void read_scan_header(ref JPEG_Decoder dc) {
376     ubyte[3] tmp = void;
377     dc.stream.readExact(tmp, tmp.length);
378     ushort len = bigEndianToNative!ushort(tmp[0..2]);
379     ubyte num_scan_comps = tmp[2];
380 
381     if ( num_scan_comps != dc.num_comps ||
382          len != (6+num_scan_comps*2) )
383         throw new ImageIOException("invalid / not supported");
384 
385     auto buf = (cast(ubyte*) alloca((len-3) * ubyte.sizeof))[0..len-3];
386     dc.stream.readExact(buf, buf.length);
387 
388     foreach (i; 0..num_scan_comps) {
389         uint ci = buf[i*2] - ((dc.correct_comp_ids) ? 1 : 0);
390         if (ci >= dc.num_comps)
391             throw new ImageIOException("invalid component id");
392 
393         ubyte tables = buf[i*2+1];
394         dc.comps[ci].dc_table = tables >> 4;
395         dc.comps[ci].ac_table = tables & 0xf;
396         if ( 1 < dc.comps[ci].dc_table ||
397              1 < dc.comps[ci].ac_table )
398             throw new ImageIOException("invalid / not supported");
399     }
400 
401     // ignore these
402     //ubyte spectral_start = buf[$-3];
403     //ubyte spectral_end = buf[$-2];
404     //ubyte approx = buf[$-1];
405 }
406 
407 void read_restart_interval(ref JPEG_Decoder dc) {
408     ubyte[4] tmp = void;
409     dc.stream.readExact(tmp, tmp.length);
410     ushort len = bigEndianToNative!ushort(tmp[0..2]);
411     if (len != 4)
412         throw new ImageIOException("invalid / not supported");
413     dc.restart_interval = bigEndianToNative!ushort(tmp[2..4]);
414     debug(DebugJPEG) writeln("restart interval set to: ", dc.restart_interval);
415 }
416 
417 // reads data after the SOS segment
418 ubyte[] decode_jpeg(ref JPEG_Decoder dc) {
419     foreach (ref comp; dc.comps[0..dc.num_comps])
420         comp.data = new ubyte[dc.num_mcu_x*comp.sfx*8*dc.num_mcu_y*comp.sfy*8];
421 
422     // E.7 -- Multiple scans are for progressive images which are not supported
423     //while (!dc.eoi_reached) {
424         decode_scan(dc);    // E.2.3
425         //read_markers(dc);   // reads until next scan header or eoi
426     //}
427 
428     // throw away fill samples and convert to target format
429     return dc.reconstruct();
430 }
431 
432 // E.2.3 and E.8 and E.9
433 void decode_scan(ref JPEG_Decoder dc) {
434     debug(DebugJPEG) writeln("decode scan...");
435 
436     int intervals, mcus;
437     if (0 < dc.restart_interval) {
438         int total_mcus = dc.num_mcu_x * dc.num_mcu_y;
439         intervals = (total_mcus + dc.restart_interval-1) / dc.restart_interval;
440         mcus = dc.restart_interval;
441     } else {
442         intervals = 1;
443         mcus = dc.num_mcu_x * dc.num_mcu_y;
444     }
445     debug(DebugJPEG) writeln("intervals: ", intervals);
446 
447     foreach (mcu_j; 0 .. dc.num_mcu_y) {
448         foreach (mcu_i; 0 .. dc.num_mcu_x) {
449 
450             // decode mcu
451             foreach (c; 0..dc.num_comps) {
452                 auto comp = &dc.comps[c];
453                 foreach (du_j; 0 .. comp.sfy) {
454                     foreach (du_i; 0 .. comp.sfx) {
455                         // decode entropy, dequantize & dezigzag
456                         short[64] data = decode_block(dc, *comp, dc.qtables[comp.qtable]);
457                         // idct & level-shift
458                         int outx = (mcu_i * comp.sfx + du_i) * 8;
459                         int outy = (mcu_j * comp.sfy + du_j) * 8;
460                         int dst_stride = dc.num_mcu_x * comp.sfx*8;
461                         ubyte* dst = comp.data.ptr + outy*dst_stride + outx;
462                         stbi__idct_block(dst, dst_stride, data);
463                     }
464                 }
465             }
466 
467             --mcus;
468 
469             if (!mcus) {
470                 --intervals;
471                 if (!intervals)
472                     return;
473 
474                 read_restart(dc.stream);    // RSTx marker
475 
476                 if (intervals == 1) {
477                     // last interval, may have fewer MCUs than defined by DRI
478                     mcus = (dc.num_mcu_y - mcu_j - 1) * dc.num_mcu_x + dc.num_mcu_x - mcu_i - 1;
479                 } else {
480                     mcus = dc.restart_interval;
481                 }
482 
483                 // reset decoder
484                 dc.cb = 0;
485                 dc.bits_left = 0;
486                 foreach (k; 0..dc.num_comps)
487                     dc.comps[k].pred = 0;
488             }
489 
490         }
491     }
492 }
493 
494 // RST0-RST7
495 void read_restart(Reader stream) {
496     ubyte[2] tmp = void;
497     stream.readExact(tmp, tmp.length);
498     if (tmp[0] != 0xff || tmp[1] < Marker.RST0 || Marker.RST7 < tmp[1])
499         throw new ImageIOException("reset marker missing");
500     // the markers should cycle 0 through 7, could check that here...
501 }
502 
503 immutable ubyte[64] dezigzag = [
504      0,  1,  8, 16,  9,  2,  3, 10,
505     17, 24, 32, 25, 18, 11,  4,  5,
506     12, 19, 26, 33, 40, 48, 41, 34,
507     27, 20, 13,  6,  7, 14, 21, 28,
508     35, 42, 49, 56, 57, 50, 43, 36,
509     29, 22, 15, 23, 30, 37, 44, 51,
510     58, 59, 52, 45, 38, 31, 39, 46,
511     53, 60, 61, 54, 47, 55, 62, 63,
512 ];
513 
514 // decode entropy, dequantize & dezigzag (see section F.2)
515 short[64] decode_block(ref JPEG_Decoder dc, ref JPEG_Decoder.Component comp,
516                                                     in ref ubyte[64] qtable)
517 {
518     short[64] res = 0;
519 
520     ubyte t = decode_huff(dc, dc.dc_tables[comp.dc_table]);
521     int diff = t ? dc.receive_and_extend(t) : 0;
522 
523     comp.pred = comp.pred + diff;
524     res[0] = cast(short) (comp.pred * qtable[0]);
525 
526     int k = 1;
527     do {
528         ubyte rs = decode_huff(dc, dc.ac_tables[comp.ac_table]);
529         ubyte rrrr = rs >> 4;
530         ubyte ssss = rs & 0xf;
531 
532         if (ssss == 0) {
533             if (rrrr != 0xf)
534                 break;      // end of block
535             k += 16;    // run length is 16
536             continue;
537         }
538 
539         k += rrrr;
540 
541         if (63 < k)
542             throw new ImageIOException("corrupt block");
543         res[dezigzag[k]] = cast(short) (dc.receive_and_extend(ssss) * qtable[k]);
544         k += 1;
545     } while (k < 64);
546 
547     return res;
548 }
549 
550 int receive_and_extend(ref JPEG_Decoder dc, ubyte s) {
551     // receive
552     int symbol = 0;
553     foreach (_; 0..s)
554         symbol = (symbol << 1) + nextbit(dc);
555     // extend
556     int vt = 1 << (s-1);
557     if (symbol < vt)
558         return symbol + (-1 << s) + 1;
559     return symbol;
560 }
561 
562 // F.16 -- the DECODE
563 ubyte decode_huff(ref JPEG_Decoder dc, in ref HuffTab tab) {
564     short code = nextbit(dc);
565 
566     int i = 0;
567     while (tab.maxcode[i] < code) {
568         code = cast(short) ((code << 1) + nextbit(dc));
569         i += 1;
570         if (tab.maxcode.length <= i)
571             throw new ImageIOException("corrupt huffman coding");
572     }
573     int j = tab.valptr[i] + code - tab.mincode[i];
574     if (tab.values.length <= cast(uint) j)
575         throw new ImageIOException("corrupt huffman coding");
576     return tab.values[j];
577 }
578 
579 // F.2.2.5 and F.18
580 ubyte nextbit(ref JPEG_Decoder dc) {
581     if (!dc.bits_left) {
582         ubyte[1] bytebuf;
583         dc.stream.readExact(bytebuf, 1);
584         dc.cb = bytebuf[0];
585         dc.bits_left = 8;
586 
587         if (dc.cb == 0xff) {
588             dc.stream.readExact(bytebuf, 1);
589             if (bytebuf[0] != 0x0) {
590                 throw new ImageIOException("unexpected marker");
591             }
592         }
593     }
594 
595     ubyte r = dc.cb >> 7;
596     dc.cb <<= 1;
597     dc.bits_left -= 1;
598     return r;
599 }
600 
601 ubyte[] reconstruct(in ref JPEG_Decoder dc) {
602     auto result = new ubyte[dc.width * dc.height * dc.tgt_chans];
603 
604     switch (dc.num_comps * 10 + dc.tgt_chans) {
605         case 34, 33:
606             // Use specialized bilinear filtering functions for the frequent cases where
607             // Cb & Cr channels have half resolution.
608             if ((dc.comps[0].sfx <= 2 && dc.comps[0].sfy <= 2)
609             && (dc.comps[0].sfx + dc.comps[0].sfy >= 3)
610             && dc.comps[1].sfx == 1 && dc.comps[1].sfy == 1
611             && dc.comps[2].sfx == 1 && dc.comps[2].sfy == 1) {
612                 void function(in ubyte[], in ubyte[], ubyte[]) resample;
613                 switch (dc.comps[0].sfx * 10 + dc.comps[0].sfy) {
614                     case 22: resample = &upsample_h2_v2; break;
615                     case 21: resample = &upsample_h2_v1; break;
616                     case 12: resample = &upsample_h1_v2; break;
617                     default: throw new ImageIOException("bug");
618                 }
619 
620                 auto comp1 = new ubyte[](dc.width);
621                 auto comp2 = new ubyte[](dc.width);
622 
623                 size_t s = 0;
624                 size_t di = 0;
625                 foreach (j; 0 .. dc.height) {
626                     size_t mi = j / dc.comps[0].sfy;
627                     size_t si = (mi == 0 || mi >= (dc.height-1)/dc.comps[0].sfy)
628                               ? mi : mi - 1 + s * 2;
629                     s = s ^ 1;
630 
631                     size_t cs = dc.num_mcu_x * dc.comps[1].sfx * 8;
632                     size_t cl0 = mi * cs;
633                     size_t cl1 = si * cs;
634                     resample(dc.comps[1].data[cl0 .. cl0 + dc.comps[1].x],
635                              dc.comps[1].data[cl1 .. cl1 + dc.comps[1].x],
636                              comp1[]);
637                     resample(dc.comps[2].data[cl0 .. cl0 + dc.comps[2].x],
638                              dc.comps[2].data[cl1 .. cl1 + dc.comps[2].x],
639                              comp2[]);
640 
641                     foreach (i; 0 .. dc.width) {
642                         result[di .. di+3] = ycbcr_to_rgb(
643                             dc.comps[0].data[j * dc.num_mcu_x * dc.comps[0].sfx * 8 + i],
644                             comp1[i],
645                             comp2[i],
646                         );
647                         if (dc.tgt_chans == 4)
648                             result[di+3] = 255;
649                         di += dc.tgt_chans;
650                     }
651                 }
652 
653                 return result;
654             }
655 
656             foreach (const ref comp; dc.comps[0..dc.num_comps]) {
657                 if (comp.sfx != dc.hmax || comp.sfy != dc.vmax)
658                     return dc.upsample(result);
659             }
660 
661             size_t si, di;
662             foreach (j; 0 .. dc.height) {
663                 foreach (i; 0 .. dc.width) {
664                     result[di .. di+3] = ycbcr_to_rgb(
665                         dc.comps[0].data[si+i],
666                         dc.comps[1].data[si+i],
667                         dc.comps[2].data[si+i],
668                     );
669                     if (dc.tgt_chans == 4)
670                         result[di+3] = 255;
671                     di += dc.tgt_chans;
672                 }
673                 si += dc.num_mcu_x * dc.comps[0].sfx * 8;
674             }
675             return result;
676         case 32, 12, 31, 11:
677             const comp = &dc.comps[0];
678             if (comp.sfx == dc.hmax && comp.sfy == dc.vmax) {
679                 size_t si, di;
680                 if (dc.tgt_chans == 2) {
681                     foreach (j; 0 .. dc.height) {
682                         foreach (i; 0 .. dc.width) {
683                             result[di++] = comp.data[si+i];
684                             result[di++] = 255;
685                         }
686                         si += dc.num_mcu_x * comp.sfx * 8;
687                     }
688                 } else {
689                     foreach (j; 0 .. dc.height) {
690                         result[di .. di+dc.width] = comp.data[si .. si+dc.width];
691                         si += dc.num_mcu_x * comp.sfx * 8;
692                         di += dc.width;
693                     }
694                 }
695                 return result;
696             } else {
697                 // need to resample (haven't tested this...)
698                 return dc.upsample_luma(result);
699             }
700         case 14, 13:
701             const comp = &dc.comps[0];
702             size_t si, di;
703             foreach (j; 0 .. dc.height) {
704                 foreach (i; 0 .. dc.width) {
705                     result[di .. di+3] = comp.data[si+i];
706                     if (dc.tgt_chans == 4)
707                         result[di+3] = 255;
708                     di += dc.tgt_chans;
709                 }
710                 si += dc.num_mcu_x * comp.sfx * 8;
711             }
712             return result;
713         default: assert(0);
714     }
715 }
716 
717 void upsample_h2_v2(in ubyte[] line0, in ubyte[] line1, ubyte[] result) {
718     ubyte mix(ubyte mm, ubyte ms, ubyte sm, ubyte ss) {
719        return cast(ubyte) (( cast(uint) mm * 3 * 3
720                            + cast(uint) ms * 3 * 1
721                            + cast(uint) sm * 1 * 3
722                            + cast(uint) ss * 1 * 1
723                            + 8) / 16);
724     }
725 
726     result[0] = cast(ubyte) (( cast(uint) line0[0] * 3
727                              + cast(uint) line1[0] * 1
728                              + 2) / 4);
729     if (line0.length == 1) return;
730     result[1] = mix(line0[0], line0[1], line1[0], line1[1]);
731 
732     size_t di = 2;
733     foreach (i; 1 .. line0.length) {
734         result[di] = mix(line0[i], line0[i-1], line1[i], line1[i-1]);
735         di += 1;
736         if (i == line0.length-1) {
737             if (di < result.length) {
738                 result[di] = cast(ubyte) (( cast(uint) line0[i] * 3
739                                           + cast(uint) line1[i] * 1
740                                           + 2) / 4);
741             }
742             return;
743         }
744         result[di] = mix(line0[i], line0[i+1], line1[i], line1[i+1]);
745         di += 1;
746     }
747 }
748 
749 void upsample_h2_v1(in ubyte[] line0, in ubyte[] _line1, ubyte[] result) {
750     result[0] = line0[0];
751     if (line0.length == 1) return;
752     result[1] = cast(ubyte) (( cast(uint) line0[0] * 3
753                              + cast(uint) line0[1] * 1
754                              + 2) / 4);
755     size_t di = 2;
756     foreach (i; 1 .. line0.length) {
757         result[di] = cast(ubyte) (( cast(uint) line0[i-1] * 1
758                                   + cast(uint) line0[i+0] * 3
759                                   + 2) / 4);
760         di += 1;
761         if (i == line0.length-1) {
762             if (di < result.length) result[di] = line0[i];
763             return;
764         }
765         result[di] = cast(ubyte) (( cast(uint) line0[i+0] * 3
766                                   + cast(uint) line0[i+1] * 1
767                                   + 2) / 4);
768         di += 1;
769     }
770 }
771 
772 void upsample_h1_v2(in ubyte[] line0, in ubyte[] line1, ubyte[] result) {
773     foreach (i; 0 .. result.length) {
774         result[i] = cast(ubyte) (( cast(uint) line0[i] * 3
775                                  + cast(uint) line1[i] * 1
776                                  + 2) / 4);
777     }
778 }
779 
780 // Nearest neighbor
781 ubyte[] upsample_luma(in ref JPEG_Decoder dc, ubyte[] result) {
782     const size_t stride0 = dc.num_mcu_x * dc.comps[0].sfx * 8;
783     const y_step0 = cast(float) dc.comps[0].sfy / cast(float) dc.vmax;
784     const x_step0 = cast(float) dc.comps[0].sfx / cast(float) dc.hmax;
785 
786     float y0 = y_step0 * 0.5f;
787     size_t y0i = 0;
788 
789     size_t di;
790 
791     foreach (j; 0 .. dc.height) {
792         float x0 = x_step0 * 0.5f;
793         size_t x0i = 0;
794         foreach (i; 0 .. dc.width) {
795             result[di] = dc.comps[0].data[y0i + x0i];
796             if (dc.tgt_chans == 2)
797                 result[di+1] = 255;
798             di += dc.tgt_chans;
799             x0 += x_step0;
800             if (x0 >= 1.0f) { x0 -= 1.0f; x0i += 1; }
801         }
802         y0 += y_step0;
803         if (y0 >= 1.0f) { y0 -= 1.0f; y0i += stride0; }
804     }
805     return result;
806 }
807 
808 // Nearest neighbor
809 ubyte[] upsample(in ref JPEG_Decoder dc, ubyte[] result) {
810     const size_t stride0 = dc.num_mcu_x * dc.comps[0].sfx * 8;
811     const size_t stride1 = dc.num_mcu_x * dc.comps[1].sfx * 8;
812     const size_t stride2 = dc.num_mcu_x * dc.comps[2].sfx * 8;
813 
814     const y_step0 = cast(float) dc.comps[0].sfy / cast(float) dc.vmax;
815     const y_step1 = cast(float) dc.comps[1].sfy / cast(float) dc.vmax;
816     const y_step2 = cast(float) dc.comps[2].sfy / cast(float) dc.vmax;
817     const x_step0 = cast(float) dc.comps[0].sfx / cast(float) dc.hmax;
818     const x_step1 = cast(float) dc.comps[1].sfx / cast(float) dc.hmax;
819     const x_step2 = cast(float) dc.comps[2].sfx / cast(float) dc.hmax;
820 
821     float y0 = y_step0 * 0.5f;
822     float y1 = y_step1 * 0.5f;
823     float y2 = y_step2 * 0.5f;
824     size_t y0i = 0;
825     size_t y1i = 0;
826     size_t y2i = 0;
827 
828     size_t di;
829 
830     foreach (_j; 0 .. dc.height) {
831         float x0 = x_step0 * 0.5f;
832         float x1 = x_step1 * 0.5f;
833         float x2 = x_step2 * 0.5f;
834         size_t x0i = 0;
835         size_t x1i = 0;
836         size_t x2i = 0;
837         foreach (i; 0 .. dc.width) {
838             result[di .. di+3] = ycbcr_to_rgb(
839                 dc.comps[0].data[y0i + x0i],
840                 dc.comps[1].data[y1i + x1i],
841                 dc.comps[2].data[y2i + x2i],
842             );
843             if (dc.tgt_chans == 4)
844                 result[di+3] = 255;
845             di += dc.tgt_chans;
846             x0 += x_step0;
847             x1 += x_step1;
848             x2 += x_step2;
849             if (x0 >= 1.0) { x0 -= 1.0f; x0i += 1; }
850             if (x1 >= 1.0) { x1 -= 1.0f; x1i += 1; }
851             if (x2 >= 1.0) { x2 -= 1.0f; x2i += 1; }
852         }
853         y0 += y_step0;
854         y1 += y_step1;
855         y2 += y_step2;
856         if (y0 >= 1.0) { y0 -= 1.0f; y0i += stride0; }
857         if (y1 >= 1.0) { y1 -= 1.0f; y1i += stride1; }
858         if (y2 >= 1.0) { y2 -= 1.0f; y2i += stride2; }
859     }
860     return result;
861 }
862 
863 ubyte[3] ycbcr_to_rgb(ubyte y, ubyte cb, ubyte cr) pure {
864     ubyte[3] rgb = void;
865     rgb[0] = clamp(y + 1.402*(cr-128));
866     rgb[1] = clamp(y - 0.34414*(cb-128) - 0.71414*(cr-128));
867     rgb[2] = clamp(y + 1.772*(cb-128));
868     return rgb;
869 }
870 
871 ubyte clamp(float x) pure {
872     if (x < 0) return 0;
873     if (255 < x) return 255;
874     return cast(ubyte) x;
875 }
876 
877 // ------------------------------------------------------------
878 // The IDCT stuff here (to the next dashed line) is copied and adapted from
879 // stb_image which is released under public domain.  Many thanks to stb_image
880 // author, Sean Barrett.
881 // Link: https://github.com/nothings/stb/blob/master/stb_image.h
882 
883 pure int f2f(float x) { return cast(int) (x * 4096 + 0.5); }
884 pure int fsh(int x) { return x << 12; }
885 
886 // from stb_image, derived from jidctint -- DCT_ISLOW
887 pure void STBI__IDCT_1D(ref int t0, ref int t1, ref int t2, ref int t3,
888                         ref int x0, ref int x1, ref int x2, ref int x3,
889         int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7)
890 {
891    int p1,p2,p3,p4,p5;
892    //int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3;
893    p2 = s2;
894    p3 = s6;
895    p1 = (p2+p3) * f2f(0.5411961f);
896    t2 = p1 + p3 * f2f(-1.847759065f);
897    t3 = p1 + p2 * f2f( 0.765366865f);
898    p2 = s0;
899    p3 = s4;
900    t0 = fsh(p2+p3);
901    t1 = fsh(p2-p3);
902    x0 = t0+t3;
903    x3 = t0-t3;
904    x1 = t1+t2;
905    x2 = t1-t2;
906    t0 = s7;
907    t1 = s5;
908    t2 = s3;
909    t3 = s1;
910    p3 = t0+t2;
911    p4 = t1+t3;
912    p1 = t0+t3;
913    p2 = t1+t2;
914    p5 = (p3+p4)*f2f( 1.175875602f);
915    t0 = t0*f2f( 0.298631336f);
916    t1 = t1*f2f( 2.053119869f);
917    t2 = t2*f2f( 3.072711026f);
918    t3 = t3*f2f( 1.501321110f);
919    p1 = p5 + p1*f2f(-0.899976223f);
920    p2 = p5 + p2*f2f(-2.562915447f);
921    p3 = p3*f2f(-1.961570560f);
922    p4 = p4*f2f(-0.390180644f);
923    t3 += p1+p4;
924    t2 += p2+p3;
925    t1 += p2+p4;
926    t0 += p1+p3;
927 }
928 
929 // idct and level-shift
930 pure void stbi__idct_block(ubyte* dst, int dst_stride, in ref short[64] data) {
931    int i;
932    int[64] val;
933    int* v = val.ptr;
934    const(short)* d = data.ptr;
935 
936    // columns
937    for (i=0; i < 8; ++i,++d, ++v) {
938       // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
939       if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
940            && d[40]==0 && d[48]==0 && d[56]==0) {
941          //    no shortcut                 0     seconds
942          //    (1|2|3|4|5|6|7)==0          0     seconds
943          //    all separate               -0.047 seconds
944          //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
945          int dcterm = d[0] << 2;
946          v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
947       } else {
948          int t0,t1,t2,t3,x0,x1,x2,x3;
949          STBI__IDCT_1D(
950              t0, t1, t2, t3,
951              x0, x1, x2, x3,
952              d[ 0], d[ 8], d[16], d[24],
953              d[32], d[40], d[48], d[56]
954          );
955          // constants scaled things up by 1<<12; let's bring them back
956          // down, but keep 2 extra bits of precision
957          x0 += 512; x1 += 512; x2 += 512; x3 += 512;
958          v[ 0] = (x0+t3) >> 10;
959          v[56] = (x0-t3) >> 10;
960          v[ 8] = (x1+t2) >> 10;
961          v[48] = (x1-t2) >> 10;
962          v[16] = (x2+t1) >> 10;
963          v[40] = (x2-t1) >> 10;
964          v[24] = (x3+t0) >> 10;
965          v[32] = (x3-t0) >> 10;
966       }
967    }
968 
969    ubyte* o = dst;
970    for (i=0, v=val.ptr; i < 8; ++i,v+=8,o+=dst_stride) {
971       // no fast case since the first 1D IDCT spread components out
972       int t0,t1,t2,t3,x0,x1,x2,x3;
973       STBI__IDCT_1D(
974           t0, t1, t2, t3,
975           x0, x1, x2, x3,
976           v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]
977       );
978       // constants scaled things up by 1<<12, plus we had 1<<2 from first
979       // loop, plus horizontal and vertical each scale by sqrt(8) so together
980       // we've got an extra 1<<3, so 1<<17 total we need to remove.
981       // so we want to round that, which means adding 0.5 * 1<<17,
982       // aka 65536. Also, we'll end up with -128 to 127 that we want
983       // to encode as 0-255 by adding 128, so we'll add that before the shift
984       x0 += 65536 + (128<<17);
985       x1 += 65536 + (128<<17);
986       x2 += 65536 + (128<<17);
987       x3 += 65536 + (128<<17);
988       // tried computing the shifts into temps, or'ing the temps to see
989       // if any were out of range, but that was slower
990       o[0] = stbi__clamp((x0+t3) >> 17);
991       o[7] = stbi__clamp((x0-t3) >> 17);
992       o[1] = stbi__clamp((x1+t2) >> 17);
993       o[6] = stbi__clamp((x1-t2) >> 17);
994       o[2] = stbi__clamp((x2+t1) >> 17);
995       o[5] = stbi__clamp((x2-t1) >> 17);
996       o[3] = stbi__clamp((x3+t0) >> 17);
997       o[4] = stbi__clamp((x3-t0) >> 17);
998    }
999 }
1000 
1001 // clamp to 0-255
1002 pure ubyte stbi__clamp(int x) {
1003    if (cast(uint) x > 255) {
1004       if (x < 0) return 0;
1005       if (x > 255) return 255;
1006    }
1007    return cast(ubyte) x;
1008 }
1009 
1010 static immutable ubyte[2] jpeg_soi_marker = [0xff, 0xd8];
1011 
1012 // the above is adapted from stb_image
1013 // ------------------------------------------------------------
1014 
1015 package void read_jpeg_info(Reader stream, out int w, out int h, out int chans) {
1016     ubyte[2] marker = void;
1017     stream.readExact(marker, 2);
1018 
1019     // SOI
1020     if (marker[0..2] != jpeg_soi_marker)
1021         throw new ImageIOException("not JPEG");
1022 
1023     while (true) {
1024         stream.readExact(marker, 2);
1025 
1026         if (marker[0] != 0xff)
1027             throw new ImageIOException("no frame header");
1028         while (marker[1] == 0xff)
1029             stream.readExact(marker[1..$], 1);
1030 
1031         enum SKIP = 0xff;
1032         switch (marker[1]) with (Marker) {
1033             case SOF0: .. case SOF3: goto case;
1034             case SOF9: .. case SOF11:
1035                 ubyte[8] tmp;
1036                 stream.readExact(tmp[0..8], 8);
1037                 //int len = bigEndianToNative!ushort(tmp[0..2]);
1038                 w = bigEndianToNative!ushort(tmp[5..7]);
1039                 h = bigEndianToNative!ushort(tmp[3..5]);
1040                 chans = tmp[7];
1041                 return;
1042             case SOS, EOI: throw new ImageIOException("no frame header");
1043             case DRI, DHT, DQT, COM: goto case SKIP;
1044             case APP0: .. case APPf: goto case SKIP;
1045             case SKIP:
1046                 ubyte[2] lenbuf = void;
1047                 stream.readExact(lenbuf, 2);
1048                 int skiplen = bigEndianToNative!ushort(lenbuf) - 2;
1049                 stream.seek(skiplen, SEEK_CUR);
1050                 break;
1051             default: throw new ImageIOException("unsupported marker");
1052         }
1053     }
1054     assert(0);
1055 }