1 // Baseline JPEG decoder
2 
3 module imageformats.jpeg;
4 
5 import std.math         : ceil;
6 import std.bitmanip     : bigEndianToNative;
7 import std.stdio        : File, SEEK_SET, SEEK_CUR;
8 import std.typecons     : scoped;
9 import imageformats;
10 
11 private:
12 
13 /// Reads a JPEG image. req_chans defines the format of returned image
14 /// (you can use ColFmt here).
15 public IFImage read_jpeg(in char[] filename, long req_chans = 0) {
16     auto reader = scoped!FileReader(filename);
17     return read_jpeg(reader, req_chans);
18 }
19 
20 /// Reads an image from a buffer containing a JPEG image. req_chans defines the
21 /// format of returned image (you can use ColFmt here).
22 public IFImage read_jpeg_from_mem(in ubyte[] source, long req_chans = 0) {
23     auto reader = scoped!MemReader(source);
24     return read_jpeg(reader, req_chans);
25 }
26 
27 /// Returns width, height and color format information via w, h and chans.
28 public void read_jpeg_info(in char[] filename, out int w, out int h, out int chans) {
29     auto reader = scoped!FileReader(filename);
30     return read_jpeg_info(reader, w, h, chans);
31 }
32 
33 /// Returns width, height and color format information via w, h and chans.
34 public void read_jpeg_info_from_mem(in ubyte[] source, out int w, out int h, out int chans) {
35     auto reader = scoped!MemReader(source);
36     return read_jpeg_info(reader, w, h, chans);
37 }
38 
39 // Detects whether a JPEG image is readable from stream.
40 package bool detect_jpeg(Reader stream) {
41     try {
42         int w, h, c;
43         read_jpeg_info(stream, w, h, c);
44         return true;
45     } catch (Throwable) {
46         return false;
47     } finally {
48         stream.seek(0, SEEK_SET);
49     }
50 }
51 
52 package void read_jpeg_info(Reader stream, out int w, out int h, out int chans) {
53     ubyte[2] marker = void;
54     stream.readExact(marker, 2);
55 
56     // SOI
57     if (marker[0] != 0xff || marker[1] != Marker.SOI)
58         throw new ImageIOException("not JPEG");
59 
60     while (true) {
61         stream.readExact(marker, 2);
62 
63         if (marker[0] != 0xff)
64             throw new ImageIOException("no frame header");
65         while (marker[1] == 0xff)
66             stream.readExact(marker[1..$], 1);
67 
68         switch (marker[1]) with (Marker) {
69             case SOF0: .. case SOF3:
70             case SOF9: .. case SOF11:
71                 ubyte[8] tmp;
72                 stream.readExact(tmp[0..8], 8);
73                 //int len = bigEndianToNative!ushort(tmp[0..2]);
74                 w = bigEndianToNative!ushort(tmp[5..7]);
75                 h = bigEndianToNative!ushort(tmp[3..5]);
76                 chans = tmp[7];
77                 return;
78             case SOS, EOI: throw new ImageIOException("no frame header");
79             case DRI, DHT, DQT, COM:
80             case APP0: .. case APPf:
81                 ubyte[2] lenbuf = void;
82                 stream.readExact(lenbuf, 2);
83                 int skiplen = bigEndianToNative!ushort(lenbuf) - 2;
84                 stream.seek(skiplen, SEEK_CUR);
85                 break;
86             default: throw new ImageIOException("unsupported marker");
87         }
88     }
89     assert(0);
90 }
91 
92 package IFImage read_jpeg(Reader stream, long req_chans = 0) {
93     if (req_chans < 0 || 4 < req_chans)
94         throw new ImageIOException("come on...");
95 
96     // SOI
97     ubyte[2] tmp = void;
98     stream.readExact(tmp, tmp.length);
99     if (tmp[0] != 0xff || tmp[1] != Marker.SOI)
100         throw new ImageIOException("not JPEG");
101 
102     JPEG_Decoder dc = { stream: stream };
103 
104     read_markers(dc);   // reads until first scan header or eoi
105     if (dc.eoi_reached)
106         throw new ImageIOException("no image data");
107 
108     dc.tgt_chans = (req_chans == 0) ? dc.num_comps : cast(int) req_chans;
109 
110     foreach (ref comp; dc.comps[0..dc.num_comps])
111         comp.data = new ubyte[dc.num_mcu_x*comp.sfx*8*dc.num_mcu_y*comp.sfy*8];
112 
113     // E.7 -- Multiple scans are for progressive images which are not supported
114     //while (!dc.eoi_reached) {
115         decode_scan(dc);    // E.2.3
116         //read_markers(dc);   // reads until next scan header or eoi
117     //}
118 
119     // throw away fill samples and convert to target format
120     ubyte[] pixels = dc.reconstruct();
121 
122     IFImage result = {
123         w      : dc.width,
124         h      : dc.height,
125         c      : cast(ColFmt) dc.tgt_chans,
126         pixels : pixels,
127     };
128     return result;
129 }
130 
131 struct JPEG_Decoder {
132     Reader stream;
133 
134     bool has_frame_header = false;
135     bool eoi_reached = false;
136 
137     ubyte[64][4] qtables;
138     HuffTab[2] ac_tables;
139     HuffTab[2] dc_tables;
140 
141     ubyte cb;  // current byte (next bit always at MSB)
142     int bits_left;   // num of unused bits in cb
143 
144     bool correct_comp_ids;
145     Component[3] comps;
146     ubyte num_comps;
147     int tgt_chans;
148 
149     int width, height;
150 
151     int hmax, vmax;
152 
153     ushort restart_interval;    // number of MCUs in restart interval
154 
155     // image component
156     struct Component {
157         ubyte sfx, sfy;   // sampling factors, aka. h and v
158         size_t x, y;       // total num of samples, without fill samples
159         ubyte qtable;
160         ubyte ac_table;
161         ubyte dc_table;
162         int pred;                // dc prediction
163         ubyte[] data;   // reconstructed samples
164     }
165 
166     int num_mcu_x;
167     int num_mcu_y;
168 }
169 
170 struct HuffTab {
171     ubyte[256] values;
172     ubyte[257] sizes;
173     short[16] mincode, maxcode;
174     short[16] valptr;
175 }
176 
177 enum Marker : ubyte {
178     SOI = 0xd8,     // start of image
179     SOF0 = 0xc0,    // start of frame / baseline DCT
180     //SOF1 = 0xc1,    // start of frame / extended seq.
181     //SOF2 = 0xc2,    // start of frame / progressive DCT
182     SOF3 = 0xc3,    // start of frame / lossless
183     SOF9 = 0xc9,    // start of frame / extended seq., arithmetic
184     SOF11 = 0xcb,    // start of frame / lossless, arithmetic
185     DHT = 0xc4,     // define huffman tables
186     DQT = 0xdb,     // define quantization tables
187     DRI = 0xdd,     // define restart interval
188     SOS = 0xda,     // start of scan
189     DNL = 0xdc,     // define number of lines
190     RST0 = 0xd0,    // restart entropy coded data
191     // ...
192     RST7 = 0xd7,    // restart entropy coded data
193     APP0 = 0xe0,    // application 0 segment
194     // ...
195     APPf = 0xef,    // application f segment
196     //DAC = 0xcc,     // define arithmetic conditioning table
197     COM = 0xfe,     // comment
198     EOI = 0xd9,     // end of image
199 }
200 
201 void read_markers(ref JPEG_Decoder dc) {
202     bool has_next_scan_header = false;
203     while (!has_next_scan_header && !dc.eoi_reached) {
204         ubyte[2] marker;
205         dc.stream.readExact(marker, 2);
206 
207         if (marker[0] != 0xff)
208             throw new ImageIOException("no marker");
209         while (marker[1] == 0xff)
210             dc.stream.readExact(marker[1..$], 1);
211 
212         switch (marker[1]) with (Marker) {
213             case DHT: dc.read_huffman_tables(); break;
214             case DQT: dc.read_quantization_tables(); break;
215             case SOF0:
216                 if (dc.has_frame_header)
217                     throw new ImageIOException("extra frame header");
218                 dc.read_frame_header();
219                 dc.has_frame_header = true;
220                 break;
221             case SOS:
222                 if (!dc.has_frame_header)
223                     throw new ImageIOException("no frame header");
224                 dc.read_scan_header();
225                 has_next_scan_header = true;
226                 break;
227             case DRI: dc.read_restart_interval(); break;
228             case EOI: dc.eoi_reached = true; break;
229             case APP0: .. case APPf:
230             case COM:
231                 ubyte[2] lenbuf = void;
232                 dc.stream.readExact(lenbuf, lenbuf.length);
233                 int len = bigEndianToNative!ushort(lenbuf) - 2;
234                 dc.stream.seek(len, SEEK_CUR);
235                 break;
236             default: throw new ImageIOException("invalid / unsupported marker");
237         }
238     }
239 }
240 
241 // DHT -- define huffman tables
242 void read_huffman_tables(ref JPEG_Decoder dc) {
243     ubyte[19] tmp = void;
244     dc.stream.readExact(tmp, 2);
245     int len = bigEndianToNative!ushort(tmp[0..2]);
246     len -= 2;
247 
248     while (0 < len) {
249         dc.stream.readExact(tmp, 17);   // info byte & the BITS
250         ubyte table_slot = tmp[0] & 0xf; // must be 0 or 1 for baseline
251         ubyte table_class = tmp[0] >> 4;  // 0 = dc table, 1 = ac table
252         if (1 < table_slot || 1 < table_class)
253             throw new ImageIOException("invalid / not supported");
254 
255         // compute total number of huffman codes
256         int mt = 0;
257         foreach (i; 1..17)
258             mt += tmp[i];
259         if (256 < mt)   // TODO where in the spec?
260             throw new ImageIOException("invalid / not supported");
261 
262         if (table_class == 0) {
263             dc.stream.readExact(dc.dc_tables[table_slot].values, mt);
264             derive_table(dc.dc_tables[table_slot], tmp[1..17]);
265         } else {
266             dc.stream.readExact(dc.ac_tables[table_slot].values, mt);
267             derive_table(dc.ac_tables[table_slot], tmp[1..17]);
268         }
269 
270         len -= 17 + mt;
271     }
272 }
273 
274 // num_values is the BITS
275 void derive_table(ref HuffTab table, in ref ubyte[16] num_values) {
276     short[256] codes;
277 
278     int k = 0;
279     foreach (i; 0..16) {
280         foreach (j; 0..num_values[i]) {
281             table.sizes[k] = cast(ubyte) (i + 1);
282             ++k;
283         }
284     }
285     table.sizes[k] = 0;
286 
287     k = 0;
288     short code = 0;
289     ubyte si = table.sizes[k];
290     while (true) {
291         do {
292             codes[k] = code;
293             ++code;
294             ++k;
295         } while (si == table.sizes[k]);
296 
297         if (table.sizes[k] == 0)
298             break;
299 
300 //        assert(si < table.sizes[k]);
301         do {
302             code <<= 1;
303             ++si;
304         } while (si != table.sizes[k]);
305     }
306 
307     derive_mincode_maxcode_valptr(
308         table.mincode, table.maxcode, table.valptr,
309         codes, num_values
310     );
311 }
312 
313 // F.15
314 void derive_mincode_maxcode_valptr(ref short[16] mincode, ref short[16] maxcode,
315      ref short[16] valptr, in ref short[256] codes, in ref ubyte[16] num_values) pure
316 {
317     mincode[] = -1;
318     maxcode[] = -1;
319     valptr[] = -1;
320 
321     int j = 0;
322     foreach (i; 0..16) {
323         if (num_values[i] != 0) {
324             valptr[i] = cast(short) j;
325             mincode[i] = codes[j];
326             j += num_values[i] - 1;
327             maxcode[i] = codes[j];
328             j += 1;
329         }
330     }
331 }
332 
333 // DQT -- define quantization tables
334 void read_quantization_tables(ref JPEG_Decoder dc) {
335     ubyte[2] tmp = void;
336     dc.stream.readExact(tmp, 2);
337     int len = bigEndianToNative!ushort(tmp[0..2]);
338     if (len % 65 != 2)
339         throw new ImageIOException("invalid / not supported");
340     len -= 2;
341     while (0 < len) {
342         dc.stream.readExact(tmp, 1);
343         ubyte table_info = tmp[0];
344         ubyte table_slot = table_info & 0xf;
345         ubyte precision = table_info >> 4;  // 0 = 8 bit, 1 = 16 bit
346         if (3 < table_slot || precision != 0)    // only 8 bit for baseline
347             throw new ImageIOException("invalid / not supported");
348 
349         dc.stream.readExact(dc.qtables[table_slot], 64);
350         len -= 1 + 64;
351     }
352 }
353 
354 // SOF0 -- start of frame
355 void read_frame_header(ref JPEG_Decoder dc) {
356     ubyte[9] tmp = void;
357     dc.stream.readExact(tmp, 8);
358     int len = bigEndianToNative!ushort(tmp[0..2]);  // 8 + num_comps*3
359     ubyte precision = tmp[2];
360     dc.height = bigEndianToNative!ushort(tmp[3..5]);
361     dc.width = bigEndianToNative!ushort(tmp[5..7]);
362     dc.num_comps = tmp[7];
363 
364     if ( precision != 8 ||
365          (dc.num_comps != 1 && dc.num_comps != 3) ||
366          len != 8 + dc.num_comps*3 )
367         throw new ImageIOException("invalid / not supported");
368 
369     dc.hmax = 0;
370     dc.vmax = 0;
371     int mcu_du = 0; // data units in one mcu
372     dc.stream.readExact(tmp, dc.num_comps*3);
373     foreach (i; 0..dc.num_comps) {
374         ubyte ci = tmp[i*3];
375         // JFIF says ci should be i+1, but there are images where ci is i. Normalize ids
376         // so that ci == i, always. So much for standards...
377         if (i == 0) { dc.correct_comp_ids = ci == i+1; }
378         if ((dc.correct_comp_ids && ci != i+1)
379         || (!dc.correct_comp_ids && ci != i))
380             throw new ImageIOException("invalid component id");
381 
382         auto comp = &dc.comps[i];
383         ubyte sampling_factors = tmp[i*3 + 1];
384         comp.sfx = sampling_factors >> 4;
385         comp.sfy = sampling_factors & 0xf;
386         comp.qtable = tmp[i*3 + 2];
387         if ( comp.sfy < 1 || 4 < comp.sfy ||
388              comp.sfx < 1 || 4 < comp.sfx ||
389              3 < comp.qtable )
390             throw new ImageIOException("invalid / not supported");
391 
392         if (dc.hmax < comp.sfx) dc.hmax = comp.sfx;
393         if (dc.vmax < comp.sfy) dc.vmax = comp.sfy;
394 
395         mcu_du += comp.sfx * comp.sfy;
396     }
397     if (10 < mcu_du)
398         throw new ImageIOException("invalid / not supported");
399 
400     foreach (i; 0..dc.num_comps) {
401         dc.comps[i].x = cast(size_t) ceil(dc.width * (cast(double) dc.comps[i].sfx / dc.hmax));
402         dc.comps[i].y = cast(size_t) ceil(dc.height * (cast(double) dc.comps[i].sfy / dc.vmax));
403     }
404 
405     size_t mcu_w = dc.hmax * 8;
406     size_t mcu_h = dc.vmax * 8;
407     dc.num_mcu_x = cast(int) ((dc.width + mcu_w-1) / mcu_w);
408     dc.num_mcu_y = cast(int) ((dc.height + mcu_h-1) / mcu_h);
409 
410     debug(DebugJPEG) {
411         writefln("\tlen: %s", len);
412         writefln("\tprecision: %s", precision);
413         writefln("\tdimensions: %s x %s", dc.width, dc.height);
414         writefln("\tnum_comps: %s", dc.num_comps);
415         writefln("\tnum_mcu_x: %s", dc.num_mcu_x);
416         writefln("\tnum_mcu_y: %s", dc.num_mcu_y);
417     }
418 
419 }
420 
421 // SOS -- start of scan
422 void read_scan_header(ref JPEG_Decoder dc) {
423     ubyte[3] tmp = void;
424     dc.stream.readExact(tmp, tmp.length);
425     ushort len = bigEndianToNative!ushort(tmp[0..2]);
426     ubyte num_scan_comps = tmp[2];
427 
428     if ( num_scan_comps != dc.num_comps ||
429          len != (6+num_scan_comps*2) )
430         throw new ImageIOException("invalid / not supported");
431 
432     ubyte[16] buf;
433     dc.stream.readExact(buf, len-3);
434 
435     foreach (i; 0..num_scan_comps) {
436         uint ci = buf[i*2] - ((dc.correct_comp_ids) ? 1 : 0);
437         if (ci >= dc.num_comps)
438             throw new ImageIOException("invalid component id");
439 
440         ubyte tables = buf[i*2+1];
441         dc.comps[ci].dc_table = tables >> 4;
442         dc.comps[ci].ac_table = tables & 0xf;
443         if ( 1 < dc.comps[ci].dc_table ||
444              1 < dc.comps[ci].ac_table )
445             throw new ImageIOException("invalid / not supported");
446     }
447 
448     // ignore these
449     //ubyte spectral_start = buf[$-3];
450     //ubyte spectral_end = buf[$-2];
451     //ubyte approx = buf[$-1];
452 }
453 
454 void read_restart_interval(ref JPEG_Decoder dc) {
455     ubyte[4] tmp = void;
456     dc.stream.readExact(tmp, tmp.length);
457     ushort len = bigEndianToNative!ushort(tmp[0..2]);
458     if (len != 4)
459         throw new ImageIOException("invalid / not supported");
460     dc.restart_interval = bigEndianToNative!ushort(tmp[2..4]);
461 }
462 
463 // E.2.3 and E.8 and E.9
464 void decode_scan(ref JPEG_Decoder dc) {
465     int intervals, mcus;
466     if (0 < dc.restart_interval) {
467         int total_mcus = dc.num_mcu_x * dc.num_mcu_y;
468         intervals = (total_mcus + dc.restart_interval-1) / dc.restart_interval;
469         mcus = dc.restart_interval;
470     } else {
471         intervals = 1;
472         mcus = dc.num_mcu_x * dc.num_mcu_y;
473     }
474 
475     foreach (mcu_j; 0 .. dc.num_mcu_y) {
476         foreach (mcu_i; 0 .. dc.num_mcu_x) {
477 
478             // decode mcu
479             foreach (c; 0..dc.num_comps) {
480                 auto comp = &dc.comps[c];
481                 foreach (du_j; 0 .. comp.sfy) {
482                     foreach (du_i; 0 .. comp.sfx) {
483                         // decode entropy, dequantize & dezigzag
484                         short[64] data = decode_block(dc, *comp, dc.qtables[comp.qtable]);
485                         // idct & level-shift
486                         int outx = (mcu_i * comp.sfx + du_i) * 8;
487                         int outy = (mcu_j * comp.sfy + du_j) * 8;
488                         int dst_stride = dc.num_mcu_x * comp.sfx*8;
489                         ubyte* dst = comp.data.ptr + outy*dst_stride + outx;
490                         stbi__idct_block(dst, dst_stride, data);
491                     }
492                 }
493             }
494 
495             --mcus;
496 
497             if (!mcus) {
498                 --intervals;
499                 if (!intervals)
500                     return;
501 
502                 read_restart(dc.stream);    // RSTx marker
503 
504                 if (intervals == 1) {
505                     // last interval, may have fewer MCUs than defined by DRI
506                     mcus = (dc.num_mcu_y - mcu_j - 1)
507                          * dc.num_mcu_x + dc.num_mcu_x - mcu_i - 1;
508                 } else {
509                     mcus = dc.restart_interval;
510                 }
511 
512                 // reset decoder
513                 dc.cb = 0;
514                 dc.bits_left = 0;
515                 foreach (k; 0..dc.num_comps)
516                     dc.comps[k].pred = 0;
517             }
518 
519         }
520     }
521 }
522 
523 // RST0-RST7
524 void read_restart(Reader stream) {
525     ubyte[2] tmp = void;
526     stream.readExact(tmp, tmp.length);
527     if (tmp[0] != 0xff || tmp[1] < Marker.RST0 || Marker.RST7 < tmp[1])
528         throw new ImageIOException("reset marker missing");
529     // the markers should cycle 0 through 7, could check that here...
530 }
531 
532 immutable ubyte[64] dezigzag = [
533      0,  1,  8, 16,  9,  2,  3, 10,
534     17, 24, 32, 25, 18, 11,  4,  5,
535     12, 19, 26, 33, 40, 48, 41, 34,
536     27, 20, 13,  6,  7, 14, 21, 28,
537     35, 42, 49, 56, 57, 50, 43, 36,
538     29, 22, 15, 23, 30, 37, 44, 51,
539     58, 59, 52, 45, 38, 31, 39, 46,
540     53, 60, 61, 54, 47, 55, 62, 63,
541 ];
542 
543 // decode entropy, dequantize & dezigzag (see section F.2)
544 short[64] decode_block(ref JPEG_Decoder dc, ref JPEG_Decoder.Component comp,
545                                                     in ref ubyte[64] qtable)
546 {
547     short[64] res = 0;
548 
549     ubyte t = decode_huff(dc, dc.dc_tables[comp.dc_table]);
550     int diff = t ? dc.receive_and_extend(t) : 0;
551 
552     comp.pred = comp.pred + diff;
553     res[0] = cast(short) (comp.pred * qtable[0]);
554 
555     int k = 1;
556     do {
557         ubyte rs = decode_huff(dc, dc.ac_tables[comp.ac_table]);
558         ubyte rrrr = rs >> 4;
559         ubyte ssss = rs & 0xf;
560 
561         if (ssss == 0) {
562             if (rrrr != 0xf)
563                 break;      // end of block
564             k += 16;    // run length is 16
565             continue;
566         }
567 
568         k += rrrr;
569 
570         if (63 < k)
571             throw new ImageIOException("corrupt block");
572         res[dezigzag[k]] = cast(short) (dc.receive_and_extend(ssss) * qtable[k]);
573         k += 1;
574     } while (k < 64);
575 
576     return res;
577 }
578 
579 int receive_and_extend(ref JPEG_Decoder dc, ubyte s) {
580     // receive
581     int symbol = 0;
582     foreach (_; 0..s)
583         symbol = (symbol << 1) + nextbit(dc);
584     // extend
585     int vt = 1 << (s-1);
586     if (symbol < vt)
587         return symbol + (-1 << s) + 1;
588     return symbol;
589 }
590 
591 // F.16 -- the DECODE
592 ubyte decode_huff(ref JPEG_Decoder dc, in ref HuffTab tab) {
593     short code = nextbit(dc);
594 
595     int i = 0;
596     while (tab.maxcode[i] < code) {
597         code = cast(short) ((code << 1) + nextbit(dc));
598         i += 1;
599         if (tab.maxcode.length <= i)
600             throw new ImageIOException("corrupt huffman coding");
601     }
602     int j = tab.valptr[i] + code - tab.mincode[i];
603     if (tab.values.length <= cast(uint) j)
604         throw new ImageIOException("corrupt huffman coding");
605     return tab.values[j];
606 }
607 
608 // F.2.2.5 and F.18
609 ubyte nextbit(ref JPEG_Decoder dc) {
610     if (!dc.bits_left) {
611         ubyte[1] bytebuf;
612         dc.stream.readExact(bytebuf, 1);
613         dc.cb = bytebuf[0];
614         dc.bits_left = 8;
615 
616         if (dc.cb == 0xff) {
617             dc.stream.readExact(bytebuf, 1);
618             if (bytebuf[0] != 0x0)
619                 throw new ImageIOException("unexpected marker");
620         }
621     }
622 
623     ubyte r = dc.cb >> 7;
624     dc.cb <<= 1;
625     dc.bits_left -= 1;
626     return r;
627 }
628 
629 ubyte[] reconstruct(in ref JPEG_Decoder dc) {
630     auto result = new ubyte[dc.width * dc.height * dc.tgt_chans];
631 
632     switch (dc.num_comps * 10 + dc.tgt_chans) {
633         case 34, 33:
634             // Use specialized bilinear filtering functions for the frequent cases where
635             // Cb & Cr channels have half resolution.
636             if ((dc.comps[0].sfx <= 2 && dc.comps[0].sfy <= 2)
637             && (dc.comps[0].sfx + dc.comps[0].sfy >= 3)
638             && dc.comps[1].sfx == 1 && dc.comps[1].sfy == 1
639             && dc.comps[2].sfx == 1 && dc.comps[2].sfy == 1) {
640                 void function(in ubyte[], in ubyte[], ubyte[]) resample;
641                 switch (dc.comps[0].sfx * 10 + dc.comps[0].sfy) {
642                     case 22: resample = &upsample_h2_v2; break;
643                     case 21: resample = &upsample_h2_v1; break;
644                     case 12: resample = &upsample_h1_v2; break;
645                     default: throw new ImageIOException("bug");
646                 }
647 
648                 auto comp1 = new ubyte[dc.width];
649                 auto comp2 = new ubyte[dc.width];
650 
651                 size_t s = 0;
652                 size_t di = 0;
653                 foreach (j; 0 .. dc.height) {
654                     size_t mi = j / dc.comps[0].sfy;
655                     size_t si = (mi == 0 || mi >= (dc.height-1)/dc.comps[0].sfy)
656                               ? mi : mi - 1 + s * 2;
657                     s = s ^ 1;
658 
659                     size_t cs = dc.num_mcu_x * dc.comps[1].sfx * 8;
660                     size_t cl0 = mi * cs;
661                     size_t cl1 = si * cs;
662                     resample(dc.comps[1].data[cl0 .. cl0 + dc.comps[1].x],
663                              dc.comps[1].data[cl1 .. cl1 + dc.comps[1].x],
664                              comp1[]);
665                     resample(dc.comps[2].data[cl0 .. cl0 + dc.comps[2].x],
666                              dc.comps[2].data[cl1 .. cl1 + dc.comps[2].x],
667                              comp2[]);
668 
669                     foreach (i; 0 .. dc.width) {
670                         result[di .. di+3] = ycbcr_to_rgb(
671                             dc.comps[0].data[j * dc.num_mcu_x * dc.comps[0].sfx * 8 + i],
672                             comp1[i],
673                             comp2[i],
674                         );
675                         if (dc.tgt_chans == 4)
676                             result[di+3] = 255;
677                         di += dc.tgt_chans;
678                     }
679                 }
680 
681                 return result;
682             }
683 
684             foreach (const ref comp; dc.comps[0..dc.num_comps]) {
685                 if (comp.sfx != dc.hmax || comp.sfy != dc.vmax)
686                     return dc.upsample(result);
687             }
688 
689             size_t si, di;
690             foreach (j; 0 .. dc.height) {
691                 foreach (i; 0 .. dc.width) {
692                     result[di .. di+3] = ycbcr_to_rgb(
693                         dc.comps[0].data[si+i],
694                         dc.comps[1].data[si+i],
695                         dc.comps[2].data[si+i],
696                     );
697                     if (dc.tgt_chans == 4)
698                         result[di+3] = 255;
699                     di += dc.tgt_chans;
700                 }
701                 si += dc.num_mcu_x * dc.comps[0].sfx * 8;
702             }
703             return result;
704         case 32, 12, 31, 11:
705             const comp = &dc.comps[0];
706             if (comp.sfx == dc.hmax && comp.sfy == dc.vmax) {
707                 size_t si, di;
708                 if (dc.tgt_chans == 2) {
709                     foreach (j; 0 .. dc.height) {
710                         foreach (i; 0 .. dc.width) {
711                             result[di++] = comp.data[si+i];
712                             result[di++] = 255;
713                         }
714                         si += dc.num_mcu_x * comp.sfx * 8;
715                     }
716                 } else {
717                     foreach (j; 0 .. dc.height) {
718                         result[di .. di+dc.width] = comp.data[si .. si+dc.width];
719                         si += dc.num_mcu_x * comp.sfx * 8;
720                         di += dc.width;
721                     }
722                 }
723                 return result;
724             } else {
725                 // need to resample (haven't tested this...)
726                 return dc.upsample_luma(result);
727             }
728         case 14, 13:
729             const comp = &dc.comps[0];
730             size_t si, di;
731             foreach (j; 0 .. dc.height) {
732                 foreach (i; 0 .. dc.width) {
733                     result[di .. di+3] = comp.data[si+i];
734                     if (dc.tgt_chans == 4)
735                         result[di+3] = 255;
736                     di += dc.tgt_chans;
737                 }
738                 si += dc.num_mcu_x * comp.sfx * 8;
739             }
740             return result;
741         default: assert(0);
742     }
743 }
744 
745 void upsample_h2_v2(in ubyte[] line0, in ubyte[] line1, ubyte[] result) {
746     ubyte mix(ubyte mm, ubyte ms, ubyte sm, ubyte ss) {
747         return cast(ubyte) (( cast(uint) mm * 3 * 3
748                             + cast(uint) ms * 3 * 1
749                             + cast(uint) sm * 1 * 3
750                             + cast(uint) ss * 1 * 1
751                             + 8) / 16);
752     }
753 
754     result[0] = cast(ubyte) (( cast(uint) line0[0] * 3
755                              + cast(uint) line1[0] * 1
756                              + 2) / 4);
757     if (line0.length == 1) return;
758     result[1] = mix(line0[0], line0[1], line1[0], line1[1]);
759 
760     size_t di = 2;
761     foreach (i; 1 .. line0.length) {
762         result[di] = mix(line0[i], line0[i-1], line1[i], line1[i-1]);
763         di += 1;
764         if (i == line0.length-1) {
765             if (di < result.length) {
766                 result[di] = cast(ubyte) (( cast(uint) line0[i] * 3
767                                           + cast(uint) line1[i] * 1
768                                           + 2) / 4);
769             }
770             return;
771         }
772         result[di] = mix(line0[i], line0[i+1], line1[i], line1[i+1]);
773         di += 1;
774     }
775 }
776 
777 void upsample_h2_v1(in ubyte[] line0, in ubyte[] _line1, ubyte[] result) {
778     result[0] = line0[0];
779     if (line0.length == 1) return;
780     result[1] = cast(ubyte) (( cast(uint) line0[0] * 3
781                              + cast(uint) line0[1] * 1
782                              + 2) / 4);
783     size_t di = 2;
784     foreach (i; 1 .. line0.length) {
785         result[di] = cast(ubyte) (( cast(uint) line0[i-1] * 1
786                                   + cast(uint) line0[i+0] * 3
787                                   + 2) / 4);
788         di += 1;
789         if (i == line0.length-1) {
790             if (di < result.length) result[di] = line0[i];
791             return;
792         }
793         result[di] = cast(ubyte) (( cast(uint) line0[i+0] * 3
794                                   + cast(uint) line0[i+1] * 1
795                                   + 2) / 4);
796         di += 1;
797     }
798 }
799 
800 void upsample_h1_v2(in ubyte[] line0, in ubyte[] line1, ubyte[] result) {
801     foreach (i; 0 .. result.length) {
802         result[i] = cast(ubyte) (( cast(uint) line0[i] * 3
803                                  + cast(uint) line1[i] * 1
804                                  + 2) / 4);
805     }
806 }
807 
808 // Nearest neighbor
809 ubyte[] upsample_luma(in ref JPEG_Decoder dc, ubyte[] result) {
810     const size_t stride0 = dc.num_mcu_x * dc.comps[0].sfx * 8;
811     const y_step0 = cast(float) dc.comps[0].sfy / cast(float) dc.vmax;
812     const x_step0 = cast(float) dc.comps[0].sfx / cast(float) dc.hmax;
813 
814     float y0 = y_step0 * 0.5f;
815     size_t y0i = 0;
816 
817     size_t di;
818 
819     foreach (j; 0 .. dc.height) {
820         float x0 = x_step0 * 0.5f;
821         size_t x0i = 0;
822         foreach (i; 0 .. dc.width) {
823             result[di] = dc.comps[0].data[y0i + x0i];
824             if (dc.tgt_chans == 2)
825                 result[di+1] = 255;
826             di += dc.tgt_chans;
827             x0 += x_step0;
828             if (x0 >= 1.0f) { x0 -= 1.0f; x0i += 1; }
829         }
830         y0 += y_step0;
831         if (y0 >= 1.0f) { y0 -= 1.0f; y0i += stride0; }
832     }
833     return result;
834 }
835 
836 // Nearest neighbor
837 ubyte[] upsample(in ref JPEG_Decoder dc, ubyte[] result) {
838     const size_t stride0 = dc.num_mcu_x * dc.comps[0].sfx * 8;
839     const size_t stride1 = dc.num_mcu_x * dc.comps[1].sfx * 8;
840     const size_t stride2 = dc.num_mcu_x * dc.comps[2].sfx * 8;
841 
842     const y_step0 = cast(float) dc.comps[0].sfy / cast(float) dc.vmax;
843     const y_step1 = cast(float) dc.comps[1].sfy / cast(float) dc.vmax;
844     const y_step2 = cast(float) dc.comps[2].sfy / cast(float) dc.vmax;
845     const x_step0 = cast(float) dc.comps[0].sfx / cast(float) dc.hmax;
846     const x_step1 = cast(float) dc.comps[1].sfx / cast(float) dc.hmax;
847     const x_step2 = cast(float) dc.comps[2].sfx / cast(float) dc.hmax;
848 
849     float y0 = y_step0 * 0.5f;
850     float y1 = y_step1 * 0.5f;
851     float y2 = y_step2 * 0.5f;
852     size_t y0i = 0;
853     size_t y1i = 0;
854     size_t y2i = 0;
855 
856     size_t di;
857 
858     foreach (_j; 0 .. dc.height) {
859         float x0 = x_step0 * 0.5f;
860         float x1 = x_step1 * 0.5f;
861         float x2 = x_step2 * 0.5f;
862         size_t x0i = 0;
863         size_t x1i = 0;
864         size_t x2i = 0;
865         foreach (i; 0 .. dc.width) {
866             result[di .. di+3] = ycbcr_to_rgb(
867                 dc.comps[0].data[y0i + x0i],
868                 dc.comps[1].data[y1i + x1i],
869                 dc.comps[2].data[y2i + x2i],
870             );
871             if (dc.tgt_chans == 4)
872                 result[di+3] = 255;
873             di += dc.tgt_chans;
874             x0 += x_step0;
875             x1 += x_step1;
876             x2 += x_step2;
877             if (x0 >= 1.0) { x0 -= 1.0f; x0i += 1; }
878             if (x1 >= 1.0) { x1 -= 1.0f; x1i += 1; }
879             if (x2 >= 1.0) { x2 -= 1.0f; x2i += 1; }
880         }
881         y0 += y_step0;
882         y1 += y_step1;
883         y2 += y_step2;
884         if (y0 >= 1.0) { y0 -= 1.0f; y0i += stride0; }
885         if (y1 >= 1.0) { y1 -= 1.0f; y1i += stride1; }
886         if (y2 >= 1.0) { y2 -= 1.0f; y2i += stride2; }
887     }
888     return result;
889 }
890 
891 ubyte[3] ycbcr_to_rgb(ubyte y, ubyte cb, ubyte cr) pure {
892     ubyte[3] rgb = void;
893     rgb[0] = clamp(y + 1.402*(cr-128));
894     rgb[1] = clamp(y - 0.34414*(cb-128) - 0.71414*(cr-128));
895     rgb[2] = clamp(y + 1.772*(cb-128));
896     return rgb;
897 }
898 
899 ubyte clamp(float x) pure {
900     if (x < 0) return 0;
901     if (255 < x) return 255;
902     return cast(ubyte) x;
903 }
904 
905 // ------------------------------------------------------------
906 // The IDCT stuff here (to the next dashed line) is copied and adapted from
907 // stb_image which is released under public domain.  Many thanks to stb_image
908 // author, Sean Barrett.
909 // Link: https://github.com/nothings/stb/blob/master/stb_image.h
910 
911 pure int f2f(float x) { return cast(int) (x * 4096 + 0.5); }
912 pure int fsh(int x) { return x << 12; }
913 
914 // from stb_image, derived from jidctint -- DCT_ISLOW
915 pure void STBI__IDCT_1D(ref int t0, ref int t1, ref int t2, ref int t3,
916                         ref int x0, ref int x1, ref int x2, ref int x3,
917         int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7)
918 {
919    int p1,p2,p3,p4,p5;
920    //int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3;
921    p2 = s2;
922    p3 = s6;
923    p1 = (p2+p3) * f2f(0.5411961f);
924    t2 = p1 + p3 * f2f(-1.847759065f);
925    t3 = p1 + p2 * f2f( 0.765366865f);
926    p2 = s0;
927    p3 = s4;
928    t0 = fsh(p2+p3);
929    t1 = fsh(p2-p3);
930    x0 = t0+t3;
931    x3 = t0-t3;
932    x1 = t1+t2;
933    x2 = t1-t2;
934    t0 = s7;
935    t1 = s5;
936    t2 = s3;
937    t3 = s1;
938    p3 = t0+t2;
939    p4 = t1+t3;
940    p1 = t0+t3;
941    p2 = t1+t2;
942    p5 = (p3+p4)*f2f( 1.175875602f);
943    t0 = t0*f2f( 0.298631336f);
944    t1 = t1*f2f( 2.053119869f);
945    t2 = t2*f2f( 3.072711026f);
946    t3 = t3*f2f( 1.501321110f);
947    p1 = p5 + p1*f2f(-0.899976223f);
948    p2 = p5 + p2*f2f(-2.562915447f);
949    p3 = p3*f2f(-1.961570560f);
950    p4 = p4*f2f(-0.390180644f);
951    t3 += p1+p4;
952    t2 += p2+p3;
953    t1 += p2+p4;
954    t0 += p1+p3;
955 }
956 
957 // idct and level-shift
958 pure void stbi__idct_block(ubyte* dst, int dst_stride, in ref short[64] data) {
959    int i;
960    int[64] val;
961    int* v = val.ptr;
962    const(short)* d = data.ptr;
963 
964    // columns
965    for (i=0; i < 8; ++i,++d, ++v) {
966       // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
967       if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
968            && d[40]==0 && d[48]==0 && d[56]==0) {
969          //    no shortcut                 0     seconds
970          //    (1|2|3|4|5|6|7)==0          0     seconds
971          //    all separate               -0.047 seconds
972          //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
973          int dcterm = d[0] << 2;
974          v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
975       } else {
976          int t0,t1,t2,t3,x0,x1,x2,x3;
977          STBI__IDCT_1D(
978              t0, t1, t2, t3,
979              x0, x1, x2, x3,
980              d[ 0], d[ 8], d[16], d[24],
981              d[32], d[40], d[48], d[56]
982          );
983          // constants scaled things up by 1<<12; let's bring them back
984          // down, but keep 2 extra bits of precision
985          x0 += 512; x1 += 512; x2 += 512; x3 += 512;
986          v[ 0] = (x0+t3) >> 10;
987          v[56] = (x0-t3) >> 10;
988          v[ 8] = (x1+t2) >> 10;
989          v[48] = (x1-t2) >> 10;
990          v[16] = (x2+t1) >> 10;
991          v[40] = (x2-t1) >> 10;
992          v[24] = (x3+t0) >> 10;
993          v[32] = (x3-t0) >> 10;
994       }
995    }
996 
997    ubyte* o = dst;
998    for (i=0, v=val.ptr; i < 8; ++i,v+=8,o+=dst_stride) {
999       // no fast case since the first 1D IDCT spread components out
1000       int t0,t1,t2,t3,x0,x1,x2,x3;
1001       STBI__IDCT_1D(
1002           t0, t1, t2, t3,
1003           x0, x1, x2, x3,
1004           v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]
1005       );
1006       // constants scaled things up by 1<<12, plus we had 1<<2 from first
1007       // loop, plus horizontal and vertical each scale by sqrt(8) so together
1008       // we've got an extra 1<<3, so 1<<17 total we need to remove.
1009       // so we want to round that, which means adding 0.5 * 1<<17,
1010       // aka 65536. Also, we'll end up with -128 to 127 that we want
1011       // to encode as 0-255 by adding 128, so we'll add that before the shift
1012       x0 += 65536 + (128<<17);
1013       x1 += 65536 + (128<<17);
1014       x2 += 65536 + (128<<17);
1015       x3 += 65536 + (128<<17);
1016       // tried computing the shifts into temps, or'ing the temps to see
1017       // if any were out of range, but that was slower
1018       o[0] = stbi__clamp((x0+t3) >> 17);
1019       o[7] = stbi__clamp((x0-t3) >> 17);
1020       o[1] = stbi__clamp((x1+t2) >> 17);
1021       o[6] = stbi__clamp((x1-t2) >> 17);
1022       o[2] = stbi__clamp((x2+t1) >> 17);
1023       o[5] = stbi__clamp((x2-t1) >> 17);
1024       o[3] = stbi__clamp((x3+t0) >> 17);
1025       o[4] = stbi__clamp((x3-t0) >> 17);
1026    }
1027 }
1028 
1029 // clamp to 0-255
1030 pure ubyte stbi__clamp(int x) {
1031    if (cast(uint) x > 255) {
1032       if (x < 0) return 0;
1033       if (x > 255) return 255;
1034    }
1035    return cast(ubyte) x;
1036 }
1037 
1038 // ------------------------------------------------------------