1 // Copyright (c) 2014 Tero Hänninen
2 // Boost Software License - Version 1.0 - August 17th, 2003
3 module imageformats;
4 
5 import std.algorithm;   // min, reverse
6 import std.bitmanip;   // endianness stuff
7 import std.stdio;    // File
8 import std.string;  // toLower, lastIndexOf
9 
/// Image: the result handed back by the read_* functions.
struct IFImage {
    long        w, h;       /// width and height, copied from the decoder
    ColFmt      c;          /// color format; the decoders set it to the target channel count
    ubyte[]     pixels;     /// decoded bytes (the PNG decoder allocates w * h * channels of them)
}
16 
/// Color format.
/// The numeric value of each member is its number of channels; the decoders
/// rely on this when they cast a channel count to ColFmt.
enum ColFmt {
    Y = 1,      /// one channel
    YA = 2,     /// two channels
    RGB = 3,    /// three channels
    RGBA = 4,   /// four channels
}
24 
/// Reads an image from the file named `file`.
/// The format is detected from the file contents (PNG, JPEG, BMP, then TGA).
/// `req_chans` is passed on to the decoder; the PNG and TGA decoders treat 0
/// as "keep the channel count of the source".
IFImage read_image(in char[] file, long req_chans = 0) {
    scope input = new FileReader(file);
    IFImage image = read_image_from_reader(input, req_chans);
    return image;
}
30 
/// Reads an image held in the memory buffer `source`.
/// Works like read_image but takes its bytes from memory instead of a file.
IFImage read_image_from_mem(in ubyte[] source, long req_chans = 0) {
    scope input = new MemReader(source);
    IFImage image = read_image_from_reader(input, req_chans);
    return image;
}
36 
/// Writes an image to the file named `file`.
/// The encoder is picked from the lowercased file extension: "png" or "tga".
/// Throws ImageIOException for any other extension, before the file is opened.
void write_image(in char[] file, long w, long h, in ubyte[] data, long req_chans = 0) {
    alias Encoder = void function(Writer, long, long, in ubyte[], long);

    const(char)[] suffix = extract_extension_lowercase(file);

    Encoder encode;
    if (suffix == "png")
        encode = &write_png;
    else if (suffix == "tga")
        encode = &write_tga;
    else
        throw new ImageIOException("unknown image extension/type");

    scope sink = new FileWriter(file);
    encode(sink, w, h, data, req_chans);
}
50 
/// Returns basic info about an image.
/// If number of channels is unknown chans is set to zero.
///
/// The PNG, JPEG, BMP and TGA info readers are tried in that order; after a
/// failed attempt the reader is rewound to the start before the next one runs.
/// Throws ImageIOException if none of them accepts the file.
void read_image_info(in char[] file, out long w, out long h, out long chans) {
    scope reader = new FileReader(file);
    // The catch clauses name Throwable explicitly: a bare `catch` means the
    // same thing, but the implicit form is deprecated syntax.
    try {
        return read_png_info(reader, w, h, chans);
    } catch (Throwable) {
        reader.seek(0, SEEK_SET);
    }
    try {
        return read_jpeg_info(reader, w, h, chans);
    } catch (Throwable) {
        reader.seek(0, SEEK_SET);
    }
    try {
        return read_bmp_info(reader, w, h, chans);
    } catch (Throwable) {
        reader.seek(0, SEEK_SET);
    }
    try {
        return read_tga_info(reader, w, h, chans);
    } catch (Throwable) {
        reader.seek(0, SEEK_SET);
    }
    throw new ImageIOException("unknown image type");
}
77 
/// Exception type thrown by this module for unsupported or corrupt image data
/// and for invalid arguments.
class ImageIOException : Exception {
   // The constructor only forwards its arguments to Exception's constructor.
   // NOTE(review): the `const` qualifier on a constructor is unusual -- confirm it is intended.
   @safe pure const
   this(string msg, string file = __FILE__, size_t line = __LINE__) {
       super(msg, file, line);
   }
}
85 
86 private:
87 
/// Detects the image format from the stream contents and decodes it.
/// Detection order is PNG, JPEG, BMP, TGA; the first match decodes the image.
/// Throws ImageIOException if no detector accepts the stream.
IFImage read_image_from_reader(Reader reader, long req_chans) {
    if (detect_png(reader)) {
        return read_png(reader, req_chans);
    } else if (detect_jpeg(reader)) {
        return read_jpeg(reader, req_chans);
    } else if (detect_bmp(reader)) {
        return read_bmp(reader, req_chans);
    } else if (detect_tga(reader)) {
        return read_tga(reader, req_chans);
    } else {
        throw new ImageIOException("unknown image type");
    }
}
95 
/// Returns true if the stream begins with the 8-byte PNG file signature.
/// Any failure while reading counts as "not a PNG". The stream is rewound to
/// its start before returning.
bool detect_png(Reader stream) {
    scope(exit) stream.seek(0, SEEK_SET);   // always leave the stream at the start
    try {
        ubyte[8] tmp = void;
        stream.readExact(tmp, tmp.length);
        return (tmp[0..8] == png_file_header[0..$]);
    } catch (Throwable) {
        // explicit Throwable: same meaning as the deprecated bare `catch`
        return false;
    }
}
107 
/// Returns true if read_jpeg_info succeeds on the stream.
/// Any failure counts as "not a JPEG". The stream is rewound to its start
/// before returning.
bool detect_jpeg(Reader stream) {
    scope(exit) stream.seek(0, SEEK_SET);   // always leave the stream at the start
    try {
        long w, h, c;   // results are discarded; only success matters
        read_jpeg_info(stream, w, h, c);
        return true;
    } catch (Throwable) {
        // explicit Throwable: same meaning as the deprecated bare `catch`
        return false;
    }
}
119 
/// Returns true if the stream starts with "BM" and the little-endian 32-bit
/// value at byte offset 14 (the DIB header size) is one of the accepted sizes.
/// Any failure while reading counts as "not a BMP". The stream is rewound to
/// its start before returning.
bool detect_bmp(Reader stream) {
    scope(exit) stream.seek(0, SEEK_SET);   // always leave the stream at the start
    try {
        ubyte[18] tmp = void;  // bmp header + size of dib header
        stream.readExact(tmp, tmp.length);
        size_t ds = littleEndianToNative!uint(tmp[14..18]);
        return (tmp[0..2] == ['B', 'M']
            && (ds == 12 || ds == 40 || ds == 52 || ds == 56 || ds == 108 || ds == 124));
    } catch (Throwable) {
        // explicit Throwable: same meaning as the deprecated bare `catch`
        return false;
    }
}
133 
/// Returns true if read_tga_header accepts the first bytes of the stream.
/// Any failure counts as "not a TGA". The stream is rewound to its start
/// before returning.
bool detect_tga(Reader stream) {
    scope(exit) stream.seek(0, SEEK_SET);   // always leave the stream at the start
    try {
        read_tga_header(stream);    // throws on a header it considers corrupt
        return true;
    } catch (Throwable) {
        // explicit Throwable: same meaning as the deprecated bare `catch`
        return false;
    }
}
144 
145 // --------------------------------------------------------------------------------
146 // PNG
147 
148 import std.digest.crc;
149 import std.zlib;
150 
/// Contents of a PNG IHDR chunk, as filled in by read_png_header.
public struct PNG_Header {
    int     width;                  /// big-endian 32-bit value at file offset 16
    int     height;                 /// big-endian 32-bit value at file offset 20
    ubyte   bit_depth;              /// byte 24; read_png accepts only 8
    ubyte   color_type;             /// byte 25; raw value, compared against PNG_ColorType
    ubyte   compression_method;     /// byte 26; read_png accepts only 0
    ubyte   filter_method;          /// byte 27; read_png accepts only 0
    ubyte   interlace_method;       /// byte 28; read_png accepts 0 and 1
}
161 
/// Reads the PNG signature and IHDR chunk from the file named `filename`.
public PNG_Header read_png_header(in char[] filename) {
    scope input = new FileReader(filename);
    PNG_Header header = read_png_header(input);
    return header;
}
167 
/// Reads the first 33 bytes of the stream (file signature plus the complete
/// IHDR chunk) and returns the parsed header fields.
/// Throws ImageIOException("corrupt header") if the signature, the IHDR
/// length/type bytes or the IHDR CRC do not match.
PNG_Header read_png_header(Reader stream) {
    ubyte[33] raw = void;  // signature(8) | len(4) | type(4) | data(13) | crc(4)
    stream.readExact(raw, raw.length);

    // the chunk CRC covers type + data; crc32Of yields it in reversed byte order
    ubyte[4] expected_crc = crc32Of(raw[12..29]);
    reverse(expected_crc[]);

    immutable bool signature_ok = raw[0..8] == png_file_header[0..$];
    immutable bool ihdr_ok      = raw[8..16] == [0x0,0x0,0x0,0xd,'I','H','D','R'];
    immutable bool crc_ok       = expected_crc == raw[29..33];
    if (!(signature_ok && ihdr_ok && crc_ok))
        throw new ImageIOException("corrupt header");

    PNG_Header header;
    header.width              = bigEndianToNative!int(raw[16..20]);
    header.height             = bigEndianToNative!int(raw[20..24]);
    header.bit_depth          = raw[24];
    header.color_type         = raw[25];
    header.compression_method = raw[26];
    header.filter_method      = raw[27];
    header.interlace_method   = raw[28];
    return header;
}
190 
/// Decodes the PNG file named `filename`.
/// `req_chans` of 0 keeps the channel count of the source image.
public IFImage read_png(in char[] filename, long req_chans = 0) {
    scope input = new FileReader(filename);
    IFImage image = read_png(input, req_chans);
    return image;
}
196 
/// Decodes a PNG held in the memory buffer `source`.
/// `req_chans` of 0 keeps the channel count of the source image.
public IFImage read_png_from_mem(in ubyte[] source, long req_chans = 0) {
    scope input = new MemReader(source);
    IFImage image = read_png(input, req_chans);
    return image;
}
202 
/// Decodes a PNG from `stream`.
///
/// Params:
///   stream    = source positioned at the start of the PNG data
///   req_chans = wanted channel count 1..4, or 0 to keep the source's count
/// Returns: the decoded image with `c` set to the target channel count.
/// Throws: ImageIOException for an out-of-range `req_chans`, a corrupt header,
///   invalid dimensions, or a bit depth, color type, compression, filter or
///   interlace method this decoder does not handle.
IFImage read_png(Reader stream, long req_chans = 0) {
    if (req_chans < 0 || 4 < req_chans)
        throw new ImageIOException("come on...");

    PNG_Header hdr = read_png_header(stream);

    immutable bool bad_dimensions = hdr.width < 1 || hdr.height < 1
                                 || int.max < cast(ulong) hdr.width * hdr.height;
    if (bad_dimensions)
        throw new ImageIOException("invalid dimensions");

    if (hdr.bit_depth != 8)
        throw new ImageIOException("only 8-bit images supported");

    switch (hdr.color_type) {
        case PNG_ColorType.Y:
        case PNG_ColorType.RGB:
        case PNG_ColorType.Idx:
        case PNG_ColorType.YA:
        case PNG_ColorType.RGBA:
            break;
        default:
            throw new ImageIOException("color type not supported");
    }

    immutable bool methods_ok = hdr.compression_method == 0
                             && hdr.filter_method == 0
                             && (hdr.interlace_method == 0 || hdr.interlace_method == 1);
    if (!methods_ok)
        throw new ImageIOException("not supported");

    PNG_Decoder dc;
    dc.stream      = stream;
    dc.src_indexed = (hdr.color_type == PNG_ColorType.Idx);
    dc.src_chans   = channels(cast(PNG_ColorType) hdr.color_type);
    dc.ilace       = hdr.interlace_method;
    dc.w           = hdr.width;
    dc.h           = hdr.height;
    if (req_chans == 0)
        dc.tgt_chans = dc.src_chans;
    else
        dc.tgt_chans = cast(int) req_chans;

    IFImage image;
    image.w      = dc.w;
    image.h      = dc.h;
    image.c      = cast(ColFmt) dc.tgt_chans;
    image.pixels = decode_png(dc);
    return image;
}
241 
/// Encodes `data` as a PNG and writes it to the file named `file`.
/// `tgt_chans` of 0 keeps the channel count deduced from `data`.
public void write_png(in char[] file, long w, long h, in ubyte[] data, long tgt_chans = 0)
{
    scope sink = new FileWriter(file);
    write_png(sink, w, h, data, tgt_chans);
}
248 
/// Encodes `data` as a PNG and returns what the in-memory writer collected.
/// `tgt_chans` of 0 keeps the channel count deduced from `data`.
public ubyte[] write_png_to_mem(long w, long h, in ubyte[] data, long tgt_chans = 0) {
    scope sink = new MemWriter();
    write_png(sink, w, h, data, tgt_chans);
    return sink.result;
}
255 
/// The 8-byte signature at the start of every PNG file; used both to detect
/// PNG input and as the first bytes written by the encoder.
immutable ubyte[8] png_file_header =
    [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a];
258 
/// Number of channels the decoder produces for a PNG color type.
/// Indexed images count as 3 because palette entries expand to RGB.
int channels(PNG_ColorType ct) pure nothrow {
    final switch (ct) {
        case PNG_ColorType.Y:
            return 1;
        case PNG_ColorType.YA:
            return 2;
        case PNG_ColorType.RGB:
        case PNG_ColorType.Idx:
            return 3;
        case PNG_ColorType.RGBA:
            return 4;
    }
}
267 
/// PNG color type the encoder uses for a given channel count (1..4).
/// Any other count is a programming error and hits assert(0).
PNG_ColorType color_type(long channels) pure nothrow {
    if (channels == 1) return PNG_ColorType.Y;
    if (channels == 2) return PNG_ColorType.YA;
    if (channels == 3) return PNG_ColorType.RGB;
    if (channels == 4) return PNG_ColorType.RGBA;
    assert(0);
}
277 
/// State shared by the PNG decoding functions for one image.
struct PNG_Decoder {
    Reader stream;          // source of the PNG bytes
    bool src_indexed;       // true when the header's color type is Idx (paletted)
    int src_chans;          // channels of the source, from channels(color type)
    int tgt_chans;          // channels of the output pixels
    size_t w, h;            // image dimensions from the header
    ubyte ilace;            // interlace method from the header (see InterlaceMethod)

    UnCompress uc;          // zlib decompressor, created in decode_png
    CRC32 crc;              // running CRC of the chunk currently being read
    ubyte[12] chunkmeta;  // crc | length and type
    ubyte[] read_buf;       // compressed input buffer (4096 bytes, allocated in decode_png)
    ubyte[] uc_buf;     // uncompressed
    ubyte[] palette;        // raw PLTE chunk contents, 3 bytes per entry
}
293 
/// Walks the chunks that follow IHDR and returns the decoded pixel data.
///
/// Invariant of the loop: dc.chunkmeta[4..8] holds the length and
/// dc.chunkmeta[8..12] the type of the chunk about to be handled. Every
/// branch that continues the loop reads the 12 bytes "crc of this chunk |
/// length and type of the next chunk" into dc.chunkmeta before it finishes
/// (read_IDAT_stream does so for the IDAT case).
///
/// Throws: ImageIOException on a chunk in an unexpected position, a negative
///   chunk length, a malformed PLTE or IEND chunk, or a CRC mismatch.
ubyte[] decode_png(ref PNG_Decoder dc) {
    dc.uc = new UnCompress(HeaderFormat.deflate);
    dc.read_buf = new ubyte[4096];

    // how far the chunk stream has been consumed
    enum Stage {
        IHDR_parsed,
        PLTE_parsed,
        IDAT_parsed,
        IEND_parsed,
    }

    ubyte[] result;
    auto stage = Stage.IHDR_parsed;
    dc.stream.readExact(dc.chunkmeta[4..$], 8);  // next chunk's len and type

    while (stage != Stage.IEND_parsed) {
        int len = bigEndianToNative!int(dc.chunkmeta[4..8]);
        if (len < 0)    // top bit set in the 32-bit length field
            throw new ImageIOException("chunk too long");

        // standard allows PLTE chunk for non-indexed images too but we don't
        dc.crc.put(dc.chunkmeta[8..12]);  // type
        switch (cast(char[]) dc.chunkmeta[8..12]) {    // chunk type
            case "IDAT":
                // IDAT may follow IHDR directly, or PLTE only for indexed images
                if (! (stage == Stage.IHDR_parsed ||
                      (stage == Stage.PLTE_parsed && dc.src_indexed)) )
                    throw new ImageIOException("corrupt chunk stream");
                // consumes the whole IDAT sequence and leaves the following
                // chunk's length and type in dc.chunkmeta
                result = read_IDAT_stream(dc, len);
                stage = Stage.IDAT_parsed;
                break;
            case "PLTE":
                if (stage != Stage.IHDR_parsed)
                    throw new ImageIOException("corrupt chunk stream");
                int entries = len / 3;  // 3 bytes per palette entry
                if (len % 3 != 0 || 256 < entries)
                    throw new ImageIOException("corrupt chunk");
                dc.palette = new ubyte[len];
                dc.stream.readExact(dc.palette, dc.palette.length);
                dc.crc.put(dc.palette);
                dc.stream.readExact(dc.chunkmeta, 12); // crc | len, type
                ubyte[4] crc = dc.crc.finish;
                reverse(crc[]);     // match the byte order stored in the file
                if (crc != dc.chunkmeta[0..4])
                    throw new ImageIOException("corrupt chunk");
                stage = Stage.PLTE_parsed;
                break;
            case "IEND":
                if (stage != Stage.IDAT_parsed)
                    throw new ImageIOException("corrupt chunk stream");
                dc.stream.readExact(dc.chunkmeta, 4); // crc
                // IEND carries no data, so its crc is compared against fixed bytes
                if (len != 0 || dc.chunkmeta[0..4] != [0xae, 0x42, 0x60, 0x82])
                    throw new ImageIOException("corrupt chunk");
                stage = Stage.IEND_parsed;
                break;
            case "IHDR":
                // IHDR was already consumed by read_png_header
                throw new ImageIOException("corrupt chunk stream");
            default:
                // unknown chunk, ignore but check crc
                while (0 < len) {
                    size_t bytes = min(len, dc.read_buf.length);
                    dc.stream.readExact(dc.read_buf, bytes);
                    len -= bytes;
                    dc.crc.put(dc.read_buf[0..bytes]);
                }
                dc.stream.readExact(dc.chunkmeta, 12); // crc | len, type
                ubyte[4] crc = dc.crc.finish;
                reverse(crc[]);
                if (crc != dc.chunkmeta[0..4])
                    throw new ImageIOException("corrupt chunk");
        }
    }

    return result;
}
368 
/// Color type values as stored in the PNG header's color_type byte.
enum PNG_ColorType : ubyte {
    Y    = 0,   // one channel
    RGB  = 2,   // three channels
    Idx  = 3,   // palette index per pixel; decoded to three channels
    YA   = 4,   // two channels
    RGBA = 6,   // four channels
}
376 
/// Value of the filter byte that precedes every scanline; interpreted by recon.
enum PNG_FilterType : ubyte {
    None    = 0,    // bytes are stored unchanged
    Sub     = 1,    // predicted from the byte one pixel to the left
    Up      = 2,    // predicted from the byte above
    Average = 3,    // predicted from the mean of left and above
    Paeth   = 4,    // predicted by paeth(left, above, upper-left)
}
384 
/// Values of the header's interlace_method byte accepted by read_png.
enum InterlaceMethod {
    None = 0, Adam7 = 1
}
388 
/// Decodes the whole IDAT chunk sequence into target-format pixels.
///
/// Params:
///   dc  = decoder state; dc.chunkmeta describes the first IDAT chunk on entry
///   len = data length of that first IDAT chunk
/// Returns: w * h * tgt_chans bytes of pixel data.
/// On return dc.chunkmeta holds the length and type of the first chunk after
/// the IDAT sequence, and the CRC of every IDAT chunk has been verified.
/// Throws: ImageIOException on CRC mismatch, missing data, a bad filter type,
///   or a palette index outside the palette.
///
/// Changes from the previous version:
///   * Adam7: the "previous line" buffer is cleared at the start of every
///     pass; it used to carry stale bytes from the pass before.
///   * Adam7: passes without pixels are skipped; they have no scanlines and
///     therefore no filter bytes.
///   * Unread bytes of the current IDAT chunk and any trailing IDAT chunks
///     are consumed (CRC-checked) after the last scanline, so the next chunk
///     header is read from the right position.
ubyte[] read_IDAT_stream(ref PNG_Decoder dc, int len) {
    bool metaready = false;     // chunk len, type, crc

    // distance in bytes between corresponding bytes of neighbouring pixels
    immutable uint filter_step = dc.src_indexed ? 1 : dc.src_chans;
    immutable size_t tgt_linesize = cast(size_t) (dc.w * dc.tgt_chans);

    ubyte[] depaletted_line = dc.src_indexed ? new ubyte[cast(size_t)dc.w * 3] : null;
    ubyte[] result = new ubyte[cast(size_t)(dc.w * dc.h * dc.tgt_chans)];

    const LineConv chan_convert = get_converter(dc.src_chans, dc.tgt_chans);

    // expands palette indices to RGB, then converts to the target channels
    void depalette_convert(in ubyte[] src_line, ubyte[] tgt_line) {
        for (size_t s, d;  s < src_line.length;  s+=1, d+=3) {
            size_t pidx = src_line[s] * 3;
            if (dc.palette.length < pidx + 3)
                throw new ImageIOException("palette idx wrong");
            depaletted_line[d .. d+3] = dc.palette[pidx .. pidx+3];
        }
        chan_convert(depaletted_line[0 .. src_line.length*3], tgt_line);
    }

    void simple_convert(in ubyte[] src_line, ubyte[] tgt_line) {
        chan_convert(src_line, tgt_line);
    }

    const convert = dc.src_indexed ? &depalette_convert : &simple_convert;

    if (dc.ilace == InterlaceMethod.None) {
        immutable size_t src_sl_size = cast(size_t) dc.w * filter_step;
        auto cline = new ubyte[src_sl_size+1];   // current line + filter byte
        auto pline = new ubyte[src_sl_size+1];   // previous line, inited to 0
        debug(DebugPNG) assert(pline[0] == 0);

        size_t tgt_si = 0;    // scanline index in target buffer
        foreach (j; 0 .. dc.h) {
            uncompress_line(dc, len, metaready, cline);
            ubyte filter_type = cline[0];

            recon(cline[1..$], pline[1..$], filter_type, filter_step);
            convert(cline[1 .. $], result[tgt_si .. tgt_si + tgt_linesize]);
            tgt_si += tgt_linesize;

            ubyte[] _swap = pline;
            pline = cline;
            cline = _swap;
        }
    } else {
        // Adam7 interlacing

        // width and height of the reduced image of each pass
        immutable size_t[7] redw = [
            (dc.w + 7) / 8,
            (dc.w + 3) / 8,
            (dc.w + 3) / 4,
            (dc.w + 1) / 4,
            (dc.w + 1) / 2,
            (dc.w + 0) / 2,
            (dc.w + 0) / 1,
        ];
        immutable size_t[7] redh = [
            (dc.h + 7) / 8,
            (dc.h + 7) / 8,
            (dc.h + 3) / 8,
            (dc.h + 3) / 4,
            (dc.h + 1) / 4,
            (dc.h + 1) / 2,
            (dc.h + 0) / 2,
        ];

        const size_t max_scanline_size = cast(size_t) (dc.w * filter_step);
        auto linebuf0 = new ubyte[max_scanline_size+1]; // +1 for filter type byte
        auto linebuf1 = new ubyte[max_scanline_size+1]; // +1 for filter type byte
        auto redlinebuf = new ubyte[cast(size_t) dc.w * dc.tgt_chans];

        foreach (pass; 0 .. 7) {
            // an empty pass has no scanlines, hence no filter bytes either
            if (redw[pass] == 0 || redh[pass] == 0)
                continue;

            const A7_Catapult tgt_px = a7_catapults[pass];   // target pixel
            const size_t src_linesize = redw[pass] * filter_step;
            ubyte[] cline = linebuf0[0 .. src_linesize+1];
            ubyte[] pline = linebuf1[0 .. src_linesize+1];

            // the line "above" the first scanline of a pass is all zeros;
            // the buffers still hold data from the pass before
            pline[] = 0;

            foreach (j; 0 .. redh[pass]) {
                uncompress_line(dc, len, metaready, cline);
                ubyte filter_type = cline[0];

                recon(cline[1..$], pline[1..$], filter_type, filter_step);
                convert(cline[1 .. $], redlinebuf[0 .. redw[pass]*dc.tgt_chans]);

                // scatter the reduced line's pixels to their final positions
                for (size_t i, redi; i < redw[pass]; ++i, redi += dc.tgt_chans) {
                    size_t tgt = tgt_px(i, j, dc.w) * dc.tgt_chans;
                    result[tgt .. tgt + dc.tgt_chans] =
                        redlinebuf[redi .. redi + dc.tgt_chans];
                }

                ubyte[] _swap = pline;
                pline = cline;
                cline = _swap;
            }
        }
    }

    if (!metaready) {
        // All pixels are decoded but the IDAT sequence may not be fully read:
        // the current chunk can still hold bytes (e.g. the end of the zlib
        // stream) and further IDAT chunks may follow. Consume them, checking
        // each CRC, until the header of the first non-IDAT chunk is loaded.
        while (true) {
            while (0 < len) {
                size_t bytes = min(len, dc.read_buf.length);
                dc.stream.readExact(dc.read_buf, bytes);
                len -= bytes;
                dc.crc.put(dc.read_buf[0..bytes]);
            }

            dc.stream.readExact(dc.chunkmeta, 12);   // crc | len & type
            ubyte[4] crc = dc.crc.finish;
            reverse(crc[]);
            if (crc != dc.chunkmeta[0..4])
                throw new ImageIOException("corrupt chunk");

            if (dc.chunkmeta[8..12] != "IDAT")
                break;

            len = bigEndianToNative!int(dc.chunkmeta[4..8]);
            if (len < 0)
                throw new ImageIOException("chunk too long");
            dc.crc.put(dc.chunkmeta[8..12]);  // type
        }
    }
    return result;
}
497 
/// Maps pixel (redx, redy) of an Adam7 pass's reduced image to the index of
/// the corresponding pixel in the full image, whose width is dstw.
alias A7_Catapult = size_t function(size_t redx, size_t redy, size_t dstw);

/// One mapping function per Adam7 pass; read_IDAT_stream indexes this array
/// with the zero-based pass number.
immutable A7_Catapult[7] a7_catapults = [
    &a7_red1_to_dst,
    &a7_red2_to_dst,
    &a7_red3_to_dst,
    &a7_red4_to_dst,
    &a7_red5_to_dst,
    &a7_red6_to_dst,
    &a7_red7_to_dst,
];
508 
// Adam7 position mappings: each function turns a pixel position in the reduced
// image of one pass into a pixel index (row * dstw + column) of the full image.
pure nothrow {
  /// pass 1: rows 0, 8, 16, ...; columns 0, 8, 16, ...
  size_t a7_red1_to_dst(size_t redx, size_t redy, size_t dstw) {
      immutable size_t row = redy * 8, col = redx * 8;
      return row * dstw + col;
  }

  /// pass 2: rows 0, 8, 16, ...; columns 4, 12, 20, ...
  size_t a7_red2_to_dst(size_t redx, size_t redy, size_t dstw) {
      immutable size_t row = redy * 8, col = redx * 8 + 4;
      return row * dstw + col;
  }

  /// pass 3: rows 4, 12, 20, ...; columns 0, 4, 8, ...
  size_t a7_red3_to_dst(size_t redx, size_t redy, size_t dstw) {
      immutable size_t row = redy * 8 + 4, col = redx * 4;
      return row * dstw + col;
  }

  /// pass 4: rows 0, 4, 8, ...; columns 2, 6, 10, ...
  size_t a7_red4_to_dst(size_t redx, size_t redy, size_t dstw) {
      immutable size_t row = redy * 4, col = redx * 4 + 2;
      return row * dstw + col;
  }

  /// pass 5: rows 2, 6, 10, ...; columns 0, 2, 4, ...
  size_t a7_red5_to_dst(size_t redx, size_t redy, size_t dstw) {
      immutable size_t row = redy * 4 + 2, col = redx * 2;
      return row * dstw + col;
  }

  /// pass 6: rows 0, 2, 4, ...; columns 1, 3, 5, ...
  size_t a7_red6_to_dst(size_t redx, size_t redy, size_t dstw) {
      immutable size_t row = redy * 2, col = redx * 2 + 1;
      return row * dstw + col;
  }

  /// pass 7: rows 1, 3, 5, ...; every column
  size_t a7_red7_to_dst(size_t redx, size_t redy, size_t dstw) {
      immutable size_t row = redy * 2 + 1, col = redx;
      return row * dstw + col;
  }
}
518 
/// Fills `dst` completely with the next bytes of the decompressed IDAT stream.
///
/// Params:
///   dc        = decoder state (stream, decompressor, buffers, running CRC)
///   length    = unread data bytes of the current IDAT chunk; updated here
///   metaready = set to true once the header of the first non-IDAT chunk has
///               been read into dc.chunkmeta, i.e. the IDAT sequence is over
///   dst       = buffer to fill (one scanline including its filter byte)
/// Throws: ImageIOException on a CRC mismatch ("corrupt chunk") or when the
///   IDAT sequence ends before `dst` is full ("not enough data").
///
/// Change from the previous version: when the IDAT sequence has already ended
/// (`metaready` is true) and more data is requested, this now throws instead
/// of reading the following chunk's payload as if it were IDAT data.
void uncompress_line(ref PNG_Decoder dc, ref int length, ref bool metaready, ubyte[] dst) {
    // start with what is left over from the previous decompression step
    size_t readysize = min(dst.length, dc.uc_buf.length);
    dst[0 .. readysize] = dc.uc_buf[0 .. readysize];
    dc.uc_buf = dc.uc_buf[readysize .. $];

    while (readysize != dst.length) {
        // need new data for dc.uc_buf...
        if (metaready)  // ...but the IDAT sequence is already finished
            throw new ImageIOException("not enough data");

        if (length <= 0) {  // IDAT is read -> read next chunks meta
            dc.stream.readExact(dc.chunkmeta, 12);   // crc | len & type
            ubyte[4] crc = dc.crc.finish;
            reverse(crc[]);
            if (crc != dc.chunkmeta[0..4])
                throw new ImageIOException("corrupt chunk");

            length = bigEndianToNative!int(dc.chunkmeta[4..8]);
            if (dc.chunkmeta[8..12] != "IDAT") {
                // no new IDAT chunk so flush, this is the end of the IDAT stream
                metaready = true;
                dc.uc_buf = cast(ubyte[]) dc.uc.flush();
                size_t part2 = dst.length - readysize;
                if (dc.uc_buf.length < part2)
                    throw new ImageIOException("not enough data");
                dst[readysize .. readysize+part2] = dc.uc_buf[0 .. part2];
                dc.uc_buf = dc.uc_buf[part2 .. $];
                return;
            }
            if (length <= 0)    // empty IDAT chunk
                throw new ImageIOException("not enough data");
            dc.crc.put(dc.chunkmeta[8..12]);  // type
        }

        // read the next piece of the chunk, at most one read buffer's worth
        size_t bytes = min(length, dc.read_buf.length);
        dc.stream.readExact(dc.read_buf, bytes);
        length -= bytes;
        dc.crc.put(dc.read_buf[0..bytes]);

        if (bytes <= 0)
            throw new ImageIOException("not enough data");

        // the input is copied because read_buf is overwritten by the next read
        dc.uc_buf = cast(ubyte[]) dc.uc.uncompress(dc.read_buf[0..bytes].dup);

        size_t part2 = min(dst.length - readysize, dc.uc_buf.length);
        dst[readysize .. readysize+part2] = dc.uc_buf[0 .. part2];
        dc.uc_buf = dc.uc_buf[part2 .. $];
        readysize += part2;
    }
}
569 
/// Undoes the PNG scanline filter in place.
///
/// Params:
///   cline = filtered bytes of the current line (without the filter byte);
///           overwritten with the reconstructed bytes
///   pline = reconstructed bytes of the line above
///   ftype = filter byte of the current line (a PNG_FilterType value)
///   fstep = distance in bytes to the corresponding byte of the pixel to the left
/// Throws: ImageIOException for an unknown filter type.
void recon(ubyte[] cline, in ubyte[] pline, ubyte ftype, int fstep) pure {
    if (ftype == PNG_FilterType.None) {
        // stored as is
    } else if (ftype == PNG_FilterType.Sub) {
        for (size_t i = fstep; i < cline.length; ++i)
            cline[i] += cline[i - fstep];
    } else if (ftype == PNG_FilterType.Up) {
        for (size_t i = 0; i < cline.length; ++i)
            cline[i] += pline[i];
    } else if (ftype == PNG_FilterType.Average) {
        // the first pixel has no left neighbour, it counts as zero
        for (int i = 0; i < fstep; ++i)
            cline[i] += pline[i] / 2;
        for (size_t i = fstep; i < cline.length; ++i) {
            immutable uint left = cline[i - fstep];
            immutable uint up = pline[i];
            cline[i] += cast(ubyte) ((left + up) / 2);
        }
    } else if (ftype == PNG_FilterType.Paeth) {
        // the first pixel has neither a left nor an upper-left neighbour
        for (int i = 0; i < fstep; ++i)
            cline[i] += paeth(0, pline[i], 0);
        for (size_t i = fstep; i < cline.length; ++i)
            cline[i] += paeth(cline[i - fstep], pline[i], pline[i - fstep]);
    } else {
        throw new ImageIOException("filter type not supported");
    }
}
599 
/// Paeth predictor: of a (left), b (above) and c (upper-left), returns the one
/// closest to a + b - c. Ties are resolved in the order a, b, c.
ubyte paeth(ubyte a, ubyte b, ubyte c) pure nothrow {
    immutable int estimate = cast(int) a + cast(int) b - cast(int) c;

    int dist_a = estimate - a;
    int dist_b = estimate - b;
    int dist_c = estimate - c;
    dist_a = (dist_a < 0) ? -dist_a : dist_a;
    dist_b = (dist_b < 0) ? -dist_b : dist_b;
    dist_c = (dist_c < 0) ? -dist_c : dist_c;

    if (dist_a <= dist_b && dist_a <= dist_c)
        return a;
    if (dist_b <= dist_c)
        return b;
    return c;
}
616 
617 // ----------------------------------------------------------------------
618 // PNG encoder
619 
/// Encodes `data` as an 8-bit PNG and writes it to `stream`.
///
/// Params:
///   stream    = destination; flushed when encoding is done
///   w, h      = image dimensions, each in 1 .. int.max
///   data      = pixel bytes; the source channel count is data.length / w / h
///   tgt_chans = channel count to write (1..4), or 0 to keep the source's count
/// Throws: ImageIOException for invalid dimensions, an invalid channel count,
///   or a data length that is not w * h * channels.
void write_png(Writer stream, long w, long h, in ubyte[] data, long tgt_chans = 0) {
    immutable bool dims_ok = 1 <= w && 1 <= h && w <= int.max && h <= int.max;
    if (!dims_ok)
        throw new ImageIOException("invalid dimensions");

    uint src_chans = cast(uint) (data.length / w / h);
    immutable bool chans_ok = 1 <= src_chans && src_chans <= 4
                           && 0 <= tgt_chans && tgt_chans <= 4;
    if (!chans_ok)
        throw new ImageIOException("invalid channel count");

    if (src_chans * w * h != data.length)
        throw new ImageIOException("mismatching dimensions and length");

    PNG_Encoder ec;
    ec.stream    = stream;
    ec.w         = cast(size_t) w;
    ec.h         = cast(size_t) h;
    ec.src_chans = src_chans;
    ec.tgt_chans = (tgt_chans != 0) ? cast(uint) tgt_chans : src_chans;
    ec.data      = data;

    write_png(ec);
    stream.flush();
}
641 
/// State shared by the PNG encoding functions for one image.
struct PNG_Encoder {
    Writer stream;          // destination of the encoded bytes
    size_t w, h;            // image dimensions
    uint src_chans;         // channels per pixel in `data`
    uint tgt_chans;         // channels per pixel written to the file
    const(ubyte)[] data;    // source pixels

    CRC32 crc;              // running CRC of the chunk being written

    uint writelen;      // how much written of current idat data
    ubyte[] chunk_buf;  // len type data crc
    ubyte[] data_buf;   // slice of chunk_buf, for just chunk data
}
655 
/// Writes the complete PNG file for `ec`: signature, IHDR chunk, the IDAT
/// chunks and the IEND chunk.
void write_png(ref PNG_Encoder ec) {
    static immutable ubyte[12] end_chunk =
        [0, 0, 0, 0, 'I','E','N','D', 0xae, 0x42, 0x60, 0x82];

    // signature(8) | IHDR len(4) | type(4) | data(13) | crc(4)
    ubyte[33] head = void;
    head[ 0 ..  8] = png_file_header;
    head[ 8 .. 16] = [0x0, 0x0, 0x0, 0xd, 'I','H','D','R'];
    head[16 .. 20] = nativeToBigEndian(cast(uint) ec.w);
    head[20 .. 24] = nativeToBigEndian(cast(uint) ec.h);
    head[24      ] = 8;  // bit depth
    head[25      ] = color_type(ec.tgt_chans);
    head[26 .. 29] = 0;  // compression, filter and interlace methods

    // chunk CRC covers type and data, stored in reversed byte order
    ec.crc.start();
    ec.crc.put(head[12 .. 29]);
    ubyte[4] checksum = ec.crc.finish();
    reverse(checksum[]);
    head[29 .. 33] = checksum;

    ec.stream.rawWrite(head);
    write_IDATs(ec);
    ec.stream.rawWrite(end_chunk);
}
678 
/// Converts every source line to the target channel count, applies the Paeth
/// filter to it, compresses the filtered image in one go (zlib level 6) and
/// writes the result as IDAT chunks of at most 4 * 4096 data bytes.
void write_IDATs(ref PNG_Encoder ec) {
    static immutable ubyte[4] IDAT_type = ['I','D','A','T'];
    immutable long max_idatlen = 4 * 4096;

    // chunk buffer layout: length(4) | type(4) | data | crc(4)
    ec.chunk_buf = new ubyte[8 + max_idatlen + 4];
    ec.data_buf = ec.chunk_buf[8 .. 8 + max_idatlen];
    ec.chunk_buf[4 .. 8] = IDAT_type;
    ec.writelen = 0;

    const LineConv to_target = get_converter(ec.src_chans, ec.tgt_chans);

    immutable size_t pixel_step = ec.tgt_chans;         // step between pixels, in bytes
    immutable size_t src_stride = ec.w * ec.src_chans;
    immutable size_t row_len = ec.w * ec.tgt_chans + 1; // +1 for filter type

    ubyte[] current = new ubyte[row_len];
    ubyte[] previous = new ubyte[row_len];              // line above, starts as zeros
    debug(DebugPNG) assert(previous[0] == 0);

    ubyte[] filtered_row = new ubyte[row_len];
    ubyte[] filtered_rows;

    size_t src_pos = 0;
    for (size_t row = 0; row < ec.h; ++row) {
        to_target(ec.data[src_pos .. src_pos + src_stride], current[1 .. $]);
        src_pos += src_stride;

        filtered_row[0] = PNG_FilterType.Paeth;

        // first pixel: no left and no upper-left neighbour
        for (size_t i = 1; i < pixel_step + 1; ++i)
            filtered_row[i] = cast(ubyte) (current[i] - paeth(0, previous[i], 0));

        for (size_t i = pixel_step + 1; i < current.length; ++i) {
            immutable ubyte predicted =
                paeth(current[i - pixel_step], previous[i], previous[i - pixel_step]);
            filtered_row[i] = cast(ubyte) (current[i] - predicted);
        }

        filtered_rows ~= filtered_row;

        // the current line becomes the previous one for the next row
        ubyte[] spare = previous;
        previous = current;
        current = spare;
    }

    const (void)[] packed = compress(filtered_rows, 6);

    ec.write_to_IDAT_stream(packed);
    if (0 < ec.writelen)
        ec.write_IDAT_chunk();      // last, partially filled chunk
}
726 
/// Appends compressed bytes to the IDAT data buffer and writes a chunk to the
/// stream every time the buffer becomes full. Bytes that do not fill the
/// buffer stay in it; ec.writelen tells how many.
void write_to_IDAT_stream(ref PNG_Encoder ec, in void[] _compressed) {
    ubyte[] pending = cast(ubyte[]) _compressed;
    while (pending.length != 0) {
        size_t room = ec.data_buf.length - ec.writelen;
        size_t count = min(room, pending.length);
        size_t start = ec.writelen;

        ec.data_buf[start .. start + count] = pending[0 .. count];
        pending = pending[count .. $];
        ec.writelen += count;

        if (ec.writelen == ec.data_buf.length)
            write_IDAT_chunk(ec);
    }
}
740 
// Writes the buffered IDAT data as one chunk and empties the buffer.
// chunk: len type data crc, type is already in buf
void write_IDAT_chunk(ref PNG_Encoder ec) {
    immutable size_t data_end = 8 + ec.writelen;    // end of the data in chunk_buf

    ec.chunk_buf[0 .. 4] = nativeToBigEndian!uint(ec.writelen);

    ec.crc.put(ec.chunk_buf[4 .. data_end]);   // crc of type and data
    ubyte[4] checksum = ec.crc.finish();
    reverse(checksum[]);
    ec.chunk_buf[data_end .. data_end + 4] = checksum;

    ec.stream.rawWrite(ec.chunk_buf[0 .. data_end + 4]);
    ec.writelen = 0;
}
751 
/// Reads width, height and channel count from the header of the PNG file
/// named `filename`.
public void read_png_info(in char[] filename, out long w, out long h, out long chans) {
    scope input = new FileReader(filename);
    read_png_info(input, w, h, chans);
}
757 
/// Reads width, height and channel count from a PNG header.
///
/// `chans` is the channel count the decoder would produce for the header's
/// color type (3 for indexed images), or 0 when the color type is not one of
/// the known PNG_ColorType values. The raw header byte is checked first
/// because channels() uses a final switch, which must only ever see valid
/// enum members.
/// Throws: ImageIOException if the header is corrupt.
void read_png_info(Reader stream, out long w, out long h, out long chans) {
    PNG_Header hdr = read_png_header(stream);
    w = hdr.width;
    h = hdr.height;
    switch (hdr.color_type) {
        case PNG_ColorType.Y:
        case PNG_ColorType.RGB:
        case PNG_ColorType.Idx:
        case PNG_ColorType.YA:
        case PNG_ColorType.RGBA:
            chans = channels(cast(PNG_ColorType) hdr.color_type);
            break;
        default:
            chans = 0;  // unknown color type -> unknown number of channels
            break;
    }
}
764 
765 // --------------------------------------------------------------------------------
766 // TGA
767 
/// The 18-byte TGA file header, as filled in by read_tga_header.
/// Multi-byte fields are stored little-endian in the file.
public struct TGA_Header {
   ubyte id_length;         /// byte 0; read_tga skips this many bytes after the header
   ubyte palette_type;      /// byte 1; read_tga_header accepts only 0 and 1
   ubyte data_type;         /// byte 2; compared against TGA_DataType
   ushort palette_start;    /// bytes 3..4
   ushort palette_length;   /// bytes 5..6
   ubyte palette_bits;      /// byte 7
   ushort x_origin;         /// bytes 8..9
   ushort y_origin;         /// bytes 10..11
   ushort width;            /// bytes 12..13
   ushort height;           /// bytes 14..15
   ubyte bits_pp;           /// byte 16; bits per pixel
   ubyte flags;             /// byte 17; read_tga reads bits 0..3 as attribute bits per
                            /// pixel, 0x10 as right-to-left, 0x20 as origin at top and
                            /// 0xc0 as interlacing
}
783 
/// Reads and validates the TGA header of the file named `filename`.
public TGA_Header read_tga_header(in char[] filename) {
    scope input = new FileReader(filename);
    TGA_Header header = read_tga_header(input);
    return header;
}
789 
/// Reads the 18-byte TGA header from `stream` and performs basic sanity checks.
/// Throws ImageIOException("corrupt TGA header") when a dimension is zero, the
/// palette type is above 1, palette fields are set although the palette type
/// is 0, or the data type is in 4..8 or at least 12.
TGA_Header read_tga_header(Reader stream) {
    ubyte[18] raw = void;
    stream.readExact(raw, raw.length);

    TGA_Header hdr;
    hdr.id_length      = raw[0];
    hdr.palette_type   = raw[1];
    hdr.data_type      = raw[2];
    hdr.palette_start  = littleEndianToNative!ushort(raw[3..5]);
    hdr.palette_length = littleEndianToNative!ushort(raw[5..7]);
    hdr.palette_bits   = raw[7];
    hdr.x_origin       = littleEndianToNative!ushort(raw[8..10]);
    hdr.y_origin       = littleEndianToNative!ushort(raw[10..12]);
    hdr.width          = littleEndianToNative!ushort(raw[12..14]);
    hdr.height         = littleEndianToNative!ushort(raw[14..16]);
    hdr.bits_pp        = raw[16];
    hdr.flags          = raw[17];

    immutable bool empty_image = hdr.width < 1 || hdr.height < 1;
    immutable bool bad_palette_type = hdr.palette_type > 1;
    immutable bool stray_palette_fields = hdr.palette_type == 0
        && (hdr.palette_start != 0 || hdr.palette_length != 0 || hdr.palette_bits != 0);
    immutable bool bad_data_type =
        (4 <= hdr.data_type && hdr.data_type <= 8) || 12 <= hdr.data_type;

    if (empty_image || bad_palette_type || stray_palette_fields || bad_data_type)
        throw new ImageIOException("corrupt TGA header");

    return hdr;
}
818 
/// Decodes the TGA file named `filename`.
/// `req_chans` of 0 keeps the channel count of the source image.
public IFImage read_tga(in char[] filename, long req_chans = 0) {
    scope input = new FileReader(filename);
    IFImage image = read_tga(input, req_chans);
    return image;
}
824 
/// Decodes a TGA held in the memory buffer `source`.
/// `req_chans` of 0 keeps the channel count of the source image.
public IFImage read_tga_from_mem(in ubyte[] source, long req_chans = 0) {
    scope input = new MemReader(source);
    IFImage image = read_tga(input, req_chans);
    return image;
}
830 
/// Decodes a TGA from `stream`.
///
/// Params:
///   stream    = source positioned at the start of the TGA data
///   req_chans = wanted channel count 1..4, or 0 to keep the source's count
/// Returns: the decoded image with `c` set to the target channel count.
/// Throws: ImageIOException for an out-of-range `req_chans`, a corrupt header,
///   and for interlaced, right-to-left or paletted files and pixel depths this
///   decoder does not handle.
IFImage read_tga(Reader stream, long req_chans = 0) {
    if (req_chans < 0 || 4 < req_chans)
        throw new ImageIOException("come on...");

    TGA_Header hdr = read_tga_header(stream);

    if (hdr.width < 1 || hdr.height < 1)
        throw new ImageIOException("invalid dimensions");
    if (hdr.flags & 0xc0)   // two bits
        throw new ImageIOException("interlaced TGAs not supported");
    if (hdr.flags & 0x10)
        throw new ImageIOException("right-to-left TGAs not supported");

    ubyte attr_bits_pp = (hdr.flags & 0xf);
    if (attr_bits_pp != 0 && attr_bits_pp != 8) // some set it 0 although data has 8
        throw new ImageIOException("only 8-bit alpha/attribute(s) supported");
    if (hdr.palette_type)
        throw new ImageIOException("paletted TGAs not supported");

    // classify the data type first, then check the pixel depth for its family
    bool rle = false;
    bool gray = false;
    switch (hdr.data_type) with (TGA_DataType) {
        case TrueColor:
            break;
        case Gray:
            gray = true;
            break;
        case TrueColor_RLE:
            rle = true;
            break;
        case Gray_RLE:
            gray = true;
            rle = true;
            break;
        default:    // includes the paletted types 1 and 9
            throw new ImageIOException("data type not supported");
    }

    immutable bool depth_ok = gray
        ? (hdr.bits_pp == 8 || (hdr.bits_pp == 16 && attr_bits_pp == 8))
        : (hdr.bits_pp == 24 || hdr.bits_pp == 32);
    if (!depth_ok)
        throw new ImageIOException("not supported");

    int src_chans = hdr.bits_pp / 8;

    // the image id field sits between the header and the pixel data
    if (hdr.id_length)
        stream.seek(hdr.id_length, SEEK_CUR);

    TGA_Decoder dc = {
        stream         : stream,
        w              : hdr.width,
        h              : hdr.height,
        origin_at_top  : cast(bool) (hdr.flags & 0x20),
        bytes_pp       : hdr.bits_pp / 8,
        rle            : rle,
        tgt_chans      : (req_chans == 0) ? src_chans : cast(int) req_chans,
    };

    switch (dc.bytes_pp) {
        case 1: dc.src_fmt = _ColFmt.Y; break;
        case 2: dc.src_fmt = _ColFmt.YA; break;
        case 3: dc.src_fmt = _ColFmt.BGR; break;
        case 4: dc.src_fmt = _ColFmt.BGRA; break;
        default: throw new ImageIOException("TGA: format not supported");
    }

    IFImage image;
    image.w      = dc.w;
    image.h      = dc.h;
    image.c      = cast(ColFmt) dc.tgt_chans;
    image.pixels = decode_tga(dc);
    return image;
}
905 
/// Encodes `data` as TGA through a FileWriter created for `file`.
public void write_tga(in char[] file, long w, long h, in ubyte[] data, long tgt_chans = 0)
{
    // The Writer-based overload validates the arguments and does the encoding.
    scope sink = new FileWriter(file);
    write_tga(sink, w, h, data, tgt_chans);
}
912 
/// Encodes `data` as TGA into a MemWriter and returns that writer's result.
public ubyte[] write_tga_to_mem(long w, long h, in ubyte[] data, long tgt_chans = 0) {
    scope sink = new MemWriter();
    write_tga(sink, w, h, data, tgt_chans);
    return sink.result;
}
919 
/*
 * Validates the arguments, encodes `data` as a run-length compressed TGA
 * into `stream` and flushes the stream.
 *
 * The source channel count is derived from data.length / w / h and must be
 * 1..4; tgt_chans == 0 means "same as the source".
 *
 * Throws ImageIOException when the dimensions, the channel counts or the
 * data length do not fit together.
 */
void write_tga(Writer stream, long w, long h, in ubyte[] data, long tgt_chans = 0) {
    // Width and height are stored as 16-bit values in the TGA header.
    immutable bool dims_ok = (1 <= w && w <= ushort.max) && (1 <= h && h <= ushort.max);
    if (!dims_ok)
        throw new ImageIOException("invalid dimensions");

    ulong src_chans = data.length / w / h;
    immutable bool src_ok = (1 <= src_chans && src_chans <= 4);
    immutable bool tgt_ok = (0 <= tgt_chans && tgt_chans <= 4);
    if (!src_ok || !tgt_ok)
        throw new ImageIOException("invalid channel count");

    // The division above truncates; make sure nothing was left over.
    if (src_chans * w * h != data.length)
        throw new ImageIOException("mismatching dimensions and length");

    TGA_Encoder ec;
    ec.stream    = stream;
    ec.w         = cast(ushort) w;
    ec.h         = cast(ushort) h;
    ec.src_chans = cast(int) src_chans;
    ec.tgt_chans = (tgt_chans != 0) ? cast(int) tgt_chans : cast(int) src_chans;
    ec.rle       = true;
    ec.data      = data;

    write_tga(ec);
    stream.flush();
}
942 
// State handed from read_tga to decode_tga.
struct TGA_Decoder {
    Reader stream;          // source of the pixel data
    size_t w;               // image width in pixels
    size_t h;               // image height in pixels
    bool origin_at_top;     // src: true when the first stored line is the top line
    uint bytes_pp;          // bytes per source pixel
    bool rle;               // run length compressed
    _ColFmt src_fmt;        // layout of a source pixel
    uint tgt_chans;         // channels per pixel in the decoded result
}
952 
/*
 * Decodes the pixel data described by `dc` and returns it as a new array of
 * dc.w * dc.h * dc.tgt_chans bytes with the top line first.
 *
 * Source lines are read (raw or RLE packed) into a line buffer, converted to
 * the target format and stored into the result; when the source origin is at
 * the bottom the result is filled from its last line upwards.
 *
 * RLE packets are treated as a continuous stream, so a packet may continue
 * on the next line. The pixel value of a run packet is read exactly once,
 * together with the packet header; the remainder of a run carried over a
 * line boundary reuses that value instead of consuming further input.
 */
ubyte[] decode_tga(ref TGA_Decoder dc) {
    auto result = new ubyte[dc.w * dc.h * dc.tgt_chans];

    immutable size_t tgt_linesize = dc.w * dc.tgt_chans;
    immutable size_t src_linesize = dc.w * dc.bytes_pp;
    auto src_line = new ubyte[src_linesize];

    // Negative stride walks the result bottom-up for bottom-origin sources.
    immutable ptrdiff_t tgt_stride = (dc.origin_at_top) ? tgt_linesize : -tgt_linesize;
    ptrdiff_t ti                   = (dc.origin_at_top) ? 0 : (dc.h-1) * tgt_linesize;

    const LineConv convert = get_converter(dc.src_fmt, dc.tgt_chans);

    if (!dc.rle) {
        foreach (_j; 0 .. dc.h) {
            dc.stream.readExact(src_line, src_linesize);
            convert(src_line, result[ti .. ti + tgt_linesize]);
            ti += tgt_stride;
        }
        return result;
    }

    // ----- RLE  -----

    auto rbuf = new ubyte[src_linesize];        // scratch for the packet header byte
    auto run_px = new ubyte[dc.bytes_pp];       // pixel value of the current run packet
    size_t plen = 0;      // bytes of the current packet not yet produced
    bool its_rle = false; // current packet is a run (true) or raw data (false)

    foreach (_j; 0 .. dc.h) {
        // fill src_line with uncompressed data (this works like a stream)
        size_t wanted = src_linesize;
        while (wanted) {
            if (plen == 0) {
                // Start of a new packet: header byte, plus the repeated
                // pixel when the packet is a run.
                dc.stream.readExact(rbuf, 1);
                its_rle = cast(bool) (rbuf[0] & 0x80);
                plen = ((rbuf[0] & 0x7f) + 1) * dc.bytes_pp; // length in bytes
                if (its_rle)
                    dc.stream.readExact(run_px, dc.bytes_pp);
            }
            const size_t gotten = src_linesize - wanted;
            const size_t copysize = min(plen, wanted);
            if (its_rle) {
                for (size_t p = gotten; p < gotten+copysize; p += dc.bytes_pp)
                    src_line[p .. p+dc.bytes_pp] = run_px[0 .. dc.bytes_pp];
            } else {    // it's raw
                auto slice = src_line[gotten .. gotten+copysize];
                dc.stream.readExact(slice, copysize);
            }
            wanted -= copysize;
            plen -= copysize;
        }

        convert(src_line, result[ti .. ti + tgt_linesize]);
        ti += tgt_stride;
    }

    return result;
}
1009 
1010 // ----------------------------------------------------------------------
1011 // TGA encoder
1012 
// Signature bytes written at the end of the TGA footer: the text
// "TRUEVISION-XFILE." followed by a zero byte.
immutable ubyte[18] tga_footer_sig = [
    'T','R','U','E','V','I','S','I','O','N',
    '-',
    'X','F','I','L','E',
    '.', 0,
];
1015 
// State handed from the public write_tga entry point to the encoder routines.
struct TGA_Encoder {
    Writer stream;          // destination of the encoded file
    ushort w;               // image width in pixels
    ushort h;               // image height in pixels
    int src_chans;          // channels per pixel in `data`
    int tgt_chans;          // channels per pixel written to the file
    bool rle;               // run length compression
    const(ubyte)[] data;    // source pixels
}
1024 
/*
 * Writes a complete TGA file to ec.stream: the 18-byte header, the image
 * data (via write_image_data) and the 26-byte footer.
 *
 * Throws ImageIOException("internal error") when ec.tgt_chans is not 1..4.
 */
void write_tga(ref TGA_Encoder ec) {
    if (ec.tgt_chans < 1 || 4 < ec.tgt_chans)
        throw new ImageIOException("internal error");

    // 1 and 2 channels are gray, 3 and 4 true color; even counts carry alpha.
    immutable bool is_gray   = (ec.tgt_chans <= 2);
    immutable bool has_alpha = (ec.tgt_chans == 2 || ec.tgt_chans == 4);

    ubyte data_type;
    with (TGA_DataType) {
        if (is_gray)
            data_type = ec.rle ? Gray_RLE : Gray;
        else
            data_type = ec.rle ? TrueColor_RLE : TrueColor;
    }

    // Id length, palette type, the palette description (bytes 3..7) and the
    // x/y origin (bytes 8..11) are all zero, so start from a zeroed header.
    ubyte[18] hdr = 0;
    hdr[2] = data_type;
    hdr[12..14] = nativeToLittleEndian(ec.w);
    hdr[14..16] = nativeToLittleEndian(ec.h);
    hdr[16] = cast(ubyte) (ec.tgt_chans * 8);     // bits per pixel
    hdr[17] = (has_alpha) ? 0x8 : 0x0;     // flags: attr_bits_pp = 8
    ec.stream.rawWrite(hdr);

    write_image_data(ec);

    // Footer: extension area offset (4) and developer directory offset (4)
    // are zero, followed by the signature.
    ubyte[26] ftr = 0;
    ftr[8..26] = tga_footer_sig;
    ec.stream.rawWrite(ftr);
}
1056 
/*
 * Converts the source pixels line by line to the target format and writes
 * them to ec.stream, last source line first (the file has its origin at the
 * bottom). Each line is RLE packed on its own when ec.rle is set.
 *
 * Throws ImageIOException("internal error") when ec.tgt_chans is not 1..4.
 */
void write_image_data(ref TGA_Encoder ec) {
    _ColFmt tgt_fmt;
    switch (ec.tgt_chans) {
        case 1: tgt_fmt = _ColFmt.Y;    break;
        case 2: tgt_fmt = _ColFmt.YA;   break;
        case 3: tgt_fmt = _ColFmt.BGR;  break;
        case 4: tgt_fmt = _ColFmt.BGRA; break;
        default: throw new ImageIOException("internal error");
    }

    const LineConv convert = get_converter(ec.src_chans, tgt_fmt);

    immutable size_t src_linesize = ec.w * ec.src_chans;
    immutable size_t tgt_linesize = ec.w * ec.tgt_chans;
    auto tgt_line = new ubyte[tgt_linesize];

    // Room for one compressed line: the pixel bytes plus the packet headers.
    immutable bytes_pp = ec.tgt_chans;
    ubyte[] packed = null;
    if (ec.rle) {
        immutable size_t max_packets_per_line = (tgt_linesize+127) / 128;
        packed = new ubyte[tgt_linesize + max_packets_per_line];
    }

    for (ptrdiff_t row = ec.h - 1; row >= 0; --row) {   // origin at bottom
        immutable size_t start = row * src_linesize;
        convert(ec.data[start .. start + src_linesize], tgt_line);
        if (ec.rle) {
            ubyte[] compressed_line = rle_compress(tgt_line, packed, ec.w, bytes_pp);
            ec.stream.rawWrite(compressed_line);
        } else {
            ec.stream.rawWrite(tgt_line);
        }
    }
}
1096 
/*
 * Run-length packs one line of `w` pixels of `bytes_pp` bytes each into
 * tgt_cmp and returns the used part of tgt_cmp.
 *
 * Output is a sequence of packets: a run packet is a header byte
 * 0x80 | (count-1) followed by one pixel; a raw packet is a header byte
 * (count-1) followed by count pixels. A packet holds at most 128 pixels.
 * Runs shorter than 2 pixels (3 for one-byte pixels) are stored as raw data.
 */
ubyte[] rle_compress(in ubyte[] line, ubyte[] tgt_cmp, in size_t w, in int bytes_pp) pure {
    enum size_t max_packet_px = 128;
    immutable int min_run = (bytes_pp > 1) ? 2 : 3;  // run len worth an RLE packet

    size_t out_pos = 0;         // write position in tgt_cmp
    size_t raw_start = 0;       // byte offset in line of the pending raw pixels
    size_t raw_px = 0;          // number of pending raw pixels
    size_t todo = w;            // pixels not yet consumed
    size_t next = bytes_pp;     // byte offset just past the current pixel

    while (todo) {
        // Measure the run that starts at the current pixel.
        const(ubyte)[] cur = line[next - bytes_pp .. next];
        size_t run = 1;
        while (next < line.length && line[next .. next + bytes_pp] == cur[] && run < max_packet_px) {
            ++run;
            next += bytes_pp;
        }
        todo -= run;

        if (min_run <= run) {
            // Pending raw pixels have to be written before the run packet.
            if (raw_px) {
                assert(raw_px < max_packet_px);
                immutable size_t nbytes = raw_px * bytes_pp;
                tgt_cmp[out_pos++] = cast(ubyte) (raw_px - 1); // raw packet header
                tgt_cmp[out_pos .. out_pos + nbytes] = line[raw_start .. raw_start + nbytes];
                out_pos += nbytes;
                raw_px = 0;
            }

            tgt_cmp[out_pos++] = cast(ubyte) (0x80 | (run - 1)); // packet header
            tgt_cmp[out_pos .. out_pos + bytes_pp] = cur[];       // packet data
            out_pos += bytes_pp;
            raw_start = next;
        } else {
            // Too short for a run packet: add to the pending raw pixels and
            // emit a full raw packet as soon as 128 of them have gathered.
            raw_px += run;
            if (max_packet_px <= raw_px) {
                immutable size_t nbytes = max_packet_px * bytes_pp;
                tgt_cmp[out_pos++] = 0x7f; // raw packet header
                tgt_cmp[out_pos .. out_pos + nbytes] = line[raw_start .. raw_start + nbytes];
                out_pos += nbytes;
                raw_start += nbytes;
                raw_px -= max_packet_px;
            }
        }

        next += bytes_pp;
    }

    // Whatever raw pixels are left form the last packet of the line.
    if (raw_px) {
        immutable size_t nbytes = raw_px * bytes_pp;
        tgt_cmp[out_pos++] = cast(ubyte) (raw_px - 1); // raw packet header
        tgt_cmp[out_pos .. out_pos + nbytes] = line[raw_start .. raw_start + nbytes];
        out_pos += nbytes;
    }
    return tgt_cmp[0 .. out_pos];
}
1154 
// Values of the data type byte in the TGA header.
enum TGA_DataType : ubyte {
    // uncompressed
    Idx           = 0x01,
    TrueColor     = 0x02,
    Gray          = 0x03,
    // run-length encoded: the values above with bit 3 set
    Idx_RLE       = 0x09,
    TrueColor_RLE = 0x0a,
    Gray_RLE      = 0x0b,
}
1163 
/// Reports width, height and channel count of a TGA, read through a
/// FileReader created for `filename`. chans is 0 when it cannot be determined.
public void read_tga_info(in char[] filename, out long w, out long h, out long chans) {
    scope src = new FileReader(filename);
    return read_tga_info(src, w, h, chans);
}
1169 
/*
 * Reads the TGA header from `stream` and reports the image dimensions and
 * channel count. chans is set to 0 when the header does not allow it to be
 * determined.
 */
void read_tga_info(Reader stream, out long w, out long h, out long chans) {
    TGA_Header hdr = read_tga_header(stream);
    w = hdr.width;
    h = hdr.height;
    chans = 0;  // unknown, unless one of the cases below applies

    // TGA is awkward...
    switch (hdr.data_type) with (TGA_DataType) {
        case TrueColor, Gray, TrueColor_RLE, Gray_RLE:
            // Channels follow from the pixel depth if it is whole bytes.
            if ((hdr.bits_pp % 8) == 0)
                chans = hdr.bits_pp / 8;
            break;
        case Idx, Idx_RLE:
            // For paletted data the palette entry size decides.
            switch (hdr.palette_bits) {
                case 15: chans = 3; break;
                case 16: chans = 3; break; // one bit could be for some "interrupt control"
                case 24: chans = 3; break;
                case 32: chans = 4; break;
                default: break;
            }
            break;
        default:
            break;
    }
}
1194 
1195 // --------------------------------------------------------------------------------
1196 // BMP
1197 
/// Decodes a BMP read through a FileReader created for `filename`.
public IFImage read_bmp(in char[] filename, long req_chans = 0) {
    scope src = new FileReader(filename);
    return read_bmp(src, req_chans);
}
1203 
/// Decodes a BMP read through a MemReader created for `source`.
public IFImage read_bmp_from_mem(in ubyte[] source, long req_chans = 0) {
    scope src = new MemReader(source);
    return read_bmp(src, req_chans);
}
1209 
/// Parses the BMP header read through a FileReader created for `filename`.
public BMP_Header read_bmp_header(in char[] filename) {
    scope src = new FileReader(filename);
    return read_bmp_header(src);
}
1215 
/// Parsed BMP file header and DIB header.
public struct BMP_Header {
    // file header
    size_t file_size;
    size_t pixel_data_offset;   // offset of the pixel data from the start of the file

    // DIB header
    size_t dib_size;
    ptrdiff_t width, height;
    ushort planes;
    uint dib_version;           // 0..5, derived from the DIB header size

    // Version dependent parts; those newer than dib_version keep their
    // default values.
    DibV1 dib_v1;
    DibV2 dib_v2;
    uint dib_v3_alpha_mask;
    DibV4 dib_v4;
    DibV5 dib_v5;
}
1232 
/// Part of BMP header, not always present.
public struct DibV1 {
    size_t bits_pp;                 // bits per pixel
    uint   compression;             // compression method (see CMP_RGB, CMP_BITS)
    size_t idat_size;
    size_t pixels_per_meter_x, pixels_per_meter_y;
    size_t palette_length;          // number of palette entries
    uint   important_color_count;
}
1243 
/// Part of BMP header, not always present.
public struct DibV2 {
    // Bit masks of the color channels within a pixel.
    uint red_mask, green_mask, blue_mask;
}
1250 
/// Part of BMP header, not always present.
public struct DibV4 {
    uint color_space_type;
    ubyte[36] color_space_endpoints;    // kept as the raw header bytes
    uint gamma_red, gamma_green, gamma_blue;
}
1259 
/// Part of BMP header, not always present.
public struct DibV5 {
    uint icc_profile_data, icc_profile_size;
}
1265 
/*
 * Reads and parses the BMP file header and the DIB header from `stream`.
 *
 * The DIB header size selects the header version (12, 40, 52, 56, 108 or
 * 124 bytes -> version 0..5); the version dependent parts of the returned
 * BMP_Header are filled in up to that version, the rest keep their defaults.
 *
 * The 12-byte header (version 0) stores width, height and planes as three
 * consecutive 16-bit fields, unlike the later versions which use 32-bit
 * width and height, so it is parsed separately.
 *
 * Throws ImageIOException when the file does not start with "BM" or the DIB
 * header size is not one of the known ones.
 */
BMP_Header read_bmp_header(Reader stream) {
    ubyte[18] tmp = void;  // bmp header + size of dib header
    stream.readExact(tmp[], tmp.length);

    if (tmp[0..2] != ['B', 'M'])
        throw new ImageIOException("corrupt header");

    size_t dib_size = littleEndianToNative!uint(tmp[14..18]);
    uint dib_version;
    switch (dib_size) {
        case 12: dib_version = 0; break;
        case 40: dib_version = 1; break;
        case 52: dib_version = 2; break;
        case 56: dib_version = 3; break;
        case 108: dib_version = 4; break;
        case 124: dib_version = 5; break;
        default: throw new ImageIOException("unsupported dib version");
    }
    // The size field itself (4 bytes) has already been consumed above.
    auto dib_header = new ubyte[dib_size-4];
    stream.readExact(dib_header[], dib_header.length);

    ptrdiff_t width;
    ptrdiff_t height;
    ushort planes;
    if (dib_version == 0) {
        // Only 8 bytes follow the size field here; reading the 32-bit
        // layout would index past the end of dib_header.
        width  = littleEndianToNative!ushort(dib_header[0..2]);
        height = littleEndianToNative!ushort(dib_header[2..4]);
        planes = littleEndianToNative!ushort(dib_header[4..6]);
    } else {
        width  = littleEndianToNative!int(dib_header[0..4]);
        height = littleEndianToNative!int(dib_header[4..8]);
        planes = littleEndianToNative!ushort(dib_header[8..10]);
    }

    DibV1 dib_v1;
    DibV2 dib_v2;
    uint dib_v3_alpha_mask;
    DibV4 dib_v4;
    DibV5 dib_v5;

    if (1 <= dib_version) {
        DibV1 v1 = {
            bits_pp               : cast(size_t) littleEndianToNative!ushort(dib_header[10..12]),
            compression           : littleEndianToNative!uint(dib_header[12..16]),
            idat_size             : cast(size_t) littleEndianToNative!uint(dib_header[16..20]),
            pixels_per_meter_x    : cast(size_t) littleEndianToNative!uint(dib_header[20..24]),
            pixels_per_meter_y    : cast(size_t) littleEndianToNative!uint(dib_header[24..28]),
            palette_length        : cast(size_t) littleEndianToNative!uint(dib_header[28..32]),
            important_color_count : littleEndianToNative!uint(dib_header[32..36]),
        };
        dib_v1 = v1;
    }

    if (2 <= dib_version) {
        DibV2 v2 = {
            red_mask              : littleEndianToNative!uint(dib_header[36..40]),
            green_mask            : littleEndianToNative!uint(dib_header[40..44]),
            blue_mask             : littleEndianToNative!uint(dib_header[44..48]),
        };
        dib_v2 = v2;
    }

    if (3 <= dib_version) {
        dib_v3_alpha_mask = littleEndianToNative!uint(dib_header[48..52]);
    }

    if (4 <= dib_version) {
        DibV4 v4 = {
            color_space_type      : littleEndianToNative!uint(dib_header[52..56]),
            color_space_endpoints : dib_header[56..92],
            gamma_red             : littleEndianToNative!uint(dib_header[92..96]),
            gamma_green           : littleEndianToNative!uint(dib_header[96..100]),
            gamma_blue            : littleEndianToNative!uint(dib_header[100..104]),
        };
        dib_v4 = v4;
    }

    if (5 <= dib_version) {
        DibV5 v5 = {
            icc_profile_data      : littleEndianToNative!uint(dib_header[108..112]),
            icc_profile_size      : littleEndianToNative!uint(dib_header[112..116]),
        };
        dib_v5 = v5;
    }

    BMP_Header header = {
        file_size             : cast(size_t) littleEndianToNative!uint(tmp[2..6]),
        pixel_data_offset     : cast(size_t) littleEndianToNative!uint(tmp[10..14]),
        // Stored so that callers can validate pixel_data_offset against the
        // real end of the headers.
        dib_size              : dib_size,
        width                 : width,
        height                : height,
        planes                : planes,
        dib_version           : dib_version,
        dib_v1                : dib_v1,
        dib_v2                : dib_v2,
        dib_v3_alpha_mask     : dib_v3_alpha_mask,
        dib_v4                : dib_v4,
        dib_v5                : dib_v5,
    };
    return header;
}
1353 
// Values of the BMP compression field that read_bmp accepts.
enum {
    CMP_RGB  = 0,
    CMP_BITS = 3,
}
1356 
/*
 * Decodes a BMP image from `stream`.
 *
 * Supported are 8-bit paletted and 24/32-bit images, uncompressed
 * (CMP_RGB) or with channel bit masks (CMP_BITS) where every mask selects
 * one whole byte of the pixel. req_chans selects the number of channels of
 * the result (1..4); 0 gives RGBA when the header has an alpha mask and RGB
 * otherwise. The returned pixels have the top line first.
 *
 * Throws ImageIOException for an out-of-range req_chans and for invalid or
 * unsupported headers and pixel data.
 */
IFImage read_bmp(Reader stream, long req_chans = 0) {
    if (req_chans < 0 || 4 < req_chans)
        throw new ImageIOException("unknown color format");

    BMP_Header hdr = read_bmp_header(stream);

    if (hdr.width < 1 || hdr.height == 0) { throw new ImageIOException("invalid dimensions"); }
    // Pixel data cannot start inside the 14-byte file header or the DIB header.
    if (hdr.pixel_data_offset < (14 + hdr.dib_size)
    || hdr.pixel_data_offset > 0xffffff /* arbitrary */) {
        throw new ImageIOException("invalid pixel data offset");
    }
    if (hdr.planes != 1) { throw new ImageIOException("not supported"); }

    // Defaults used for header version 0: 8-bit indices into a palette of
    // 256 three-byte entries.
    auto bytes_pp       = 1;
    bool paletted       = true;
    size_t palette_length = 256;
    bool rgb_masked     = false;
    auto pe_bytes_pp    = 3;

    if (1 <= hdr.dib_version) {
        if (256 < hdr.dib_v1.palette_length)
            throw new ImageIOException("invalid palette length");
        if (hdr.dib_v1.bits_pp <= 8 && hdr.dib_v1.compression != CMP_RGB)
             throw new ImageIOException("invalid format");
        if (hdr.dib_v1.compression != CMP_RGB && hdr.dib_v1.compression != CMP_BITS)
             throw new ImageIOException("unsupported compression");

        switch (hdr.dib_v1.bits_pp) {
            case 8  : bytes_pp = 1; paletted = true; break;
            case 24 : bytes_pp = 3; paletted = false; break;
            case 32 : bytes_pp = 4; paletted = false; break;
            default: throw new ImageIOException("not supported");
        }

        // A color count of zero stands for the full palette of the pixel
        // depth, which is 256 entries for the only paletted depth handled here.
        palette_length = hdr.dib_v1.palette_length;
        if (paletted && palette_length == 0)
            palette_length = 256;
        rgb_masked = hdr.dib_v1.compression == CMP_BITS;
        pe_bytes_pp = 4;
    }

    // Maps a one-byte channel mask to the index of that byte within a pixel.
    size_t mask_to_idx(uint mask) {
        switch (mask) {
            case 0xff00_0000: return 3;
            case 0x00ff_0000: return 2;
            case 0x0000_ff00: return 1;
            case 0x0000_00ff: return 0;
            default: throw new ImageIOException("unsupported mask");
        }
    }

    size_t redi = 2;
    size_t greeni = 1;
    size_t bluei = 0;
    if (rgb_masked) {
        if (hdr.dib_version < 2)
            throw new ImageIOException("invalid format");
        redi = mask_to_idx(hdr.dib_v2.red_mask);
        greeni = mask_to_idx(hdr.dib_v2.green_mask);
        bluei = mask_to_idx(hdr.dib_v2.blue_mask);
    }

    bool alpha_masked = false;
    size_t alphai = 0;
    if (3 <= hdr.dib_version && hdr.dib_v3_alpha_mask != 0) {
        alpha_masked = true;
        alphai = mask_to_idx(hdr.dib_v3_alpha_mask);
    }

    // The channel indices are applied to every source pixel below, so they
    // have to lie within a pixel (a mask for byte 3 does not fit 24-bit data).
    if (!paletted) {
        immutable size_t px_size = bytes_pp;
        if (px_size <= redi || px_size <= greeni || px_size <= bluei
        || (alpha_masked && px_size <= alphai)) {
            throw new ImageIOException("unsupported mask");
        }
    }

    ubyte[] depaletted_line = null;
    ubyte[] palette = null;
    if (paletted) {
        depaletted_line = new ubyte[hdr.width * pe_bytes_pp];
        palette = new ubyte[palette_length * pe_bytes_pp];
        stream.readExact(palette[], palette.length);
    }

    stream.seek(hdr.pixel_data_offset, SEEK_SET);

    immutable tgt_chans = (0 < req_chans) ? req_chans
                                          : (alpha_masked) ? _ColFmt.RGBA
                                                           : _ColFmt.RGB;

    const src_fmt = (!paletted || pe_bytes_pp == 4) ? _ColFmt.BGRA : _ColFmt.BGR;
    const LineConv convert = get_converter(src_fmt, tgt_chans);

    immutable size_t src_linesize = hdr.width * bytes_pp;  // without padding
    immutable size_t src_pad = 3 - ((src_linesize-1) % 4); // lines are padded to 4 bytes
    immutable ptrdiff_t tgt_linesize = (hdr.width * cast(int) tgt_chans);

    // A negative height means the lines are stored top-down; otherwise the
    // result is filled from its last line upwards.
    immutable ptrdiff_t tgt_stride = (hdr.height < 0) ? tgt_linesize : -tgt_linesize;
    ptrdiff_t ti                   = (hdr.height < 0) ? 0 : (hdr.height-1) * tgt_linesize;

    auto src_line_buf  = new ubyte[src_linesize + src_pad];
    auto bgra_line_buf = (paletted) ? null : new ubyte[hdr.width * 4];
    auto result        = new ubyte[hdr.width * abs(hdr.height) * cast(int) tgt_chans];

    foreach (_; 0 .. abs(hdr.height)) {
        stream.readExact(src_line_buf[], src_line_buf.length);
        auto src_line = src_line_buf[0..src_linesize];

        if (paletted) {
            size_t ps = pe_bytes_pp;
            size_t di = 0;
            foreach (idx; src_line[]) {
                // Valid indices are 0 .. palette_length-1.
                if (idx >= palette_length)
                    throw new ImageIOException("invalid palette index");
                size_t i = idx * ps;
                depaletted_line[di .. di+ps] = palette[i .. i+ps];
                if (ps == 4) {
                    // The fourth palette byte is not used as alpha.
                    depaletted_line[di+3] = 255;
                }
                di += ps;
            }
            convert(depaletted_line[], result[ti .. (ti+tgt_linesize)]);
        } else {
            // Reorder the channels of every pixel into BGRA.
            for (size_t si, di;   si < src_line.length;   si+=bytes_pp, di+=4) {
                bgra_line_buf[di + 0] = src_line[si + bluei];
                bgra_line_buf[di + 1] = src_line[si + greeni];
                bgra_line_buf[di + 2] = src_line[si + redi];
                bgra_line_buf[di + 3] = (alpha_masked) ? src_line[si + alphai]
                                                       : 255;
            }
            convert(bgra_line_buf[], result[ti .. (ti+tgt_linesize)]);
        }

        ti += tgt_stride;
    }

    IFImage ret = {
        w      : hdr.width,
        h      : abs(hdr.height),
        c      : cast(ColFmt) tgt_chans,
        pixels : result,
    };
    return ret;
}
1493 
/// Reports width, height and channel count of a BMP, read through a
/// FileReader created for `filename`.
public void read_bmp_info(in char[] filename, out long w, out long h, out long chans) {
    scope src = new FileReader(filename);
    return read_bmp_info(src, w, h, chans);
}
1499 
/*
 * Reads the BMP header from `stream` and reports the absolute values of
 * width and height. chans is RGBA when the header (version 3 or later)
 * has a non-zero alpha mask and RGB otherwise.
 */
void read_bmp_info(Reader stream, out long w, out long h, out long chans) {
    BMP_Header hdr = read_bmp_header(stream);
    immutable bool has_alpha = (hdr.dib_version >= 3 && hdr.dib_v3_alpha_mask != 0);

    w = abs(hdr.width);
    h = abs(hdr.height);
    if (has_alpha)
        chans = ColFmt.RGBA;
    else
        chans = ColFmt.RGB;
}
1507 
1508 // --------------------------------------------------------------------------------
1509 // Baseline JPEG decoder
1510 
1511 import std.math;    // floor, ceil
1512 import core.stdc.stdlib : alloca;
1513 
1514 //debug = DebugJPEG;
1515 
/// Decodes a JPEG read through a FileReader created for `filename`.
public IFImage read_jpeg(in char[] filename, long req_chans = 0) {
    scope src = new FileReader(filename);
    return read_jpeg(src, req_chans);
}
1521 
/// Decodes a JPEG read through a MemReader created for `source`.
public IFImage read_jpeg_from_mem(in ubyte[] source, long req_chans = 0) {
    scope src = new MemReader(source);
    return read_jpeg(src, req_chans);
}
1527 
/*
 * Decodes a JPEG image from `stream`.
 *
 * req_chans selects the number of channels of the result (1..4); 0 keeps
 * the number of components found in the file.
 *
 * Throws ImageIOException for an out-of-range req_chans, when the stream
 * does not start with the SOI marker, or when the end of image is reached
 * before any scan header.
 */
IFImage read_jpeg(Reader stream, long req_chans = 0) {
    if (req_chans < 0 || 4 < req_chans)
        throw new ImageIOException("come on...");

    // SOI
    ubyte[2] soi = void;
    stream.readExact(soi, soi.length);
    if (soi[0] != 0xff || soi[1] != 0xd8)
        throw new ImageIOException("not JPEG");

    JPEG_Decoder dc = { stream: stream };

    read_markers(dc);   // reads until first scan header or eoi
    if (dc.eoi_reached)
        throw new ImageIOException("no image data");

    if (req_chans == 0)
        dc.tgt_chans = dc.num_comps;
    else
        dc.tgt_chans = cast(int) req_chans;

    IFImage result;
    result.w      = dc.width;
    result.h      = dc.height;
    result.c      = cast(ColFmt) dc.tgt_chans;
    result.pixels = decode_jpeg(dc);
    return result;
}
1554 
// Decoder state shared by the JPEG marker readers and the decoding routines.
struct JPEG_Decoder {
    // image component
    struct Component {
        ubyte id;
        ubyte sfx;      // horizontal sampling factor, aka. h
        ubyte sfy;      // vertical sampling factor, aka. v
        long x;         // total num of samples, without fill samples
        long y;
        ubyte qtable;
        ubyte ac_table;
        ubyte dc_table;
        int pred;       // dc prediction
        ubyte[] data;   // reconstructed samples
    }

    Reader stream;

    bool has_frame_header = false;
    bool eoi_reached = false;

    ubyte[64][4] qtables;
    HuffTab[2] ac_tables;
    HuffTab[2] dc_tables;

    ubyte cb;        // current byte (next bit always at MSB)
    int bits_left;   // num of unused bits in cb

    bool correct_comp_ids;
    Component[3] comps;
    ubyte num_comps;
    int[3] index_for;   // index_for[0] is index of comp that comes first in stream
    int tgt_chans;

    size_t width;
    size_t height;

    int hmax;
    int vmax;

    ushort restart_interval;    // number of MCUs in restart interval

    int num_mcu_x, num_mcu_y;
}
1595 
// One Huffman table: the symbol values plus the lookup data derived from
// the code length counts (see derive_table).
struct HuffTab {
    // TODO where in the spec does it say 256 values/codes at most?
    ubyte[256] values;
    ubyte[257] sizes;       // code size per value, terminated by a 0 entry
    // Indexed by code length - 1; -1 marks a length without codes.
    short[16] mincode;
    short[16] maxcode;
    short[16] valptr;
}
1603 
// Second byte of the JPEG markers (the first byte is always 0xff).
enum Marker : ubyte {
    // frame headers
    SOF0  = 0xc0,   // start of frame / baseline DCT
    //SOF1 = 0xc1,    // start of frame / extended seq.
    //SOF2 = 0xc2,    // start of frame / progressive DCT
    SOF3  = 0xc3,   // start of frame / lossless
    SOF9  = 0xc9,   // start of frame / extended seq., arithmetic
    SOF11 = 0xcb,   // start of frame / lossless, arithmetic

    // table and parameter definitions
    DHT = 0xc4,     // define huffman tables
    //DAC = 0xcc,     // define arithmetic conditioning table
    DQT = 0xdb,     // define quantization tables
    DNL = 0xdc,     // define number of lines
    DRI = 0xdd,     // define restart interval

    // restart markers
    RST0 = 0xd0,    // restart entropy coded data
    // ...
    RST7 = 0xd7,    // restart entropy coded data

    // image and scan delimiters
    SOI = 0xd8,     // start of image
    EOI = 0xd9,     // end of image
    SOS = 0xda,     // start of scan

    // application segments and comments
    APP0 = 0xe0,    // application 0 segment
    // ...
    APPf = 0xef,    // application f segment
    COM  = 0xfe,    // comment
}
1627 
/*
 * Reads marker segments from dc.stream until a scan header has been read or
 * the end-of-image marker is found (dc.eoi_reached is set in that case).
 *
 * Table, frame header and restart interval segments are handed to their
 * readers; application and comment segments are skipped.
 *
 * Throws ImageIOException when no marker is found where one is expected,
 * on a second frame header, on a scan header without a frame header, and on
 * any marker not handled here.
 */
void read_markers(ref JPEG_Decoder dc) {
    bool scan_header_seen = false;

    while (!scan_header_seen && !dc.eoi_reached) {
        ubyte[2] marker;
        dc.stream.readExact(marker, 2);

        if (marker[0] != 0xff)
            throw new ImageIOException("no marker");
        // Any number of 0xff bytes may precede the marker code.
        while (marker[1] == 0xff)
            dc.stream.readExact(marker[1..$], 1);

        debug(DebugJPEG) writefln("marker: %s (%1$x)\t", cast(Marker) marker[1]);
        switch (marker[1]) with (Marker) {
            case DHT:
                read_huffman_tables(dc);
                break;
            case DQT:
                read_quantization_tables(dc);
                break;
            case DRI:
                read_restart_interval(dc);
                break;
            case SOF0:
                if (dc.has_frame_header)
                    throw new ImageIOException("extra frame header");
                debug(DebugJPEG) writeln();
                read_frame_header(dc);
                dc.has_frame_header = true;
                break;
            case SOS:
                if (!dc.has_frame_header)
                    throw new ImageIOException("no frame header");
                read_scan_header(dc);
                scan_header_seen = true;
                break;
            case EOI:
                dc.eoi_reached = true;
                break;
            case APP0: .. case APPf: goto case;
            case COM:
                debug(DebugJPEG) writefln("-> skipping segment");
                // The segment length includes its own two bytes.
                ubyte[2] lenbuf = void;
                dc.stream.readExact(lenbuf, lenbuf.length);
                immutable int skip = bigEndianToNative!ushort(lenbuf) - 2;
                dc.stream.seek(skip, SEEK_CUR);
                break;
            default:
                throw new ImageIOException("invalid / unsupported marker");
        }
    }
}
1670 
// DHT -- define huffman tables
//
// Reads one DHT segment, which may define several tables. Each definition
// is an info byte (table class in the high nibble, slot in the low nibble),
// 16 code length counts (the BITS) and then the symbol values.
// Throws ImageIOException for slots or classes above 1 and for more than
// 256 codes in a table.
void read_huffman_tables(ref JPEG_Decoder dc) {
    ubyte[19] tmp = void;
    dc.stream.readExact(tmp, 2);
    // The segment length includes its own two bytes.
    int remaining = bigEndianToNative!ushort(tmp[0..2]) - 2;

    while (remaining > 0) {
        dc.stream.readExact(tmp, 17);   // info byte & the BITS
        immutable ubyte slot = tmp[0] & 0xf;   // must be 0 or 1 for baseline
        immutable ubyte cls  = tmp[0] >> 4;    // 0 = dc table, 1 = ac table
        if (slot > 1 || cls > 1)
            throw new ImageIOException("invalid / not supported");

        // compute total number of huffman codes
        int total = 0;
        foreach (count; tmp[1..17])
            total += count;
        if (total > 256)   // TODO where in the spec?
            throw new ImageIOException("invalid / not supported");

        HuffTab* table = (cls == 0) ? &dc.dc_tables[slot] : &dc.ac_tables[slot];
        dc.stream.readExact(table.values, total);
        derive_table(*table, tmp[1..17]);

        remaining -= 17 + total;
    }
}
1703 
// num_values is the BITS
//
// Fills in table.sizes, table.mincode, table.maxcode and table.valptr from
// the 16 code length counts: first the code size of every value is listed
// (terminated by a 0 entry), then the codes are generated in increasing
// order, and finally the per-length lookup data is derived from the codes.
// The caller must make sure that the counts sum to at most 256.
void derive_table(ref HuffTab table, in ref ubyte[16] num_values) {
    short[256] codes;

    int k = 0;
    foreach (i; 0..16) {
        foreach (j; 0..num_values[i]) {
            table.sizes[k] = cast(ubyte) (i + 1);
            ++k;
        }
    }
    table.sizes[k] = 0;

    // A table without any codes has nothing to generate. Running the loop
    // below on it would treat the terminator as a code size and keep
    // walking through whatever follows, past the end of `codes`.
    if (0 < k) {
        k = 0;
        short code = 0;
        ubyte si = table.sizes[k];
        while (true) {
            // Consecutive codes for all values of the current size.
            do {
                codes[k] = code;
                ++code;
                ++k;
            } while (si == table.sizes[k]);

            if (table.sizes[k] == 0)
                break;

            // Lengthen the code until it has the next size in the list.
            debug(DebugJPEG) assert(si < table.sizes[k]);
            do {
                code <<= 1;
                ++si;
            } while (si != table.sizes[k]);
        }
    }

    derive_mincode_maxcode_valptr(
        table.mincode, table.maxcode, table.valptr,
        codes, num_values
    );
}
1742 
// F.15
//
// For every code length i+1 that has at least one code, stores
//   valptr[i]  = index of the first code of that length in codes,
//   mincode[i] = the first code of that length,
//   maxcode[i] = the last code of that length.
// Lengths without codes get -1 in all three arrays.
// num_values[i] is the number of codes that are i+1 bits long and codes holds
// the code words grouped by length, shortest first.
void derive_mincode_maxcode_valptr(
        ref short[16] mincode, ref short[16] maxcode, ref short[16] valptr,
        in ref short[256] codes, in ref ubyte[16] num_values) pure
{
    mincode[] = -1;
    maxcode[] = -1;
    valptr[] = -1;

    int first = 0;  // index in codes of the first code of the current length
    foreach (len_idx, count; num_values) {
        if (count == 0)
            continue;

        const int last = first + count - 1;
        valptr[len_idx] = cast(short) first;
        mincode[len_idx] = codes[first];
        maxcode[len_idx] = codes[last];
        first = last + 1;
    }
}
1763 
// DQT -- define quantization tables
//
// Parses one DQT segment from dc.stream, starting at its two byte length
// field. The segment has to consist of the length field followed by a whole
// number of 65 byte records (info byte + 64 table bytes). Each record is
// stored in dc.qtables at the slot named by the low nibble of its info byte.
//
// Throws ImageIOException if the length does not match that layout, if a slot
// is above 3 or if the precision nibble is not 0.
void read_quantization_tables(ref JPEG_Decoder dc) {
    ubyte[2] buf = void;
    dc.stream.readExact(buf, 2);
    const int seg_len = bigEndianToNative!ushort(buf[0..2]);

    // two length bytes + n * (1 info byte + 64 values)
    if (seg_len % 65 != 2)
        throw new ImageIOException("invalid / not supported");

    foreach (_; 0 .. (seg_len - 2) / 65) {
        dc.stream.readExact(buf, 1);
        const ubyte slot = buf[0] & 0xf;
        const ubyte precision = buf[0] >> 4;    // 0 = 8 bit, 1 = 16 bit

        // only 8 bit for baseline
        if (precision != 0 || 3 < slot)
            throw new ImageIOException("invalid / not supported");

        dc.stream.readExact(dc.qtables[slot], 64);
    }
}
1784 
// SOF0 -- start of frame
//
// Parses the frame header from dc.stream, starting at the segment's two byte
// length field, and stores in dc:
//   - height, width and num_comps,
//   - per component: id, sampling factors (sfx/sfy), quantization table slot
//     and the component's own dimensions (x/y),
//   - hmax/vmax (largest sampling factors) and num_mcu_x/num_mcu_y.
//
// Throws ImageIOException if the precision is not 8, the number of components
// is not 1 or 3, the segment length does not match, a dimension is zero, a
// component id is out of sequence, a sampling factor is outside 1..4, a
// quantization table slot is above 3 or one MCU would hold more than 10 data
// units.
void read_frame_header(ref JPEG_Decoder dc) {
    ubyte[9] tmp = void;
    dc.stream.readExact(tmp, 8);
    int len = bigEndianToNative!ushort(tmp[0..2]);  // 8 + num_comps*3
    ubyte precision = tmp[2];
    dc.height = bigEndianToNative!ushort(tmp[3..5]);
    dc.width = bigEndianToNative!ushort(tmp[5..7]);
    dc.num_comps = tmp[7];

    if ( precision != 8 ||
         (dc.num_comps != 1 && dc.num_comps != 3) ||
         len != 8 + dc.num_comps*3 )
        throw new ImageIOException("invalid / not supported");

    // Reject empty images. With a zero width the upsample_* helpers would
    // write result[0] into an empty line buffer. A zero height defers the
    // real height to a DNL segment (JPEG spec B.2.5), which nothing in this
    // function handles.
    if (dc.width == 0 || dc.height == 0)
        throw new ImageIOException("invalid / not supported");

    dc.hmax = 0;
    dc.vmax = 0;
    int mcu_du = 0; // data units in one mcu
    dc.stream.readExact(tmp, dc.num_comps*3);
    foreach (i; 0..dc.num_comps) {
        ubyte ci = tmp[i*3];
        // JFIF says ci should be i+1, but there are images where ci is i. Normalize ids
        // so that ci == i, always. So much for standards...
        // The first component decides which of the two numberings is in use.
        if (i == 0) { dc.correct_comp_ids = ci == i+1; }
        if ((dc.correct_comp_ids && ci != i+1)
        || (!dc.correct_comp_ids && ci != i))
            throw new ImageIOException("invalid component id");
        if (dc.correct_comp_ids) { ci -= 1; }

        dc.index_for[i] = ci;
        auto comp = &dc.comps[ci];
        comp.id = ci;
        ubyte sampling_factors = tmp[i*3 + 1];
        comp.sfx = sampling_factors >> 4;   // horizontal factor
        comp.sfy = sampling_factors & 0xf;  // vertical factor
        comp.qtable = tmp[i*3 + 2];
        if ( comp.sfy < 1 || 4 < comp.sfy ||
             comp.sfx < 1 || 4 < comp.sfx ||
             3 < comp.qtable )
            throw new ImageIOException("invalid / not supported");

        if (dc.hmax < comp.sfx) dc.hmax = comp.sfx;
        if (dc.vmax < comp.sfy) dc.vmax = comp.sfy;

        mcu_du += comp.sfx * comp.sfy;
    }
    if (10 < mcu_du)
        throw new ImageIOException("invalid / not supported");

    // component dimensions: image dimensions scaled by the component's
    // sampling factor relative to the largest one, rounded up
    foreach (i; 0..dc.num_comps) {
        dc.comps[i].x = cast(long) ceil(dc.width * (cast(double) dc.comps[i].sfx / dc.hmax));
        dc.comps[i].y = cast(long) ceil(dc.height * (cast(double) dc.comps[i].sfy / dc.vmax));

        debug(DebugJPEG) writefln("%d comp %d sfx/sfy: %d/%d", i, dc.comps[i].id,
                                                                  dc.comps[i].sfx,
                                                                  dc.comps[i].sfy);
    }

    // number of MCUs needed to cover the image, rounded up
    long mcu_w = dc.hmax * 8;
    long mcu_h = dc.vmax * 8;
    dc.num_mcu_x = cast(int) ((dc.width + mcu_w-1) / mcu_w);
    dc.num_mcu_y = cast(int) ((dc.height + mcu_h-1) / mcu_h);

    debug(DebugJPEG) {
        writefln("\tlen: %s", len);
        writefln("\tprecision: %s", precision);
        writefln("\tdimensions: %s x %s", dc.width, dc.height);
        writefln("\tnum_comps: %s", dc.num_comps);
        writefln("\tnum_mcu_x: %s", dc.num_mcu_x);
        writefln("\tnum_mcu_y: %s", dc.num_mcu_y);
    }

}
1858 
// SOS -- start of scan
//
// Parses the scan header from dc.stream, starting at the segment's two byte
// length field. For every component listed in the scan the dc and ac huffman
// table slots are stored in the matching entry of dc.comps.
//
// Throws ImageIOException if the scan does not list exactly dc.num_comps
// components, if the segment length does not match, if a listed component id
// is unknown or if a table slot is above 1.
void read_scan_header(ref JPEG_Decoder dc) {
    ubyte[3] head = void;
    dc.stream.readExact(head, head.length);
    const ushort len = bigEndianToNative!ushort(head[0..2]);
    const ubyte num_scan_comps = head[2];

    const bool count_ok = num_scan_comps == dc.num_comps;
    const bool len_ok = len == (6+num_scan_comps*2);
    if (!count_ok || !len_ok)
        throw new ImageIOException("invalid / not supported");

    // rest of the segment: two bytes per component and three trailing bytes
    auto buf = (cast(ubyte*) alloca((len-3) * ubyte.sizeof))[0..len-3];
    dc.stream.readExact(buf, buf.length);

    // ids in the file are one too large when the frame header used 1-based ids
    const uint id_offset = dc.correct_comp_ids ? 1 : 0;

    for (int n = 0; n < num_scan_comps; ++n) {
        const uint comp_id = buf[n*2] - id_offset;

        // look up the component that carries this id
        int ci = 0;
        for (; ci < dc.num_comps; ++ci) {
            if (dc.comps[ci].id == comp_id)
                break;
        }
        if (dc.num_comps <= ci)
            throw new ImageIOException("invalid component id");

        const ubyte tables = buf[n*2+1];
        auto comp = &dc.comps[ci];
        comp.dc_table = tables >> 4;
        comp.ac_table = tables & 0xf;
        if (1 < comp.dc_table || 1 < comp.ac_table)
            throw new ImageIOException("invalid / not supported");
    }

    // The three trailing bytes (spectral selection start, spectral selection
    // end and successive approximation) have been consumed and are ignored.
}
1893 
// Parses a restart interval segment from dc.stream, starting at its two byte
// length field, and stores the interval in dc.restart_interval.
// Throws ImageIOException if the length field is not 4.
void read_restart_interval(ref JPEG_Decoder dc) {
    ubyte[4] seg = void;
    dc.stream.readExact(seg, seg.length);

    if (bigEndianToNative!ushort(seg[0..2]) != 4)
        throw new ImageIOException("invalid / not supported");

    dc.restart_interval = bigEndianToNative!ushort(seg[2..4]);
    debug(DebugJPEG) writeln("restart interval set to: ", dc.restart_interval);
}
1903 
// Reads the data after the SOS segment.
//
// Allocates a sample plane for every component, large enough for whole MCUs
// (num_mcu_x*sfx*8 samples wide, num_mcu_y*sfy*8 samples high), decodes the
// single scan into those planes and returns the pixels produced by
// dc.reconstruct().
ubyte[] decode_jpeg(ref JPEG_Decoder dc) {
    foreach (ref comp; dc.comps[0..dc.num_comps]) {
        // Multiply in size_t. The plain int product can overflow for very
        // large images, which would allocate a plane smaller than what
        // decode_scan writes through its raw pointer.
        const size_t plane_w = cast(size_t) dc.num_mcu_x * comp.sfx * 8;
        const size_t plane_h = cast(size_t) dc.num_mcu_y * comp.sfy * 8;
        comp.data = new ubyte[plane_w * plane_h];
    }

    // E.7 -- Multiple scans are for progressive images which are not supported
    //while (!dc.eoi_reached) {
        decode_scan(dc);    // E.2.3
        //read_markers(dc);   // reads until next scan header or eoi
    //}

    // throw away fill samples and convert to target format
    return dc.reconstruct();
}
1918 
// E.2.3 and E.8 and E.9
//
// Decodes all MCUs of the scan, row by row. For every MCU the data units of
// each component are entropy decoded (decode_block), run through the IDCT and
// written to the component's sample plane. When a restart interval is set, a
// RSTx marker is read after every interval and the bit reader and the DC
// predictors are reset.
void decode_scan(ref JPEG_Decoder dc) {
    debug(DebugJPEG) writeln("decode scan...");

    // without restart intervals the whole scan is one interval
    int intervals = 1;
    int mcus = dc.num_mcu_x * dc.num_mcu_y;     // MCUs left in this interval
    if (0 < dc.restart_interval) {
        int total_mcus = dc.num_mcu_x * dc.num_mcu_y;
        intervals = (total_mcus + dc.restart_interval-1) / dc.restart_interval;
        mcus = dc.restart_interval;
    }
    debug(DebugJPEG) writeln("intervals: ", intervals);

    foreach (mcu_j; 0 .. dc.num_mcu_y) {
        foreach (mcu_i; 0 .. dc.num_mcu_x) {

            // decode mcu: components in frame header order
            foreach (scan_pos; 0..dc.num_comps) {
                auto comp = &dc.comps[dc.index_for[scan_pos]];
                // samples per row of this component's plane
                const long dst_stride = dc.num_mcu_x * comp.sfx*8;

                foreach (du_j; 0 .. comp.sfy) {
                    foreach (du_i; 0 .. comp.sfx) {
                        // decode entropy, dequantize & dezigzag
                        short[64] coeffs = decode_block(dc, *comp, dc.qtables[comp.qtable]);

                        // top left sample of this data unit in the plane
                        const long outx = (mcu_i * comp.sfx + du_i) * 8;
                        const long outy = (mcu_j * comp.sfy + du_j) * 8;
                        ubyte* dst = comp.data.ptr + outy*dst_stride + outx;

                        // idct & level-shift
                        stbi__idct_block(dst, dst_stride, coeffs);
                    }
                }
            }

            // still inside the current interval?
            if (--mcus != 0)
                continue;

            // interval finished; stop when it was the last one
            if (--intervals == 0)
                return;

            read_restart(dc.stream);    // RSTx marker

            if (intervals != 1) {
                mcus = dc.restart_interval;
            } else {
                // last interval, may have fewer MCUs than defined by DRI
                mcus = (dc.num_mcu_y - mcu_j - 1) * dc.num_mcu_x + dc.num_mcu_x - mcu_i - 1;
            }

            // reset decoder: drop buffered bits and the DC predictions
            dc.cb = 0;
            dc.bits_left = 0;
            foreach (ref c; dc.comps[0..dc.num_comps])
                c.pred = 0;
        }
    }
}
1980 
// RST0-RST7
//
// Reads two bytes from the stream and throws ImageIOException unless they are
// 0xff followed by one of the restart markers RST0..RST7.
void read_restart(Reader stream) {
    ubyte[2] marker = void;
    stream.readExact(marker, marker.length);

    const bool is_restart = marker[0] == 0xff
                         && Marker.RST0 <= marker[1]
                         && marker[1] <= Marker.RST7;
    if (!is_restart)
        throw new ImageIOException("reset marker missing");
    // the markers should cycle 0 through 7, could check that here...
}
1989 
// Maps the position of a coefficient in the coded (zigzag) sequence to its
// index in the 64 element block array. decode_block uses it as
// res[dezigzag[k]] for the k-th coded coefficient.
immutable ubyte[64] dezigzag = [
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63,
];
2000 
// decode entropy, dequantize & dezigzag (see section F.2)
//
// Decodes one block of 64 coefficients for comp from the bit stream of dc.
// The DC coefficient is coded as a difference to comp.pred, which is updated.
// AC coefficients are coded as (run of zeros, size) pairs. Every coefficient
// is multiplied by the entry of qtable at its coded position and stored at
// the position given by dezigzag.
//
// Throws ImageIOException if the DC size category is above 15 or if a run
// moves past the last coefficient.
short[64] decode_block(ref JPEG_Decoder dc, ref JPEG_Decoder.Component comp,
                                                    in ref ubyte[64] qtable)
{
    short[64] res = 0;

    ubyte t = decode_huff(dc, dc.dc_tables[comp.dc_table]);
    // t is the number of bits of the DC difference. It comes straight from the
    // table values in the file and can be anything up to 255, while
    // receive_and_extend shifts an int by it. Reject what cannot be valid.
    if (15 < t)
        throw new ImageIOException("corrupt block");
    int diff = t ? dc.receive_and_extend(t) : 0;

    comp.pred = comp.pred + diff;
    res[0] = cast(short) (comp.pred * qtable[0]);

    int k = 1;
    do {
        ubyte rs = decode_huff(dc, dc.ac_tables[comp.ac_table]);
        ubyte rrrr = rs >> 4;   // run of zero coefficients
        ubyte ssss = rs & 0xf;  // number of bits of the coefficient

        if (ssss == 0) {
            if (rrrr != 0xf)
                break;      // end of block
            k += 16;    // run length is 16
            continue;
        }

        k += rrrr;

        if (63 < k)
            throw new ImageIOException("corrupt block");
        res[dezigzag[k]] = cast(short) (dc.receive_and_extend(ssss) * qtable[k]);
        k += 1;
    } while (k < 64);

    return res;
}
2036 
// Reads s bits from the stream, most significant first, and converts them to
// a signed value: a raw value below 2^(s-1) stands for raw - 2^s + 1,
// everything else is taken as is.
// The callers in this file only pass a non-zero s.
int receive_and_extend(ref JPEG_Decoder dc, ubyte s) {
    // receive
    int raw = 0;
    for (int remaining = s; 0 < remaining; --remaining)
        raw = (raw << 1) + nextbit(dc);

    // extend
    const int threshold = 1 << (s-1);
    return (raw < threshold) ? raw + (-1 << s) + 1 : raw;
}
2048 
// F.16 -- the DECODE
//
// Reads bits from the stream until they form a code that is not larger than
// tab.maxcode for the current code length and returns the value that tab
// assigns to it.
// Throws ImageIOException if no code is found within tab.maxcode.length bits
// or if the resulting index lies outside tab.values.
ubyte decode_huff(ref JPEG_Decoder dc, in ref HuffTab tab) {
    short code = nextbit(dc);
    int len_idx = 0;    // code length - 1

    while (code > tab.maxcode[len_idx]) {
        code = cast(short) ((code << 1) + nextbit(dc));
        ++len_idx;
        if (len_idx >= tab.maxcode.length)
            throw new ImageIOException("corrupt huffman coding");
    }

    const int value_idx = tab.valptr[len_idx] + code - tab.mincode[len_idx];
    // the cast makes a negative index fail the check as well
    if (cast(uint) value_idx >= tab.values.length)
        throw new ImageIOException("corrupt huffman coding");
    return tab.values[value_idx];
}
2065 
// F.2.2.5 and F.18
//
// Returns the next bit of the entropy coded data. dc.cb holds the current
// byte and dc.bits_left the number of bits not yet handed out. A 0xff data
// byte has to be followed by a 0x00 byte, which is skipped; anything else
// after 0xff throws ImageIOException.
ubyte nextbit(ref JPEG_Decoder dc) {
    // fetches the next data byte into dc.cb
    void refill() {
        ubyte[1] fetched;
        dc.stream.readExact(fetched, 1);
        dc.cb = fetched[0];
        dc.bits_left = 8;

        if (dc.cb != 0xff)
            return;

        // 0xff in the data is followed by a stuffed zero byte
        dc.stream.readExact(fetched, 1);
        if (fetched[0] != 0x0) {
            throw new ImageIOException("unexpected marker");
        }
    }

    if (dc.bits_left == 0)
        refill();

    // hand out the top bit and move the next one into its place
    const ubyte bit = dc.cb >> 7;
    dc.cb <<= 1;
    dc.bits_left -= 1;
    return bit;
}
2087 
// Converts the decoded component planes of dc to the target pixel format and
// returns a new buffer of dc.width * dc.height * dc.tgt_chans bytes.
//
// The planes are padded to whole MCUs, so a row of component c is
// dc.num_mcu_x * comps[c].sfx * 8 samples long; only the first dc.width
// columns and dc.height rows end up in the result.
//
// The conversion is selected by (number of components, target channels):
//   3 -> 3/4: YCbCr to RGB(A), upsampling the components where needed
//   3/1 -> 1/2: first component only, as Y(A)
//   1 -> 3/4: first component copied to R, G and B (and A = 255)
// Alpha, when present in the target, is always 255.
ubyte[] reconstruct(in ref JPEG_Decoder dc) {
    auto result = new ubyte[dc.width * dc.height * dc.tgt_chans];

    switch (dc.num_comps * 10 + dc.tgt_chans) {
        case 34, 33:
            // Use specialized bilinear filtering functions for the frequent cases where
            // Cb & Cr channels have half resolution.
            if ((dc.comps[0].sfx <= 2 && dc.comps[0].sfy <= 2)
            && (dc.comps[0].sfx + dc.comps[0].sfy >= 3)
            && dc.comps[1].sfx == 1 && dc.comps[1].sfy == 1
            && dc.comps[2].sfx == 1 && dc.comps[2].sfy == 1) {
                void function(in ubyte[], in ubyte[], ubyte[]) resample;
                switch (dc.comps[0].sfx * 10 + dc.comps[0].sfy) {
                    case 22: resample = &upsample_h2_v2; break;
                    case 21: resample = &upsample_h2_v1; break;
                    case 12: resample = &upsample_h1_v2; break;
                    default: throw new ImageIOException("bug");
                }

                // one upsampled row of Cb and Cr, reused for every output row
                auto comp1 = new ubyte[](dc.width);
                auto comp2 = new ubyte[](dc.width);

                size_t s = 0;   // toggles between 0 and 1 on every output row
                size_t di = 0;
                foreach (j; 0 .. dc.height) {
                    // mi: chroma row covering output row j
                    // si: second chroma row to blend with; mi itself at the
                    //     top and bottom edge, otherwise mi-1 or mi+1
                    //     depending on s
                    size_t mi = j / dc.comps[0].sfy;
                    size_t si = (mi == 0 || mi >= (dc.height-1)/dc.comps[0].sfy)
                              ? mi : mi - 1 + s * 2;
                    s = s ^ 1;

                    size_t cs = dc.num_mcu_x * dc.comps[1].sfx * 8;
                    size_t cl0 = mi * cs;
                    size_t cl1 = si * cs;
                    resample(dc.comps[1].data[cl0 .. cl0 + dc.comps[1].x],
                             dc.comps[1].data[cl1 .. cl1 + dc.comps[1].x],
                             comp1[]);
                    resample(dc.comps[2].data[cl0 .. cl0 + dc.comps[2].x],
                             dc.comps[2].data[cl1 .. cl1 + dc.comps[2].x],
                             comp2[]);

                    foreach (i; 0 .. dc.width) {
                        result[di .. di+3] = ycbcr_to_rgb(
                            dc.comps[0].data[j * dc.num_mcu_x * dc.comps[0].sfx * 8 + i],
                            comp1[i],
                            comp2[i],
                        );
                        if (dc.tgt_chans == 4)
                            result[di+3] = 255;
                        di += dc.tgt_chans;
                    }
                }

                return result;
            }

            // any other subsampling: generic nearest neighbor upsampling
            foreach (const ref comp; dc.comps[0..dc.num_comps]) {
                if (comp.sfx != dc.hmax || comp.sfy != dc.vmax)
                    return dc.upsample(result);
            }

            // all components have full resolution: convert sample by sample
            size_t si, di;
            foreach (j; 0 .. dc.height) {
                foreach (i; 0 .. dc.width) {
                    result[di .. di+3] = ycbcr_to_rgb(
                        dc.comps[0].data[si+i],
                        dc.comps[1].data[si+i],
                        dc.comps[2].data[si+i],
                    );
                    if (dc.tgt_chans == 4)
                        result[di+3] = 255;
                    di += dc.tgt_chans;
                }
                si += dc.num_mcu_x * dc.comps[0].sfx * 8;
            }
            return result;
        case 32, 12, 31, 11:
            // gray target: only the first component is used
            const comp = &dc.comps[0];
            if (comp.sfx == dc.hmax && comp.sfy == dc.vmax) {
                size_t si, di;
                if (dc.tgt_chans == 2) {
                    foreach (j; 0 .. dc.height) {
                        foreach (i; 0 .. dc.width) {
                            result[di++] = comp.data[si+i];
                            result[di++] = 255;
                        }
                        si += dc.num_mcu_x * comp.sfx * 8;
                    }
                } else {
                    // one channel: rows can be copied as they are
                    foreach (j; 0 .. dc.height) {
                        result[di .. di+dc.width] = comp.data[si .. si+dc.width];
                        si += dc.num_mcu_x * comp.sfx * 8;
                        di += dc.width;
                    }
                }
                return result;
            } else {
                // need to resample (haven't tested this...)
                return dc.upsample_luma(result);
            }
        case 14, 13:
            // gray source, RGB(A) target: copy the sample to all three channels
            const comp = &dc.comps[0];
            size_t si, di;
            foreach (j; 0 .. dc.height) {
                foreach (i; 0 .. dc.width) {
                    result[di .. di+3] = comp.data[si+i];
                    if (dc.tgt_chans == 4)
                        result[di+3] = 255;
                    di += dc.tgt_chans;
                }
                si += dc.num_mcu_x * comp.sfx * 8;
            }
            return result;
        default: assert(0);
    }
}
2203 
// Upsamples one row of a component that has half resolution in both
// directions.
//
// line0 is the source row nearest to the output row and line1 the
// neighbouring source row on the other side. Every source sample yields two
// output samples, each a 9:3:3:1 blend of the four surrounding source samples
// (nearest sample weighted 9). The first and the last output sample have no
// horizontal neighbour and are a 3:1 blend of line0 and line1 only.
//
// result receives 2 * line0.length samples, or one less when its length is
// odd. line1 must be at least as long as line0.
void upsample_h2_v2(in ubyte[] line0, in ubyte[] line1, ubyte[] result) {
    ubyte mix(ubyte mm, ubyte ms, ubyte sm, ubyte ss) {
       return cast(ubyte) (( cast(uint) mm * 3 * 3
                           + cast(uint) ms * 3 * 1
                           + cast(uint) sm * 1 * 3
                           + cast(uint) ss * 1 * 1
                           + 8) / 16);
    }

    result[0] = cast(ubyte) (( cast(uint) line0[0] * 3
                             + cast(uint) line1[0] * 1
                             + 2) / 4);
    if (line0.length == 1) {
        // A single source sample covers the whole row. Without this fill the
        // second pixel of a two pixel wide image would keep whatever the
        // buffer held before.
        result[1 .. $] = result[0];
        return;
    }
    result[1] = mix(line0[0], line0[1], line1[0], line1[1]);

    size_t di = 2;
    foreach (i; 1 .. line0.length) {
        // left output sample of source sample i
        result[di] = mix(line0[i], line0[i-1], line1[i], line1[i-1]);
        di += 1;
        if (i == line0.length-1) {
            // right edge: present only when the output width is even
            if (di < result.length) {
                result[di] = cast(ubyte) (( cast(uint) line0[i] * 3
                                          + cast(uint) line1[i] * 1
                                          + 2) / 4);
            }
            return;
        }
        // right output sample of source sample i
        result[di] = mix(line0[i], line0[i+1], line1[i], line1[i+1]);
        di += 1;
    }
}
2235 
// Upsamples one row of a component that has half resolution horizontally and
// full resolution vertically.
//
// Every source sample of line0 yields two output samples, each a 3:1 blend of
// the sample and its left or right neighbour. The first and the last output
// sample have no neighbour and are copied. _line1 is not used; the parameter
// exists so that the function has the same signature as the other upsample_*
// functions.
//
// result receives 2 * line0.length samples, or one less when its length is
// odd.
void upsample_h2_v1(in ubyte[] line0, in ubyte[] _line1, ubyte[] result) {
    result[0] = line0[0];
    if (line0.length == 1) {
        // A single source sample covers the whole row. Without this fill the
        // second pixel of a two pixel wide image would keep whatever the
        // buffer held before.
        result[1 .. $] = line0[0];
        return;
    }
    result[1] = cast(ubyte) (( cast(uint) line0[0] * 3
                             + cast(uint) line0[1] * 1
                             + 2) / 4);
    size_t di = 2;
    foreach (i; 1 .. line0.length) {
        // left output sample of source sample i
        result[di] = cast(ubyte) (( cast(uint) line0[i-1] * 1
                                  + cast(uint) line0[i+0] * 3
                                  + 2) / 4);
        di += 1;
        if (i == line0.length-1) {
            // right edge: present only when the output width is even
            if (di < result.length) result[di] = line0[i];
            return;
        }
        // right output sample of source sample i
        result[di] = cast(ubyte) (( cast(uint) line0[i+0] * 3
                                  + cast(uint) line0[i+1] * 1
                                  + 2) / 4);
        di += 1;
    }
}
2258 
// Upsamples one row of a component that has full resolution horizontally and
// half resolution vertically: every output sample is a 3:1 blend of line0 and
// line1 at the same column, rounded to nearest.
// Both lines must be at least as long as result.
void upsample_h1_v2(in ubyte[] line0, in ubyte[] line1, ubyte[] result) {
    foreach (i, ref sample; result) {
        const uint near = line0[i];
        const uint far = line1[i];
        sample = cast(ubyte) ((near * 3 + far * 1 + 2) / 4);
    }
}
2266 
// Nearest neighbor
//
// Fills result with the first component of dc scaled up to dc.width x
// dc.height. One sample is written per pixel, followed by 255 when
// dc.tgt_chans is 2. The source position advances by sfx/hmax (sfy/vmax) of a
// sample per output column (row), starting half a step in.
// Returns result.
ubyte[] upsample_luma(in ref JPEG_Decoder dc, ubyte[] result) {
    const luma = &dc.comps[0];
    const size_t row_stride = dc.num_mcu_x * luma.sfx * 8;
    const float step_y = cast(float) luma.sfy / cast(float) dc.vmax;
    const float step_x = cast(float) luma.sfx / cast(float) dc.hmax;

    float frac_y = step_y * 0.5;    // fractional source row position
    size_t row_off = 0;             // offset of the current source row
    size_t out_idx = 0;

    foreach (row; 0 .. dc.height) {
        float frac_x = step_x * 0.5;    // fractional source column position
        size_t col = 0;                 // current source column

        foreach (column; 0 .. dc.width) {
            result[out_idx] = luma.data[row_off + col];
            if (dc.tgt_chans == 2)
                result[out_idx+1] = 255;
            out_idx += dc.tgt_chans;

            frac_x += step_x;
            if (frac_x >= 1.0) {
                frac_x -= 1.0;
                col += 1;
            }
        }

        frac_y += step_y;
        if (frac_y >= 1.0) {
            frac_y -= 1.0;
            row_off += row_stride;
        }
    }
    return result;
}
2294 
// Nearest neighbor
//
// Fills result with the three components of dc scaled up to dc.width x
// dc.height and converted with ycbcr_to_rgb. Three bytes are written per
// pixel, followed by 255 when dc.tgt_chans is 4. Every component keeps its
// own source position, which advances by sfx/hmax (sfy/vmax) of a sample per
// output column (row), starting half a step in.
// Returns result.
ubyte[] upsample(in ref JPEG_Decoder dc, ubyte[] result) {
    // moves a fractional position one step on; a completed whole sample is
    // carried over into the integer offset
    static void advance(ref float frac, ref size_t offset, float step, size_t inc) {
        frac += step;
        if (frac >= 1.0) {
            frac -= 1.0;
            offset += inc;
        }
    }

    // samples per row of each component plane
    const size_t stride0 = dc.num_mcu_x * dc.comps[0].sfx * 8;
    const size_t stride1 = dc.num_mcu_x * dc.comps[1].sfx * 8;
    const size_t stride2 = dc.num_mcu_x * dc.comps[2].sfx * 8;

    const float y_step0 = cast(float) dc.comps[0].sfy / cast(float) dc.vmax;
    const float y_step1 = cast(float) dc.comps[1].sfy / cast(float) dc.vmax;
    const float y_step2 = cast(float) dc.comps[2].sfy / cast(float) dc.vmax;
    const float x_step0 = cast(float) dc.comps[0].sfx / cast(float) dc.hmax;
    const float x_step1 = cast(float) dc.comps[1].sfx / cast(float) dc.hmax;
    const float x_step2 = cast(float) dc.comps[2].sfx / cast(float) dc.hmax;

    // fractional row positions and offsets of the current source rows
    float y0 = y_step0 * 0.5;
    float y1 = y_step1 * 0.5;
    float y2 = y_step2 * 0.5;
    size_t row0 = 0, row1 = 0, row2 = 0;

    size_t out_idx = 0;

    foreach (_j; 0 .. dc.height) {
        // fractional column positions and current source columns
        float x0 = x_step0 * 0.5;
        float x1 = x_step1 * 0.5;
        float x2 = x_step2 * 0.5;
        size_t col0 = 0, col1 = 0, col2 = 0;

        foreach (_i; 0 .. dc.width) {
            result[out_idx .. out_idx+3] = ycbcr_to_rgb(
                dc.comps[0].data[row0 + col0],
                dc.comps[1].data[row1 + col1],
                dc.comps[2].data[row2 + col2],
            );
            if (dc.tgt_chans == 4)
                result[out_idx+3] = 255;
            out_idx += dc.tgt_chans;

            advance(x0, col0, x_step0, 1);
            advance(x1, col1, x_step1, 1);
            advance(x2, col2, x_step2, 1);
        }

        advance(y0, row0, y_step0, stride0);
        advance(y1, row1, y_step1, stride1);
        advance(y2, row2, y_step2, stride2);
    }
    return result;
}
2349 
// Converts one YCbCr sample to RGB. Cb and Cr are centered on 128; every
// channel is limited to 0..255 with clamp.
ubyte[3] ycbcr_to_rgb(ubyte y, ubyte cb, ubyte cr) pure {
    const int cb_off = cb - 128;
    const int cr_off = cr - 128;

    ubyte[3] rgb = void;
    rgb[0] = clamp(y + 1.402*cr_off);
    rgb[1] = clamp(y - 0.34414*cb_off - 0.71414*cr_off);
    rgb[2] = clamp(y + 1.772*cb_off);
    return rgb;
}

// Limits x to 0..255 and drops the fraction.
ubyte clamp(float x) pure {
    // written with negated comparisons so that the in-range test is exactly
    // "neither below 0 nor above 255"
    if (!(x < 0) && !(255 < x))
        return cast(ubyte) x;
    return (x < 0) ? ubyte.min : ubyte.max;
}
2363 
2364 // ------------------------------------------------------------
2365 // The IDCT stuff here (to the next dashed line) is copied and adapted from
2366 // stb_image which is released under public domain.  Many thanks to stb_image
2367 // author, Sean Barrett.
2368 // Link: https://github.com/nothings/stb/blob/master/stb_image.h
2369 
// Converts x to fixed point with 12 fractional bits; the 0.5 is added before
// the cast drops the fraction.
pure int f2f(float x) {
    enum scale = 4096;  // 1 << 12
    return cast(int) (x * scale + 0.5);
}

// Scales an integer to the same fixed point format (x * 2^12).
pure int fsh(int x) {
    enum frac_bits = 12;
    return x << frac_bits;
}
2372 
// from stb_image, derived from jidctint -- DCT_ISLOW
//
// One 8 point IDCT pass in fixed point. s0..s7 are the inputs. The results
// are left in two halves that the caller combines: x0..x3 hold the part
// computed from the even inputs (s0, s2, s4, s6) and t0..t3 the part computed
// from the odd inputs (s1, s3, s5, s7). The callers in this file form the
// outputs as x0+t3, x1+t2, x2+t1, x3+t0 and x3-t0, x2-t1, x1-t2, x0-t3.
// All results are scaled up by 1<<12 (see f2f and fsh).
//
// t0..t3 are also used as scratch space while the even part is computed, so
// the order of the statements matters.
pure void STBI__IDCT_1D(ref int t0, ref int t1, ref int t2, ref int t3,
                        ref int x0, ref int x1, ref int x2, ref int x3,
        int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7)
{
   int p1,p2,p3,p4,p5;
   //int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3;
   // even part
   p2 = s2;
   p3 = s6;
   p1 = (p2+p3) * f2f(0.5411961f);
   t2 = p1 + p3 * f2f(-1.847759065f);
   t3 = p1 + p2 * f2f( 0.765366865f);
   p2 = s0;
   p3 = s4;
   t0 = fsh(p2+p3);
   t1 = fsh(p2-p3);
   x0 = t0+t3;
   x3 = t0-t3;
   x1 = t1+t2;
   x2 = t1-t2;
   // odd part; t0..t3 are reused from here on
   t0 = s7;
   t1 = s5;
   t2 = s3;
   t3 = s1;
   p3 = t0+t2;
   p4 = t1+t3;
   p1 = t0+t3;
   p2 = t1+t2;
   p5 = (p3+p4)*f2f( 1.175875602f);
   t0 = t0*f2f( 0.298631336f);
   t1 = t1*f2f( 2.053119869f);
   t2 = t2*f2f( 3.072711026f);
   t3 = t3*f2f( 1.501321110f);
   p1 = p5 + p1*f2f(-0.899976223f);
   p2 = p5 + p2*f2f(-2.562915447f);
   p3 = p3*f2f(-1.961570560f);
   p4 = p4*f2f(-0.390180644f);
   t3 += p1+p4;
   t2 += p2+p3;
   t1 += p2+p4;
   t0 += p1+p3;
}
2415 
// idct and level-shift
//
// Runs the 2D IDCT on the 64 coefficients in data and writes the resulting
// 8x8 samples through dst. Row r of the output starts at dst + r*dst_stride.
// The first pass works on the columns of data (elements 8 apart) and stores
// intermediate values in val; the second pass works on the rows of val, adds
// 128 and clamps to 0..255.
//
// dst is a raw pointer: the caller has to make sure that 8 rows of 8 bytes,
// dst_stride apart, are writable.
pure void stbi__idct_block(ubyte* dst, long dst_stride, in ref short[64] data) {
   int i;
   int[64] val;
   int* v = val.ptr;
   const(short)* d = data.ptr;

   // columns
   for (i=0; i < 8; ++i,++d, ++v) {
      // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
           && d[40]==0 && d[48]==0 && d[56]==0) {
         //    no shortcut                 0     seconds
         //    (1|2|3|4|5|6|7)==0          0     seconds
         //    all separate               -0.047 seconds
         //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
         // only the first element of the column is set: the whole output
         // column is that value, scaled by the 2 extra bits kept below
         int dcterm = d[0] << 2;
         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
      } else {
         int t0,t1,t2,t3,x0,x1,x2,x3;
         STBI__IDCT_1D(
             t0, t1, t2, t3,
             x0, x1, x2, x3,
             d[ 0], d[ 8], d[16], d[24],
             d[32], d[40], d[48], d[56]
         );
         // constants scaled things up by 1<<12; let's bring them back
         // down, but keep 2 extra bits of precision
         x0 += 512; x1 += 512; x2 += 512; x3 += 512;
         v[ 0] = (x0+t3) >> 10;
         v[56] = (x0-t3) >> 10;
         v[ 8] = (x1+t2) >> 10;
         v[48] = (x1-t2) >> 10;
         v[16] = (x2+t1) >> 10;
         v[40] = (x2-t1) >> 10;
         v[24] = (x3+t0) >> 10;
         v[32] = (x3-t0) >> 10;
      }
   }

   // rows: v walks over val one row at a time, o over the output rows
   ubyte* o = dst;
   for (i=0, v=val.ptr; i < 8; ++i,v+=8,o+=dst_stride) {
      // no fast case since the first 1D IDCT spread components out
      int t0,t1,t2,t3,x0,x1,x2,x3;
      STBI__IDCT_1D(
          t0, t1, t2, t3,
          x0, x1, x2, x3,
          v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]
      );
      // constants scaled things up by 1<<12, plus we had 1<<2 from first
      // loop, plus horizontal and vertical each scale by sqrt(8) so together
      // we've got an extra 1<<3, so 1<<17 total we need to remove.
      // so we want to round that, which means adding 0.5 * 1<<17,
      // aka 65536. Also, we'll end up with -128 to 127 that we want
      // to encode as 0-255 by adding 128, so we'll add that before the shift
      x0 += 65536 + (128<<17);
      x1 += 65536 + (128<<17);
      x2 += 65536 + (128<<17);
      x3 += 65536 + (128<<17);
      // tried computing the shifts into temps, or'ing the temps to see
      // if any were out of range, but that was slower
      o[0] = stbi__clamp((x0+t3) >> 17);
      o[7] = stbi__clamp((x0-t3) >> 17);
      o[1] = stbi__clamp((x1+t2) >> 17);
      o[6] = stbi__clamp((x1-t2) >> 17);
      o[2] = stbi__clamp((x2+t1) >> 17);
      o[5] = stbi__clamp((x2-t1) >> 17);
      o[3] = stbi__clamp((x3+t0) >> 17);
      o[4] = stbi__clamp((x3-t0) >> 17);
   }
}
2487 
// clamp to 0-255: negative values become 0, values above 255 become 255
pure ubyte stbi__clamp(int x) {
   if (x < 0)
      return 0;
   if (x > 255)
      return 255;
   return cast(ubyte) x;
}
2496 
2497 // the above is adapted from stb_image
2498 // ------------------------------------------------------------
2499 
/// Reads the dimensions and the number of components of the JPEG file
/// filename into w, h and chans. The file is opened with a FileReader and the
/// work is done by the Reader overload of read_jpeg_info.
public void read_jpeg_info(in char[] filename, out long w, out long h, out long chans) {
    scope file_reader = new FileReader(filename);
    read_jpeg_info(file_reader, w, h, chans);
}
2505 
// Reads the width, height and number of components of a JPEG image from
// stream without decoding it.
//
// After the SOI marker the segments are walked one by one: DRI, DHT, DQT, COM
// and APPn segments are skipped, the first frame header (SOF0..SOF3 or
// SOF9..SOF11) supplies the three values.
//
// Throws ImageIOException if the stream does not start with SOI, if the scan
// or the end of the image comes before a frame header, if a marker is
// expected but missing or if a marker is not one of the above.
void read_jpeg_info(Reader stream, out long w, out long h, out long chans) {
    ubyte[2] marker = void;
    stream.readExact(marker, 2);

    // SOI
    if (marker[0] != 0xff || marker[1] != 0xd8)
        throw new ImageIOException("not JPEG");

    while (true) {
        stream.readExact(marker, 2);

        if (marker[0] != 0xff)
            throw new ImageIOException("no frame header");
        // a marker may be preceded by any number of 0xff fill bytes
        while (marker[1] == 0xff)
            stream.readExact(marker[1..$], 1);

        switch (marker[1]) with (Marker) {
            case SOF0: .. case SOF3: goto case;
            case SOF9: .. case SOF11:
                // length (2), precision (1), height (2), width (2), components (1)
                ubyte[8] frame;
                stream.readExact(frame[0..8], 8);
                h = bigEndianToNative!ushort(frame[3..5]);
                w = bigEndianToNative!ushort(frame[5..7]);
                chans = frame[7];
                return;
            case SOS, EOI: throw new ImageIOException("no frame header");
            case DRI, DHT, DQT, COM: goto case;
            case APP0: .. case APPf:
                // skip the segment; its length field counts itself
                ubyte[2] lenbuf = void;
                stream.readExact(lenbuf, 2);
                const int skiplen = bigEndianToNative!ushort(lenbuf) - 2;
                stream.seek(skiplen, SEEK_CUR);
                break;
            default: throw new ImageIOException("unsupported marker");
        }
    }
    assert(0);
}
2547 
2548 // --------------------------------------------------------------------------------
2549 // Conversions
2550 
/// Internal channel layouts. Values 1..4 equal the number of channels;
/// BGR and BGRA get the next two values so every layout has a distinct id.
enum _ColFmt : int {
    Unknown = 0,
    Y       = 1,
    YA      = 2,
    RGB     = 3,
    RGBA    = 4,
    BGR     = 5,
    BGRA    = 6,
}
2560 
/// Signature shared by all line converters: reads pixels from `src` and
/// writes the converted pixels to `tgt`.
alias LineConv = void function(in ubyte[] src, ubyte[] tgt);

/// Selects the line converter for a source/target layout pair.
///
/// Equal layouts yield `copy_line`. Otherwise both values are packed into a
/// single key and matched against the known `_ColFmt` combinations.
///
/// Throws: ImageIOException("internal error") when no converter exists for
/// the requested pair.
LineConv get_converter(long src_chans, long tgt_chans) pure {
    // Packs a (from, to) pair into one switchable value; also evaluated at
    // compile time to build the case labels below.
    long pair(long from, long to) pure nothrow { return from*16 + to; }

    if (src_chans != tgt_chans) {
        switch (pair(src_chans, tgt_chans)) with (_ColFmt) {
            // from Y
            case pair(Y, YA)      : return &Y_to_YA;
            case pair(Y, RGB)     : return &Y_to_RGB;
            case pair(Y, RGBA)    : return &Y_to_RGBA;
            case pair(Y, BGR)     : return &Y_to_BGR;
            case pair(Y, BGRA)    : return &Y_to_BGRA;
            // from YA
            case pair(YA, Y)      : return &YA_to_Y;
            case pair(YA, RGB)    : return &YA_to_RGB;
            case pair(YA, RGBA)   : return &YA_to_RGBA;
            case pair(YA, BGR)    : return &YA_to_BGR;
            case pair(YA, BGRA)   : return &YA_to_BGRA;
            // from RGB
            case pair(RGB, Y)     : return &RGB_to_Y;
            case pair(RGB, YA)    : return &RGB_to_YA;
            case pair(RGB, RGBA)  : return &RGB_to_RGBA;
            case pair(RGB, BGR)   : return &RGB_to_BGR;
            case pair(RGB, BGRA)  : return &RGB_to_BGRA;
            // from RGBA
            case pair(RGBA, Y)    : return &RGBA_to_Y;
            case pair(RGBA, YA)   : return &RGBA_to_YA;
            case pair(RGBA, RGB)  : return &RGBA_to_RGB;
            case pair(RGBA, BGR)  : return &RGBA_to_BGR;
            case pair(RGBA, BGRA) : return &RGBA_to_BGRA;
            // from BGR
            case pair(BGR, Y)     : return &BGR_to_Y;
            case pair(BGR, YA)    : return &BGR_to_YA;
            case pair(BGR, RGB)   : return &BGR_to_RGB;
            case pair(BGR, RGBA)  : return &BGR_to_RGBA;
            // from BGRA
            case pair(BGRA, Y)    : return &BGRA_to_Y;
            case pair(BGRA, YA)   : return &BGRA_to_YA;
            case pair(BGRA, RGB)  : return &BGRA_to_RGB;
            case pair(BGRA, RGBA) : return &BGRA_to_RGBA;
            default               : throw new ImageIOException("internal error");
        }
    }

    return &copy_line;
}
2601 
/// Copies a line unchanged; `src` and `tgt` must have the same length.
void copy_line(in ubyte[] src, ubyte[] tgt) pure nothrow {
    tgt[] = src[];
}
2605 
/// Reduces an RGB triple to one gray value using a weighted sum.
/// The fractional part of the sum is truncated, not rounded.
ubyte luminance(ubyte r, ubyte g, ubyte b) pure nothrow {
    // somewhat arbitrary weights
    enum double redWeight   = 0.21;
    enum double greenWeight = 0.64;
    enum double blueWeight  = 0.15;
    return cast(ubyte) (redWeight*r + greenWeight*g + blueWeight*b);
}
2609 
/// Expands gray pixels to gray+alpha with a fully opaque alpha (255).
void Y_to_YA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    foreach (i, gray; src) {
        tgt[2*i]     = gray;
        tgt[2*i + 1] = 255;
    }
}
2616 
// Gray replicated into three channels is the same for RGB and BGR.
alias Y_to_BGR = Y_to_RGB;
/// Expands gray pixels to three identical colour channels.
void Y_to_RGB(in ubyte[] src, ubyte[] tgt) pure nothrow {
    foreach (i, gray; src) {
        immutable size_t o = 3*i;
        tgt[o .. o+3] = gray;
    }
}
2622 
// Gray replicated into three channels is the same for RGBA and BGRA.
alias Y_to_BGRA = Y_to_RGBA;
/// Expands gray pixels to three identical colour channels plus an opaque
/// alpha (255).
void Y_to_RGBA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    foreach (i, gray; src) {
        immutable size_t o = 4*i;
        tgt[o .. o+3] = gray;
        tgt[o+3] = 255;
    }
}
2630 
/// Drops the alpha channel, keeping only the gray value of each pixel.
void YA_to_Y(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t outPos = 0;
    for (size_t inPos = 0; inPos < src.length; inPos += 2) {
        tgt[outPos] = src[inPos];
        ++outPos;
    }
}
2635 
// Gray replicated into three channels is the same for RGB and BGR.
alias YA_to_BGR = YA_to_RGB;
/// Replicates the gray value into three colour channels; alpha is dropped.
void YA_to_RGB(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t rd = 0, wr = 0;
    while (rd < src.length) {
        tgt[wr .. wr+3] = src[rd];
        rd += 2;
        wr += 3;
    }
}
2641 
// Gray replicated into three channels is the same for RGBA and BGRA.
alias YA_to_BGRA = YA_to_RGBA;
/// Replicates the gray value into three colour channels and carries the
/// source alpha over unchanged.
void YA_to_RGBA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t rd = 0, wr = 0;
    while (rd < src.length) {
        tgt[wr .. wr+3] = src[rd];
        tgt[wr+3] = src[rd+1];
        rd += 2;
        wr += 4;
    }
}
2649 
/// Converts RGB pixels to gray via `luminance`.
void RGB_to_Y(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t rd = 0, wr = 0;
    while (rd < src.length) {
        tgt[wr] = luminance(src[rd], src[rd+1], src[rd+2]);
        rd += 3;
        wr += 1;
    }
}
2654 
/// Converts RGB pixels to gray via `luminance` and adds an opaque alpha (255).
void RGB_to_YA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t rd = 0, wr = 0;
    while (rd < src.length) {
        tgt[wr]   = luminance(src[rd], src[rd+1], src[rd+2]);
        tgt[wr+1] = 255;
        rd += 3;
        wr += 2;
    }
}
2661 
/// Copies the three colour channels and appends an opaque alpha (255).
void RGB_to_RGBA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t rd = 0, wr = 0;
    while (rd < src.length) {
        tgt[wr .. wr+3] = src[rd .. rd+3];
        tgt[wr+3] = 255;
        rd += 3;
        wr += 4;
    }
}
2668 
/// Converts RGBA pixels to gray via `luminance`; alpha is dropped.
void RGBA_to_Y(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t rd = 0, wr = 0;
    while (rd < src.length) {
        tgt[wr] = luminance(src[rd], src[rd+1], src[rd+2]);
        rd += 4;
        wr += 1;
    }
}
2673 
/// Converts RGBA pixels to gray via `luminance` and keeps the source alpha.
void RGBA_to_YA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t rd = 0, wr = 0;
    while (rd < src.length) {
        tgt[wr]   = luminance(src[rd], src[rd+1], src[rd+2]);
        tgt[wr+1] = src[rd+3];
        rd += 4;
        wr += 2;
    }
}
2680 
/// Copies the three colour channels of each pixel; alpha is dropped.
void RGBA_to_RGB(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t rd = 0, wr = 0;
    while (rd < src.length) {
        tgt[wr .. wr+3] = src[rd .. rd+3];
        rd += 4;
        wr += 3;
    }
}
2685 
/// Converts BGR pixels to gray via `luminance`.
///
/// Each source pixel is stored as blue, green, red, so the arguments are
/// passed to `luminance` in reverse order.
void BGR_to_Y(in ubyte[] src, ubyte[] tgt) pure nothrow {
    for (size_t k, t;   k < src.length;   k+=3, t+=1) {
        // red = src[k+2], green = src[k+1], blue = src[k]
        // (blue was previously read from the green slot by mistake)
        tgt[t] = luminance(src[k+2], src[k+1], src[k]);
    }
}
2690 
/// Converts BGR pixels to gray via `luminance` and adds an opaque alpha (255).
///
/// Each source pixel is stored as blue, green, red, so the arguments are
/// passed to `luminance` in reverse order.
void BGR_to_YA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    for (size_t k, t;   k < src.length;   k+=3, t+=2) {
        // red = src[k+2], green = src[k+1], blue = src[k]
        // (blue was previously read from the green slot by mistake)
        tgt[t] = luminance(src[k+2], src[k+1], src[k]);
        tgt[t+1] = 255;
    }
}
2697 
// Swapping the first and third channel is its own inverse.
alias RGB_to_BGR = BGR_to_RGB;
/// Swaps the first and third channel of every three-byte pixel.
void BGR_to_RGB(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t px = 0;
    while (px < src.length) {
        tgt[px]     = src[px + 2];
        tgt[px + 1] = src[px + 1];
        tgt[px + 2] = src[px];
        px += 3;
    }
}
2706 
// Swapping the first and third channel is its own inverse.
alias RGB_to_BGRA = BGR_to_RGBA;
/// Swaps the first and third channel of every three-byte pixel and appends
/// an opaque alpha (255).
void BGR_to_RGBA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t rd = 0, wr = 0;
    while (rd < src.length) {
        tgt[wr]     = src[rd + 2];
        tgt[wr + 1] = src[rd + 1];
        tgt[wr + 2] = src[rd];
        tgt[wr + 3] = 255;
        rd += 3;
        wr += 4;
    }
}
2716 
/// Converts BGRA pixels to gray via `luminance`; alpha is dropped.
/// Source pixels are stored blue, green, red, alpha.
void BGRA_to_Y(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t rd = 0, wr = 0;
    while (rd < src.length) {
        tgt[wr] = luminance(src[rd+2], src[rd+1], src[rd]);
        rd += 4;
        wr += 1;
    }
}
2721 
/// Converts BGRA pixels to gray via `luminance` and keeps the source alpha.
///
/// Source pixels are stored blue, green, red, alpha. The alpha byte is now
/// copied through, matching `RGBA_to_YA`; it was previously overwritten
/// with 255, which silently discarded transparency.
void BGRA_to_YA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    for (size_t k, t;   k < src.length;   k+=4, t+=2) {
        tgt[t] = luminance(src[k+2], src[k+1], src[k]);
        tgt[t+1] = src[k+3];
    }
}
2728 
// Swapping the first and third channel is its own inverse.
alias RGBA_to_BGR = BGRA_to_RGB;
/// Swaps the first and third channel of every four-byte pixel and drops
/// the fourth (alpha) byte.
void BGRA_to_RGB(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t rd = 0, wr = 0;
    while (rd < src.length) {
        tgt[wr]     = src[rd + 2];
        tgt[wr + 1] = src[rd + 1];
        tgt[wr + 2] = src[rd];
        rd += 4;
        wr += 3;
    }
}
2737 
// Swapping the first and third channel is its own inverse.
alias RGBA_to_BGRA = BGRA_to_RGBA;
/// Swaps the first and third channel of every four-byte pixel; the fourth
/// (alpha) byte is copied unchanged.
void BGRA_to_RGBA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t px = 0;
    while (px < src.length) {
        tgt[px]     = src[px + 2];
        tgt[px + 1] = src[px + 1];
        tgt[px + 2] = src[px];
        tgt[px + 3] = src[px + 3];
        px += 4;
    }
}
2747 
2748 // --------------------------------------------------------------------------------
2749 
/// Input abstraction used by the decoders in this module.
interface Reader {
    /// Reads exactly `bytes` bytes into the front of `buffer`.
    void readExact(ubyte[] buffer, size_t bytes);

    /// Moves the read position by `offset` relative to `origin`
    /// (SEEK_SET, SEEK_CUR or SEEK_END).
    void seek(ptrdiff_t offset, int origin);
}
2754 
/// Output abstraction used by the encoders in this module.
interface Writer {
    /// Appends `block` to the output.
    void rawWrite(in ubyte[] block);

    /// Flushes any output buffered by the implementation.
    void flush();
}
2759 
/// Reader backed by a std.stdio `File`.
class FileReader : Reader {
    private File f;

    /// Opens `filename` for binary reading.
    this(in char[] filename) {
        this(File(filename.idup, "rb"));
    }

    /// Wraps an already opened file.
    /// Throws: ImageIOException if `f` is not open.
    this(File f) {
        if (!f.isOpen)
            throw new ImageIOException("File not open");
        this.f = f;
    }

    /// Reads `bytes` bytes into the front of `buffer`.
    /// Throws: Exception if the file yields fewer bytes than requested.
    void readExact(ubyte[] buffer, size_t bytes) {
        immutable got = this.f.rawRead(buffer[0..bytes]).length;
        if (got != bytes)
            throw new Exception("not enough data");
    }

    /// Forwards to `File.seek`.
    void seek(ptrdiff_t offset, int origin) {
        this.f.seek(offset, origin);
    }
}
2780 
/// Reader over an in-memory byte array.
///
/// The read position may be anywhere from 0 up to and including
/// `source.length`, so seeking to the very end of the data is allowed; the
/// next non-empty read then fails with "not enough data". Seeking to the end
/// was previously rejected with "seek error".
class MemReader : Reader {
    /// Wraps `source` without copying it.
    this(in ubyte[] source) {
        this.source = source;
    }

    /// Copies the next `bytes` bytes into the front of `buffer` and advances
    /// the read position.
    /// Throws: Exception if fewer than `bytes` bytes remain.
    void readExact(ubyte[] buffer, size_t bytes) {
        if (source.length - cursor < bytes)
            throw new Exception("not enough data");
        buffer[0..bytes] = source[cursor .. cursor+bytes];
        cursor += bytes;
    }

    /// Moves the read position by `offset` relative to `origin`
    /// (SEEK_SET, SEEK_CUR or SEEK_END).
    /// Throws: Exception if the target lies before the start or past the
    /// end of the data.
    void seek(ptrdiff_t offset, int origin) {
        immutable ptrdiff_t len = cast(ptrdiff_t) source.length;

        // Determine the base position; the range check below is written so
        // that it cannot overflow (base and len - base are both in 0..len).
        ptrdiff_t base;
        switch (origin) {
            case SEEK_SET: base = 0;      break;
            case SEEK_CUR: base = cursor; break;
            case SEEK_END: base = len;    break;
            default: assert(0);
        }

        if (offset < -base || len - base < offset)
            throw new Exception("seek error");
        cursor = base + offset;
    }

    private const ubyte[] source;
    private ptrdiff_t cursor;
}
2818 
/// Writer backed by a std.stdio `File`.
class FileWriter : Writer {
    private File f;

    /// Opens `filename` for binary writing.
    this(in char[] filename) {
        this(File(filename.idup, "wb"));
    }

    /// Wraps an already opened file.
    /// Throws: ImageIOException if `f` is not open.
    this(File f) {
        if (!f.isOpen)
            throw new ImageIOException("File not open");
        this.f = f;
    }

    /// Forwards `block` to `File.rawWrite`.
    void rawWrite(in ubyte[] block) {
        this.f.rawWrite(block);
    }

    /// Forwards to `File.flush`.
    void flush() {
        this.f.flush();
    }
}
2834 
/// Writer that accumulates everything written into a growing byte array.
class MemWriter : Writer {
    private ubyte[] buffer;

    this() { }

    /// Returns the bytes collected so far (the internal array, not a copy).
    ubyte[] result() {
        return buffer;
    }

    /// Appends `block` to the collected bytes.
    void rawWrite(in ubyte[] block) {
        this.buffer ~= block;
    }

    /// Nothing to flush for an in-memory buffer.
    void flush() { }
}
2845 
/// Returns the lower-cased text after the last '.' in `filename`.
/// The result is empty when there is no dot, when the only dot is the first
/// character, or when nothing follows the last dot.
const(char)[] extract_extension_lowercase(in char[] filename) {
    immutable ptrdiff_t dot = filename.lastIndexOf('.');

    // No dot (-1) or a leading dot (0) means there is no extension.
    if (dot <= 0)
        return "";
    // A trailing dot leaves nothing to return.
    if (cast(size_t) dot + 1 >= filename.length)
        return "";

    return filename[dot+1 .. $].toLower();
}