1 // Copyright (c) 2014 Tero Hänninen
2 // Boost Software License - Version 1.0 - August 17th, 2003
3 module imageformats;
4 
5 import std.algorithm;   // min, reverse
6 import std.bitmanip;   // endianness stuff
7 import std.stdio;    // File
8 import std.string;  // toLower, lastIndexOf
9 
/// A decoded image as returned by the read_* functions.
struct IFImage {
    long    w;       /// width in pixels
    long    h;       /// height in pixels
    ColFmt  c;       /// color format of `pixels` (numeric value is the channel count)
    ubyte[] pixels;  /// decoded pixel bytes
}
16 
/// Color format. The numeric value of each member is its number of channels.
enum ColFmt {
    Y = 1,  // 1 channel
    YA,     // 2 channels
    RGB,    // 3 channels
    RGBA,   // 4 channels
}
24 
/// Reads an image from the file named `file`.
/// The format is detected from the file contents. `req_chans` is the wanted
/// number of channels in the result; 0 keeps the channel count of the source.
IFImage read_image(in char[] file, long req_chans = 0) {
    scope input = new FileReader(file);
    return read_image_from_reader(input, req_chans);
}
30 
/// Reads an image from the encoded bytes in `source`.
/// The format is detected from the data. `req_chans` is the wanted number of
/// channels in the result; 0 keeps the channel count of the source.
IFImage read_image_from_mem(in ubyte[] source, long req_chans = 0) {
    scope input = new MemReader(source);
    return read_image_from_reader(input, req_chans);
}
36 
/// Writes an image to file.
/// The encoder is chosen from the lower-cased file extension ("png" or "tga").
/// Throws: ImageIOException if the extension is neither of those.
void write_image(in char[] file, long w, long h, in ubyte[] data, long req_chans = 0) {
    void function(Writer, long, long, in ubyte[], long) encode;

    const(char)[] ext = extract_extension_lowercase(file);
    if (ext == "png")
        encode = &write_png;
    else if (ext == "tga")
        encode = &write_tga;
    else
        throw new ImageIOException("unknown image extension/type");

    // the output file is only opened once the extension is known to be supported
    scope sink = new FileWriter(file);
    encode(sink, w, h, data, req_chans);
}
50 
/// Returns basic info about an image.
/// If number of channels is unknown chans is set to zero.
///
/// The file is probed as PNG, JPEG, BMP and TGA, in that order; the first
/// format whose info reader succeeds determines the result.
/// Throws: ImageIOException if none of the formats matches.
void read_image_info(in char[] file, out long w, out long h, out long chans) {
    scope reader = new FileReader(file);

    // A failed probe is not an error, it only means "try the next format".
    // The catch-all is spelled out as catch (Throwable): the bare `catch { }`
    // form is deprecated and rejected by current compilers.
    try {
        return read_png_info(reader, w, h, chans);
    } catch (Throwable) {
        reader.seek(0, SEEK_SET);   // rewind for the next probe
    }
    try {
        return read_jpeg_info(reader, w, h, chans);
    } catch (Throwable) {
        reader.seek(0, SEEK_SET);
    }
    try {
        return read_bmp_info(reader, w, h, chans);
    } catch (Throwable) {
        reader.seek(0, SEEK_SET);
    }
    try {
        return read_tga_info(reader, w, h, chans);
    } catch (Throwable) {
        reader.seek(0, SEEK_SET);
    }
    throw new ImageIOException("unknown image type");
}
77 
/// Exception thrown by this module for unsupported or corrupt image data and
/// for invalid arguments.
class ImageIOException : Exception {
    this(string msg, string file = __FILE__, size_t line = __LINE__) const pure @safe {
        super(msg, file, line);
    }
}
85 
86 private:
87 
// Detects the format of the data behind `reader` (PNG, JPEG, BMP, TGA, tried in
// that order) and decodes it with the matching reader.
// Throws ImageIOException if no detector accepts the data.
IFImage read_image_from_reader(Reader reader, long req_chans) {
    if (detect_png(reader)) {
        return read_png(reader, req_chans);
    } else if (detect_jpeg(reader)) {
        return read_jpeg(reader, req_chans);
    } else if (detect_bmp(reader)) {
        return read_bmp(reader, req_chans);
    } else if (detect_tga(reader)) {
        return read_tga(reader, req_chans);
    }
    throw new ImageIOException("unknown image type");
}
95 
// Returns true if the stream starts with the 8-byte PNG file signature.
// Never throws for unreadable/short input (that simply yields false) and
// always rewinds the stream to its start before returning.
bool detect_png(Reader stream) {
    try {
        ubyte[8] tmp = void;
        stream.readExact(tmp, tmp.length);
        return (tmp[0..8] == png_file_header[0..$]);
    } catch (Throwable) {
        // explicit catch-all: the bare `catch { }` form is deprecated
        return false;
    } finally {
        stream.seek(0, SEEK_SET);
    }
}
107 
// Returns true if read_jpeg_info can read the stream without failing.
// Any failure yields false; the stream is always rewound to its start.
bool detect_jpeg(Reader stream) {
    try {
        long w, h, c;   // only needed as out-arguments, values are discarded
        read_jpeg_info(stream, w, h, c);
        return true;
    } catch (Throwable) {
        // explicit catch-all: the bare `catch { }` form is deprecated
        return false;
    } finally {
        stream.seek(0, SEEK_SET);
    }
}
119 
// Returns true if the stream starts with "BM" and the little-endian DIB header
// size stored at bytes 14..18 is one of the accepted values.
// Any failure yields false; the stream is always rewound to its start.
bool detect_bmp(Reader stream) {
    try {
        ubyte[18] tmp = void;  // bmp header + size of dib header
        stream.readExact(tmp, tmp.length);
        size_t ds = littleEndianToNative!uint(tmp[14..18]);
        return (tmp[0..2] == ['B', 'M']
            && (ds == 12 || ds == 40 || ds == 52 || ds == 56 || ds == 108 || ds == 124));
    } catch (Throwable) {
        // explicit catch-all: the bare `catch { }` form is deprecated
        return false;
    } finally {
        stream.seek(0, SEEK_SET);
    }
}
133 
// Returns true if read_tga_header accepts the first bytes of the stream.
// Any failure yields false; the stream is always rewound to its start.
bool detect_tga(Reader stream) {
    try {
        read_tga_header(stream);    // throws on a header it considers corrupt
        return true;
    } catch (Throwable) {
        // explicit catch-all: the bare `catch { }` form is deprecated
        return false;
    } finally {
        stream.seek(0, SEEK_SET);
    }
}
144 
145 // --------------------------------------------------------------------------------
146 // PNG
147 
148 import std.digest.crc;
149 import std.zlib;
150 
/// The fields of a PNG IHDR chunk, as filled in by read_png_header.
public struct PNG_Header {
    int   width, height;
    ubyte bit_depth;
    ubyte color_type;           // raw byte; compared against PNG_ColorType values
    ubyte compression_method;
    ubyte filter_method;
    ubyte interlace_method;     // compared against InterlaceMethod values
}
161 
/// Reads and returns the PNG header (IHDR) of the file named `filename`.
public PNG_Header read_png_header(in char[] filename) {
    scope input = new FileReader(filename);
    return read_png_header(input);
}
167 
// Reads the first 33 bytes of a PNG stream (file signature plus the complete
// IHDR chunk) and returns the parsed IHDR fields.
// Throws ImageIOException("corrupt header") if the signature, the IHDR
// length/type or the IHDR crc does not match.
PNG_Header read_png_header(Reader stream) {
    // 8 signature | 4 length | 4 type | 13 data | 4 crc
    ubyte[33] raw = void;
    stream.readExact(raw, raw.length);

    static immutable ubyte[8] ihdr_len_and_type = [0x0, 0x0, 0x0, 0xd, 'I','H','D','R'];

    // crc covers chunk type + data; reversed to match the byte order in the file
    ubyte[4] computed = crc32Of(raw[12..29]);
    reverse(computed[]);

    immutable bool intact = raw[0..8] == png_file_header[0..$]
                         && raw[8..16] == ihdr_len_and_type[0..$]
                         && raw[29..33] == computed[0..$];
    if (!intact)
        throw new ImageIOException("corrupt header");

    PNG_Header header;
    header.width              = bigEndianToNative!int(raw[16..20]);
    header.height             = bigEndianToNative!int(raw[20..24]);
    header.bit_depth          = raw[24];
    header.color_type         = raw[25];
    header.compression_method = raw[26];
    header.filter_method      = raw[27];
    header.interlace_method   = raw[28];
    return header;
}
190 
/// Reads a PNG image from the file named `filename`.
/// `req_chans` is the wanted number of channels; 0 keeps that of the source.
public IFImage read_png(in char[] filename, long req_chans = 0) {
    scope input = new FileReader(filename);
    return read_png(input, req_chans);
}
196 
/// Reads a PNG image from the encoded bytes in `source`.
/// `req_chans` is the wanted number of channels; 0 keeps that of the source.
public IFImage read_png_from_mem(in ubyte[] source, long req_chans = 0) {
    scope input = new MemReader(source);
    return read_png(input, req_chans);
}
202 
// Decodes a PNG from `stream`.
// Accepts 8-bit images of color type Y, RGB, Idx, YA or RGBA, with or without
// Adam7 interlacing. `req_chans` (0..4) selects the channel count of the
// result; 0 means "same as the source".
// Throws ImageIOException for invalid arguments and unsupported or corrupt data.
IFImage read_png(Reader stream, long req_chans = 0) {
    if (req_chans < 0 || 4 < req_chans)
        throw new ImageIOException("come on...");

    PNG_Header hdr = read_png_header(stream);

    immutable bool dims_ok = 1 <= hdr.width && 1 <= hdr.height
                          && cast(ulong) hdr.width * hdr.height <= int.max;
    if (!dims_ok)
        throw new ImageIOException("invalid dimensions");

    if (hdr.bit_depth != 8)
        throw new ImageIOException("only 8-bit images supported");

    switch (hdr.color_type) {
        case PNG_ColorType.Y, PNG_ColorType.RGB, PNG_ColorType.Idx,
             PNG_ColorType.YA, PNG_ColorType.RGBA:
            break;
        default:
            throw new ImageIOException("color type not supported");
    }

    immutable bool methods_ok = hdr.compression_method == 0
                             && hdr.filter_method == 0
                             && (hdr.interlace_method == 0 || hdr.interlace_method == 1);
    if (!methods_ok)
        throw new ImageIOException("not supported");

    PNG_Decoder dc;
    dc.stream      = stream;
    dc.src_indexed = (hdr.color_type == PNG_ColorType.Idx);
    dc.src_chans   = channels(cast(PNG_ColorType) hdr.color_type);
    dc.ilace       = hdr.interlace_method;
    dc.w           = hdr.width;
    dc.h           = hdr.height;
    dc.tgt_chans   = (req_chans != 0) ? cast(int) req_chans : dc.src_chans;

    IFImage image;
    image.w      = dc.w;
    image.h      = dc.h;
    image.c      = cast(ColFmt) dc.tgt_chans;
    image.pixels = decode_png(dc);
    return image;
}
241 
/// Writes `data` as a PNG image of `w` x `h` pixels to the file named `file`.
/// `tgt_chans` is the channel count to store; 0 keeps that of `data`.
public void write_png(in char[] file, long w, long h, in ubyte[] data, long tgt_chans = 0) {
    scope sink = new FileWriter(file);
    write_png(sink, w, h, data, tgt_chans);
}
248 
/// Encodes `data` as a PNG image of `w` x `h` pixels and returns the encoded bytes.
/// `tgt_chans` is the channel count to store; 0 keeps that of `data`.
public ubyte[] write_png_to_mem(long w, long h, in ubyte[] data, long tgt_chans = 0) {
    scope sink = new MemWriter();
    write_png(sink, w, h, data, tgt_chans);
    return sink.result;
}
255 
// The 8-byte signature every PNG file starts with: 0x89 "PNG" CR LF 0x1a LF.
immutable ubyte[8] png_file_header =
    [0x89, 'P', 'N', 'G', '\r', '\n', 0x1a, '\n'];
258 
// Number of channels per pixel after decoding for a PNG color type.
// Indexed images count as 3 channels because they are expanded through the palette.
int channels(PNG_ColorType ct) pure nothrow {
    final switch (ct) {
        case PNG_ColorType.Y:    return 1;
        case PNG_ColorType.YA:   return 2;
        case PNG_ColorType.RGB:  return 3;
        case PNG_ColorType.Idx:  return 3;
        case PNG_ColorType.RGBA: return 4;
    }
}
267 
// PNG color type used when writing an image with the given channel count (1..4).
// Any other count is a programming error and hits assert(0).
PNG_ColorType color_type(long channels) pure nothrow {
    if (channels == 1) return PNG_ColorType.Y;
    if (channels == 2) return PNG_ColorType.YA;
    if (channels == 3) return PNG_ColorType.RGB;
    if (channels == 4) return PNG_ColorType.RGBA;
    assert(0);
}
277 
// State shared by the PNG decoding functions.
struct PNG_Decoder {
    // set up by read_png from the header and the caller's request
    Reader stream;
    bool   src_indexed;     // source pixels are palette indices
    int    src_chans;       // channels per pixel in the (depaletted) source
    int    tgt_chans;       // channels per pixel in the result
    size_t w, h;
    ubyte  ilace;           // interlace method byte from the header

    // working state, set up and updated while decoding
    UnCompress uc;
    CRC32      crc;
    ubyte[12]  chunkmeta;   // crc | length and type
    ubyte[]    read_buf;    // compressed bytes as read from the stream
    ubyte[]    uc_buf;      // uncompressed bytes not yet handed out
    ubyte[]    palette;     // PLTE chunk data
}
293 
// Walks the chunks that follow IHDR (which the caller has already consumed)
// until IEND and returns the pixels decoded from the IDAT stream.
//
// dc.chunkmeta holds "crc | length | type": bytes 0..4 are the crc of the chunk
// just finished, bytes 4..8 the length and 8..12 the type of the chunk about to
// be processed. Each branch that finishes a chunk reads the next 12 bytes into it.
//
// Throws ImageIOException on a crc mismatch, a malformed chunk, or chunks that
// appear in an order this decoder does not accept.
ubyte[] decode_png(ref PNG_Decoder dc) {
    dc.uc = new UnCompress(HeaderFormat.deflate);
    dc.read_buf = new ubyte[4096];

    // how far the chunk stream has progressed; used to validate chunk order
    enum Stage {
        IHDR_parsed,
        PLTE_parsed,
        IDAT_parsed,
        IEND_parsed,
    }

    ubyte[] result;
    auto stage = Stage.IHDR_parsed;
    dc.stream.readExact(dc.chunkmeta[4..$], 8);  // next chunk's len and type

    while (stage != Stage.IEND_parsed) {
        int len = bigEndianToNative!int(dc.chunkmeta[4..8]);
        if (len < 0)    // length has its top bit set
            throw new ImageIOException("chunk too long");

        // standard allows PLTE chunk for non-indexed images too but we don't
        dc.crc.put(dc.chunkmeta[8..12]);  // type
        switch (cast(char[]) dc.chunkmeta[8..12]) {    // chunk type
            case "IDAT":
                // IDAT must come directly after IHDR, or after PLTE for indexed images
                if (! (stage == Stage.IHDR_parsed ||
                      (stage == Stage.PLTE_parsed && dc.src_indexed)) )
                    throw new ImageIOException("corrupt chunk stream");
                // consumes all consecutive IDAT chunks and leaves the meta of
                // the chunk following them in dc.chunkmeta
                result = read_IDAT_stream(dc, len);
                stage = Stage.IDAT_parsed;
                break;
            case "PLTE":
                if (stage != Stage.IHDR_parsed)
                    throw new ImageIOException("corrupt chunk stream");
                // palette entries are 3 bytes each, at most 256 of them
                int entries = len / 3;
                if (len % 3 != 0 || 256 < entries)
                    throw new ImageIOException("corrupt chunk");
                dc.palette = new ubyte[len];
                dc.stream.readExact(dc.palette, dc.palette.length);
                dc.crc.put(dc.palette);
                dc.stream.readExact(dc.chunkmeta, 12); // crc | len, type
                ubyte[4] crc = dc.crc.finish;
                reverse(crc[]);     // to the byte order stored in the file
                if (crc != dc.chunkmeta[0..4])
                    throw new ImageIOException("corrupt chunk");
                stage = Stage.PLTE_parsed;
                break;
            case "IEND":
                if (stage != Stage.IDAT_parsed)
                    throw new ImageIOException("corrupt chunk stream");
                dc.stream.readExact(dc.chunkmeta, 4); // crc
                // IEND carries no data, so its crc is compared against a fixed value
                if (len != 0 || dc.chunkmeta[0..4] != [0xae, 0x42, 0x60, 0x82])
                    throw new ImageIOException("corrupt chunk");
                stage = Stage.IEND_parsed;
                break;
            case "IHDR":
                // a second IHDR is never valid
                throw new ImageIOException("corrupt chunk stream");
            default:
                // unknown chunk, ignore but check crc
                while (0 < len) {
                    // skip the data in read_buf-sized pieces, feeding the crc
                    size_t bytes = min(len, dc.read_buf.length);
                    dc.stream.readExact(dc.read_buf, bytes);
                    len -= bytes;
                    dc.crc.put(dc.read_buf[0..bytes]);
                }
                dc.stream.readExact(dc.chunkmeta, 12); // crc | len, type
                ubyte[4] crc = dc.crc.finish;
                reverse(crc[]);
                if (crc != dc.chunkmeta[0..4])
                    throw new ImageIOException("corrupt chunk");
        }
    }

    return result;
}
368 
// Color type values of the PNG header that this module understands.
enum PNG_ColorType : ubyte { Y = 0, RGB = 2, Idx = 3, YA = 4, RGBA = 6 }
376 
// Value of the filter byte that precedes every scanline in the IDAT data.
enum PNG_FilterType : ubyte { None, Sub, Up, Average, Paeth }
384 
// Values of the interlace method byte in the PNG header.
enum InterlaceMethod {
    None,   // 0
    Adam7,  // 1
}
388 
// Decodes the whole IDAT stream (all consecutive IDAT chunks) into a buffer of
// dc.w * dc.h * dc.tgt_chans bytes and returns it.
//
// `len` is the data length of the first IDAT chunk, whose type has already been
// fed to dc.crc by the caller. On return dc.chunkmeta holds the length and type
// of the chunk that follows the IDAT stream.
//
// Handles both non-interlaced and Adam7-interlaced data. For Adam7:
//  - every pass is filtered as an image of its own, so the "previous line" seen
//    by the first scanline of each pass is all zeros;
//  - a pass with zero width or zero height is empty and contributes no bytes at
//    all (not even filter bytes) to the stream.
//
// Throws ImageIOException on corrupt data.
ubyte[] read_IDAT_stream(ref PNG_Decoder dc, int len) {
    bool metaready = false;     // chunk len, type, crc

    // distance in bytes between corresponding bytes of neighbouring pixels
    immutable uint filter_step = dc.src_indexed ? 1 : dc.src_chans;
    immutable size_t tgt_linesize = cast(size_t) (dc.w * dc.tgt_chans);

    ubyte[] depaletted_line = dc.src_indexed ? new ubyte[cast(size_t)dc.w * 3] : null;
    ubyte[] result = new ubyte[cast(size_t)(dc.w * dc.h * dc.tgt_chans)];

    const LineConv chan_convert = get_converter(dc.src_chans, dc.tgt_chans);

    // expands palette indices to RGB and then converts to the target channels
    void depalette_convert(in ubyte[] src_line, ubyte[] tgt_line) {
        for (size_t s, d;  s < src_line.length;  s+=1, d+=3) {
            size_t pidx = src_line[s] * 3;
            if (dc.palette.length < pidx + 3)
                throw new ImageIOException("palette idx wrong");
            depaletted_line[d .. d+3] = dc.palette[pidx .. pidx+3];
        }
        chan_convert(depaletted_line[0 .. src_line.length*3], tgt_line);
    }

    void simple_convert(in ubyte[] src_line, ubyte[] tgt_line) {
        chan_convert(src_line, tgt_line);
    }

    const convert = dc.src_indexed ? &depalette_convert : &simple_convert;

    if (dc.ilace == InterlaceMethod.None) {
        immutable size_t src_sl_size = cast(size_t) dc.w * filter_step;
        auto cline = new ubyte[src_sl_size+1];   // current line + filter byte
        auto pline = new ubyte[src_sl_size+1];   // previous line, inited to 0
        debug(DebugPNG) assert(pline[0] == 0);

        size_t tgt_si = 0;    // scanline index in target buffer
        foreach (j; 0 .. dc.h) {
            uncompress_line(dc, len, metaready, cline);
            ubyte filter_type = cline[0];

            recon(cline[1..$], pline[1..$], filter_type, filter_step);
            convert(cline[1 .. $], result[tgt_si .. tgt_si + tgt_linesize]);
            tgt_si += tgt_linesize;

            // the reconstructed line becomes the previous line of the next one
            ubyte[] _swap = pline;
            pline = cline;
            cline = _swap;
        }
    } else {
        // Adam7 interlacing

        // width and height of the reduced image of each pass
        immutable size_t[7] redw = [
            (dc.w + 7) / 8,
            (dc.w + 3) / 8,
            (dc.w + 3) / 4,
            (dc.w + 1) / 4,
            (dc.w + 1) / 2,
            (dc.w + 0) / 2,
            (dc.w + 0) / 1,
        ];
        immutable size_t[7] redh = [
            (dc.h + 7) / 8,
            (dc.h + 7) / 8,
            (dc.h + 3) / 8,
            (dc.h + 3) / 4,
            (dc.h + 1) / 4,
            (dc.h + 1) / 2,
            (dc.h + 0) / 2,
        ];

        // The line buffers are written to, so they are plain mutable arrays
        // (declaring them const and casting that away would be undefined).
        const size_t max_scanline_size = cast(size_t) (dc.w * filter_step);
        auto linebuf0 = new ubyte[max_scanline_size+1]; // +1 for filter type byte
        auto linebuf1 = new ubyte[max_scanline_size+1]; // +1 for filter type byte
        auto redlinebuf = new ubyte[cast(size_t) dc.w * dc.tgt_chans];

        foreach (pass; 0 .. 7) {
            // an empty pass has no scanlines and no filter bytes in the stream
            if (redw[pass] == 0 || redh[pass] == 0)
                continue;

            const A7_Catapult tgt_px = a7_catapults[pass];   // target pixel
            const size_t src_linesize = redw[pass] * filter_step;
            auto cline = linebuf0[0 .. src_linesize+1];
            auto pline = linebuf1[0 .. src_linesize+1];

            // linebuf1 still holds a scanline of the previous pass; the first
            // scanline of a pass must be reconstructed against zeros
            pline[] = 0;

            foreach (j; 0 .. redh[pass]) {
                uncompress_line(dc, len, metaready, cline);
                ubyte filter_type = cline[0];

                recon(cline[1..$], pline[1..$], filter_type, filter_step);
                convert(cline[1 .. $], redlinebuf[0 .. redw[pass]*dc.tgt_chans]);

                // scatter the pixels of the reduced line to their final positions
                for (size_t i, redi; i < redw[pass]; ++i, redi += dc.tgt_chans) {
                    size_t tgt = tgt_px(i, j, dc.w) * dc.tgt_chans;
                    result[tgt .. tgt + dc.tgt_chans] =
                        redlinebuf[redi .. redi + dc.tgt_chans];
                }

                ubyte[] _swap = pline;
                pline = cline;
                cline = _swap;
            }
        }
    }

    // uncompress_line sets metaready when it has already read the meta of the
    // chunk after the IDAT stream; otherwise read and verify it here
    if (!metaready) {
        dc.stream.readExact(dc.chunkmeta, 12);   // crc | len & type
        ubyte[4] crc = dc.crc.finish;
        reverse(crc[]);
        if (crc != dc.chunkmeta[0..4])
            throw new ImageIOException("corrupt chunk");
    }
    return result;
}
497 
// An A7_Catapult maps a pixel position (redx, redy) in the reduced image of one
// Adam7 pass to the index of that pixel in the full image, which is dstw pixels
// wide. The result is a pixel index, not a byte offset.
alias A7_Catapult = size_t function(size_t redx, size_t redy, size_t dstw);

// pass 1
size_t a7_red1_to_dst(size_t redx, size_t redy, size_t dstw) pure nothrow {
    return (8 * redy) * dstw + 8 * redx;
}

// pass 2
size_t a7_red2_to_dst(size_t redx, size_t redy, size_t dstw) pure nothrow {
    return (8 * redy) * dstw + (8 * redx + 4);
}

// pass 3
size_t a7_red3_to_dst(size_t redx, size_t redy, size_t dstw) pure nothrow {
    return (8 * redy + 4) * dstw + 4 * redx;
}

// pass 4
size_t a7_red4_to_dst(size_t redx, size_t redy, size_t dstw) pure nothrow {
    return (4 * redy) * dstw + (4 * redx + 2);
}

// pass 5
size_t a7_red5_to_dst(size_t redx, size_t redy, size_t dstw) pure nothrow {
    return (4 * redy + 2) * dstw + 2 * redx;
}

// pass 6
size_t a7_red6_to_dst(size_t redx, size_t redy, size_t dstw) pure nothrow {
    return (2 * redy) * dstw + (2 * redx + 1);
}

// pass 7
size_t a7_red7_to_dst(size_t redx, size_t redy, size_t dstw) pure nothrow {
    return (2 * redy + 1) * dstw + redx;
}

// One mapping per Adam7 pass, indexed by pass number minus one.
immutable A7_Catapult[7] a7_catapults = [
    &a7_red1_to_dst, &a7_red2_to_dst, &a7_red3_to_dst, &a7_red4_to_dst,
    &a7_red5_to_dst, &a7_red6_to_dst, &a7_red7_to_dst,
];
518 
// Fills `dst` completely with the next dst.length bytes of uncompressed IDAT data.
//
// Bytes left over in dc.uc_buf from an earlier call are used first; after that
// compressed data is read from the stream in pieces of at most
// dc.read_buf.length bytes and inflated until `dst` is full.
//
// `length` is the number of data bytes still unread in the current IDAT chunk
// and is updated as data is consumed. When the current chunk is exhausted, its
// crc is verified and the next chunk's meta is read into dc.chunkmeta; if that
// chunk is not an IDAT, the inflater is flushed and `metaready` is set to true
// to tell the caller that dc.chunkmeta already describes the following chunk.
//
// Throws ImageIOException on a crc mismatch or when the stream ends before
// `dst` could be filled.
void uncompress_line(ref PNG_Decoder dc, ref int length, ref bool metaready, ubyte[] dst) {
    // first hand out what is left from the previous inflate call
    size_t readysize = min(dst.length, dc.uc_buf.length);
    dst[0 .. readysize] = dc.uc_buf[0 .. readysize];
    dc.uc_buf = dc.uc_buf[readysize .. $];

    if (readysize == dst.length)
        return;

    while (readysize != dst.length) {
        // need new data for dc.uc_buf...
        if (length <= 0) {  // IDAT is read -> read next chunks meta
            dc.stream.readExact(dc.chunkmeta, 12);   // crc | len & type
            ubyte[4] crc = dc.crc.finish;
            reverse(crc[]);     // to the byte order stored in the file
            if (crc != dc.chunkmeta[0..4])
                throw new ImageIOException("corrupt chunk");

            length = bigEndianToNative!int(dc.chunkmeta[4..8]);
            if (dc.chunkmeta[8..12] != "IDAT") {
                // no new IDAT chunk so flush, this is the end of the IDAT stream
                metaready = true;
                dc.uc_buf = cast(ubyte[]) dc.uc.flush();
                // the flushed data must be enough to complete dst
                size_t part2 = dst.length - readysize;
                if (dc.uc_buf.length < part2)
                    throw new ImageIOException("not enough data");
                dst[readysize .. readysize+part2] = dc.uc_buf[0 .. part2];
                dc.uc_buf = dc.uc_buf[part2 .. $];
                return;
            }
            if (length <= 0)    // empty IDAT chunk
                throw new ImageIOException("not enough data");
            dc.crc.put(dc.chunkmeta[8..12]);  // type
        }

        // read the next piece of compressed data, never past the end of the chunk
        size_t bytes = min(length, dc.read_buf.length);
        dc.stream.readExact(dc.read_buf, bytes);
        length -= bytes;
        dc.crc.put(dc.read_buf[0..bytes]);

        if (bytes <= 0)
            throw new ImageIOException("not enough data");

        // a copy is passed to the inflater because read_buf is reused for the next read
        dc.uc_buf = cast(ubyte[]) dc.uc.uncompress(dc.read_buf[0..bytes].dup);

        // copy as much as dst still needs, keep the rest in dc.uc_buf
        size_t part2 = min(dst.length - readysize, dc.uc_buf.length);
        dst[readysize .. readysize+part2] = dc.uc_buf[0 .. part2];
        dc.uc_buf = dc.uc_buf[part2 .. $];
        readysize += part2;
    }
}
569 
// Reverses PNG scanline filtering in place.
//   cline - filtered bytes of the current line (without the filter byte),
//           replaced by the reconstructed bytes
//   pline - reconstructed bytes of the previous line
//   ftype - filter type byte of the current line
//   fstep - distance in bytes to the corresponding byte of the pixel to the left
// Throws ImageIOException if ftype is not a known filter type.
void recon(ubyte[] cline, in ubyte[] pline, ubyte ftype, int fstep) pure {
    switch (ftype) {
        case PNG_FilterType.None:
            break;

        case PNG_FilterType.Sub:
            // the first fstep bytes have no left neighbour and stay as they are
            for (size_t k = fstep; k < cline.length; ++k)
                cline[k] += cline[k-fstep];
            break;

        case PNG_FilterType.Up:
            for (size_t k = 0; k < cline.length; ++k)
                cline[k] += pline[k];
            break;

        case PNG_FilterType.Average:
            // left neighbour counts as 0 for the first pixel
            for (int k = 0; k < fstep; ++k)
                cline[k] += pline[k] / 2;
            for (size_t k = fstep; k < cline.length; ++k)
                cline[k] += cast(ubyte)
                    ((cast(uint) cline[k-fstep] + cast(uint) pline[k]) / 2);
            break;

        case PNG_FilterType.Paeth:
            // left and upper-left neighbours count as 0 for the first pixel
            for (int i = 0; i < fstep; ++i)
                cline[i] += paeth(0, pline[i], 0);
            for (size_t i = fstep; i < cline.length; ++i)
                cline[i] += paeth(cline[i-fstep], pline[i], pline[i-fstep]);
            break;

        default:
            throw new ImageIOException("filter type not supported");
    }
}
599 
// Paeth predictor: returns whichever of a (left), b (above) and c (upper left)
// is closest to a + b - c. Ties are resolved in the order a, b, c.
ubyte paeth(ubyte a, ubyte b, ubyte c) pure nothrow {
    // with p = a + b - c:  p - a == b - c,  p - b == a - c,  p - c == their sum
    immutable int to_a = cast(int) b - cast(int) c;
    immutable int to_b = cast(int) a - cast(int) c;
    immutable int to_c = to_a + to_b;

    immutable int dist_a = (to_a < 0) ? -to_a : to_a;
    immutable int dist_b = (to_b < 0) ? -to_b : to_b;
    immutable int dist_c = (to_c < 0) ? -to_c : to_c;

    if (dist_a <= dist_b && dist_a <= dist_c)
        return a;
    return (dist_b <= dist_c) ? b : c;
}
616 
617 // ----------------------------------------------------------------------
618 // PNG encoder
619 
// Encodes `data` as a PNG of w x h pixels and writes it to `stream`.
// The channel count of the source is derived from data.length, w and h.
// `tgt_chans` (0..4) is the channel count to store; 0 means "same as source".
// Throws ImageIOException if the dimensions, channel counts or data length are invalid.
void write_png(Writer stream, long w, long h, in ubyte[] data, long tgt_chans = 0) {
    immutable bool dims_ok = 1 <= w && 1 <= h && w <= int.max && h <= int.max;
    if (!dims_ok)
        throw new ImageIOException("invalid dimensions");

    uint src_chans = cast(uint) (data.length / w / h);
    immutable bool chans_ok = 1 <= src_chans && src_chans <= 4
                           && 0 <= tgt_chans && tgt_chans <= 4;
    if (!chans_ok)
        throw new ImageIOException("invalid channel count");

    // data must hold exactly w * h pixels of src_chans bytes each
    if (src_chans * w * h != data.length)
        throw new ImageIOException("mismatching dimensions and length");

    PNG_Encoder ec;
    ec.stream    = stream;
    ec.w         = cast(size_t) w;
    ec.h         = cast(size_t) h;
    ec.src_chans = src_chans;
    ec.tgt_chans = (tgt_chans == 0) ? src_chans : cast(uint) tgt_chans;
    ec.data      = data;

    write_png(ec);
    stream.flush();
}
641 
// State shared by the PNG encoding functions.
struct PNG_Encoder {
    // set up by write_png(Writer, ...)
    Writer stream;
    size_t w, h;
    uint   src_chans, tgt_chans;    // channels per pixel in data / in the file
    const(ubyte)[] data;            // source pixels

    CRC32 crc;

    // IDAT output buffering, set up by write_IDATs
    uint    writelen;   // how much written of current idat data
    ubyte[] chunk_buf;  // len type data crc
    ubyte[] data_buf;   // slice of chunk_buf, for just chunk data
}
655 
// Writes a complete PNG file for `ec`: file signature and IHDR chunk, the IDAT
// chunks, and the IEND chunk.
void write_png(ref PNG_Encoder ec) {
    static immutable ubyte[8] ihdr_len_and_type = [0x0, 0x0, 0x0, 0xd, 'I','H','D','R'];
    static immutable ubyte[12] iend_chunk =
        [0, 0, 0, 0, 'I','E','N','D', 0xae, 0x42, 0x60, 0x82];

    // 8 signature | 4 length | 4 type | 13 data | 4 crc
    ubyte[33] head = void;
    head[0 .. 8]   = png_file_header;
    head[8 .. 16]  = ihdr_len_and_type;
    head[16 .. 20] = nativeToBigEndian(cast(uint) ec.w);
    head[20 .. 24] = nativeToBigEndian(cast(uint) ec.h);
    head[24]       = 8;  // bit depth
    head[25]       = color_type(ec.tgt_chans);
    head[26]       = 0;  // compression method
    head[27]       = 0;  // filter method
    head[28]       = 0;  // interlace method

    // crc covers chunk type + data; reversed to the byte order stored in the file
    ec.crc.start();
    ec.crc.put(head[12 .. 29]);
    ubyte[4] checksum = ec.crc.finish();
    reverse(checksum[]);
    head[29 .. 33] = checksum;
    ec.stream.rawWrite(head);

    write_IDATs(ec);

    ec.stream.rawWrite(iend_chunk);
}
678 
// Converts the source pixels to the target channel count, filters every line
// with the Paeth filter, compresses the whole filtered image with zlib level 6
// and writes the result as a sequence of IDAT chunks.
void write_IDATs(ref PNG_Encoder ec) {
    static immutable ubyte[4] IDAT_type = ['I','D','A','T'];
    immutable long max_idatlen = 4 * 4096;

    // chunk buffer layout: 4 length | 4 type | data | 4 crc
    ec.chunk_buf = new ubyte[8 + max_idatlen + 4];
    ec.chunk_buf[4 .. 8] = IDAT_type;
    ec.data_buf = ec.chunk_buf[8 .. 8 + max_idatlen];
    ec.writelen = 0;

    immutable size_t step = ec.tgt_chans;               // step between pixels, in bytes
    immutable size_t src_stride = ec.w * ec.src_chans;
    immutable size_t row_len = ec.w * ec.tgt_chans + 1; // +1 for filter type

    ubyte[] cur = new ubyte[row_len];
    ubyte[] prev = new ubyte[row_len];      // starts out as all zeros
    debug(DebugPNG) assert(prev[0] == 0);

    ubyte[] filtered = new ubyte[row_len];
    filtered[0] = PNG_FilterType.Paeth;     // same for every line; never overwritten below
    ubyte[] all_rows;

    const LineConv convert = get_converter(ec.src_chans, ec.tgt_chans);

    for (size_t row = 0; row < ec.h; ++row) {
        immutable size_t src_off = row * src_stride;
        convert(ec.data[src_off .. src_off + src_stride], cur[1 .. $]);

        // first pixel: left and upper-left neighbours count as 0
        for (size_t i = 1; i < step + 1; ++i)
            filtered[i] = cast(ubyte) (cur[i] - paeth(0, prev[i], 0));
        for (size_t i = step + 1; i < cur.length; ++i)
            filtered[i] = cast(ubyte)
                (cur[i] - paeth(cur[i-step], prev[i], prev[i-step]));

        all_rows ~= filtered;

        // the line just written becomes the previous line of the next one
        ubyte[] spare = prev;
        prev = cur;
        cur = spare;
    }

    const (void)[] deflated = compress(all_rows, 6);

    ec.write_to_IDAT_stream(deflated);
    // write out what did not fill a whole chunk
    if (0 < ec.writelen)
        ec.write_IDAT_chunk();
}
726 
// Appends compressed bytes to the IDAT data buffer of `ec`, writing out an IDAT
// chunk every time the buffer becomes full. A partially filled buffer is left
// for the caller to write.
void write_to_IDAT_stream(ref PNG_Encoder ec, in void[] _compressed) {
    ubyte[] pending = cast(ubyte[]) _compressed;
    while (pending.length != 0) {
        immutable size_t room = ec.data_buf.length - ec.writelen;
        immutable size_t count = min(room, pending.length);

        ec.data_buf[ec.writelen .. ec.writelen + count] = pending[0 .. count];
        ec.writelen += count;
        pending = pending[count .. $];

        if (ec.writelen == ec.data_buf.length)
            ec.write_IDAT_chunk();
    }
}
740 
// Writes the buffered IDAT data as one chunk and empties the buffer.
// chunk: len type data crc, type is already in buf
void write_IDAT_chunk(ref PNG_Encoder ec) {
    immutable size_t data_end = 8 + ec.writelen;    // end of data, start of crc

    ec.chunk_buf[0 .. 4] = nativeToBigEndian!uint(ec.writelen);

    ec.crc.put(ec.chunk_buf[4 .. data_end]);        // crc of type and data
    ubyte[4] checksum = ec.crc.finish();
    reverse(checksum[]);                            // to the byte order stored in the file
    ec.chunk_buf[data_end .. data_end + 4] = checksum;

    ec.stream.rawWrite(ec.chunk_buf[0 .. data_end + 4]);
    ec.writelen = 0;
}
751 
/// Reads the width, height and number of channels of the PNG file named `filename`.
public void read_png_info(in char[] filename, out long w, out long h, out long chans) {
    scope input = new FileReader(filename);
    read_png_info(input, w, h, chans);
}
757 
// Reads the PNG header from `stream` and reports the image's width, height and
// number of channels. `chans` is set to 0 when the header carries a color type
// this module does not know.
// Throws ImageIOException (from read_png_header) if the header is corrupt.
void read_png_info(Reader stream, out long w, out long h, out long chans) {
    PNG_Header hdr = read_png_header(stream);
    w = hdr.width;
    h = hdr.height;

    // color_type is an unvalidated byte from the file; channels() uses a final
    // switch, which must only ever see valid PNG_ColorType members
    switch (hdr.color_type) {
        case PNG_ColorType.Y, PNG_ColorType.RGB, PNG_ColorType.Idx,
             PNG_ColorType.YA, PNG_ColorType.RGBA:
            chans = channels(cast(PNG_ColorType) hdr.color_type);
            break;
        default:
            chans = 0;  // unknown
            break;
    }
}
764 
765 // --------------------------------------------------------------------------------
766 // TGA
767 
/// The fields of the 18-byte TGA file header, as filled in by read_tga_header.
public struct TGA_Header {
    ubyte  id_length, palette_type, data_type;
    ushort palette_start, palette_length;
    ubyte  palette_bits;
    ushort x_origin, y_origin;
    ushort width, height;
    ubyte  bits_pp, flags;
}
783 
/// Reads and returns the TGA header of the file named `filename`.
public TGA_Header read_tga_header(in char[] filename) {
    scope input = new FileReader(filename);
    return read_tga_header(input);
}
789 
// Reads the 18-byte TGA header from `stream` (multi-byte fields are little
// endian) and returns it.
// Throws ImageIOException("corrupt TGA header") if the header fails the sanity
// checks below.
TGA_Header read_tga_header(Reader stream) {
    ubyte[18] raw = void;
    stream.readExact(raw, raw.length);

    TGA_Header hdr;
    hdr.id_length      = raw[0];
    hdr.palette_type   = raw[1];
    hdr.data_type      = raw[2];
    hdr.palette_start  = littleEndianToNative!ushort(raw[3..5]);
    hdr.palette_length = littleEndianToNative!ushort(raw[5..7]);
    hdr.palette_bits   = raw[7];
    hdr.x_origin       = littleEndianToNative!ushort(raw[8..10]);
    hdr.y_origin       = littleEndianToNative!ushort(raw[10..12]);
    hdr.width          = littleEndianToNative!ushort(raw[12..14]);
    hdr.height         = littleEndianToNative!ushort(raw[14..16]);
    hdr.bits_pp        = raw[16];
    hdr.flags          = raw[17];

    immutable bool empty_image = hdr.width < 1 || hdr.height < 1;
    immutable bool bad_palette_type = hdr.palette_type > 1;
    // without a palette all palette fields have to be zero
    immutable bool stray_palette_fields = hdr.palette_type == 0
        && (hdr.palette_start || hdr.palette_length || hdr.palette_bits);
    // data types 4..8 and 12 and above are rejected
    immutable bool bad_data_type = (4 <= hdr.data_type && hdr.data_type <= 8)
        || 12 <= hdr.data_type;

    if (empty_image || bad_palette_type || stray_palette_fields || bad_data_type)
        throw new ImageIOException("corrupt TGA header");

    return hdr;
}
818 
/// Reads a TGA image from the file named `filename`.
/// `req_chans` is the wanted number of channels; 0 keeps that of the source.
public IFImage read_tga(in char[] filename, long req_chans = 0) {
    scope input = new FileReader(filename);
    return read_tga(input, req_chans);
}
824 
/// Reads a TGA image from the encoded bytes in `source`.
/// `req_chans` is the wanted number of channels; 0 keeps that of the source.
public IFImage read_tga_from_mem(in ubyte[] source, long req_chans = 0) {
    scope input = new MemReader(source);
    return read_tga(input, req_chans);
}
830 
// Decodes a TGA image from `stream`.
// Accepts true-color (24/32 bits per pixel) and gray (8 bits, or 16 bits with
// 8 attribute bits) images, RLE-compressed or not. Interlaced, right-to-left
// and paletted images are rejected.
// `req_chans` (0..4) selects the channel count of the result; 0 means "same as
// the source".
// Throws ImageIOException for invalid arguments and unsupported or corrupt data.
IFImage read_tga(Reader stream, long req_chans = 0) {
    if (req_chans < 0 || 4 < req_chans)
        throw new ImageIOException("come on...");

    TGA_Header hdr = read_tga_header(stream);

    if (hdr.width < 1 || hdr.height < 1)
        throw new ImageIOException("invalid dimensions");
    if ((hdr.flags & 0xc0) != 0)   // two bits
        throw new ImageIOException("interlaced TGAs not supported");
    if ((hdr.flags & 0x10) != 0)
        throw new ImageIOException("right-to-left TGAs not supported");
    ubyte attr_bits_pp = (hdr.flags & 0xf);
    if (attr_bits_pp != 0 && attr_bits_pp != 8) // some set it 0 although data has 8
        throw new ImageIOException("only 8-bit alpha/attribute(s) supported");
    if (hdr.palette_type != 0)
        throw new ImageIOException("paletted TGAs not supported");

    immutable bool color_depth_ok = hdr.bits_pp == 24 || hdr.bits_pp == 32;
    immutable bool gray_depth_ok = hdr.bits_pp == 8
                                || (hdr.bits_pp == 16 && attr_bits_pp == 8);

    bool rle = false;
    switch (hdr.data_type) with (TGA_DataType) {
        // paletted data types (1 and 9) are not handled
        case TrueColor, TrueColor_RLE:
            if (!color_depth_ok)
                throw new ImageIOException("not supported");
            rle = (hdr.data_type == TrueColor_RLE);
            break;
        case Gray, Gray_RLE:
            if (!gray_depth_ok)
                throw new ImageIOException("not supported");
            rle = (hdr.data_type == Gray_RLE);
            break;
        default:
            throw new ImageIOException("data type not supported");
    }

    int src_chans = hdr.bits_pp / 8;

    // skip the image id field that follows the header
    if (hdr.id_length != 0)
        stream.seek(hdr.id_length, SEEK_CUR);

    TGA_Decoder dc = {
        stream         : stream,
        w              : hdr.width,
        h              : hdr.height,
        origin_at_top  : cast(bool) (hdr.flags & 0x20),
        bytes_pp       : hdr.bits_pp / 8,
        rle            : rle,
        tgt_chans      : (req_chans == 0) ? src_chans : cast(int) req_chans,
    };

    if (dc.bytes_pp == 1)
        dc.src_fmt = _ColFmt.Y;
    else if (dc.bytes_pp == 2)
        dc.src_fmt = _ColFmt.YA;
    else if (dc.bytes_pp == 3)
        dc.src_fmt = _ColFmt.BGR;
    else if (dc.bytes_pp == 4)
        dc.src_fmt = _ColFmt.BGRA;
    else
        throw new ImageIOException("TGA: format not supported");

    IFImage image;
    image.w      = dc.w;
    image.h      = dc.h;
    image.c      = cast(ColFmt) dc.tgt_chans;
    image.pixels = decode_tga(dc);
    return image;
}
905 
/// Writes `data` as a TGA image to the file named `file`.
/// The work is done by the Writer-based overload of write_tga.
public void write_tga(in char[] file, long w, long h, in ubyte[] data, long tgt_chans = 0)
{
    scope file_sink = new FileWriter(file);
    write_tga(file_sink, w, h, data, tgt_chans);
}
912 
/// Encodes `data` as a TGA image through a MemWriter and returns that
/// writer's result.
public ubyte[] write_tga_to_mem(long w, long h, in ubyte[] data, long tgt_chans = 0) {
    scope mem_sink = new MemWriter();
    write_tga(mem_sink, w, h, data, tgt_chans);
    return mem_sink.result;
}
919 
// Validates the arguments, sets up a TGA_Encoder (RLE always enabled) and
// writes the image to `stream`, flushing it at the end.
//
// Params:
//   stream    = destination
//   w, h      = image dimensions; each must be within 1 .. ushort.max
//   data      = pixel data; its length must be w * h * (1 to 4 channels)
//   tgt_chans = channel count to write (1-4); 0 keeps the source's count
// Throws: ImageIOException when the dimensions, the channel counts or the
//         data length do not fit together.
void write_tga(Writer stream, long w, long h, in ubyte[] data, long tgt_chans = 0) {
    immutable bool w_ok = 1 <= w && w <= ushort.max;
    immutable bool h_ok = 1 <= h && h <= ushort.max;
    if (!(w_ok && h_ok))
        throw new ImageIOException("invalid dimensions");

    // channel count of the source is implied by the buffer size
    immutable ulong src_chans = data.length / w / h;
    immutable bool src_ok = 1 <= src_chans && src_chans <= 4;
    immutable bool tgt_ok = 0 <= tgt_chans && tgt_chans <= 4;
    if (!(src_ok && tgt_ok))
        throw new ImageIOException("invalid channel count");
    if (src_chans * w * h != data.length)
        throw new ImageIOException("mismatching dimensions and length");

    TGA_Encoder ec;
    ec.stream    = stream;
    ec.w         = cast(ushort) w;
    ec.h         = cast(ushort) h;
    ec.src_chans = cast(int) src_chans;
    ec.tgt_chans = (tgt_chans != 0) ? cast(int) tgt_chans : cast(int) src_chans;
    ec.rle       = true;
    ec.data      = data;

    write_tga(ec);
    stream.flush();
}
942 
// Decoding state that read_tga fills in and decode_tga consumes.
struct TGA_Decoder {
    Reader stream;         // source of the (possibly RLE compressed) pixel data
    size_t w, h;           // image dimensions in pixels
    bool origin_at_top;    // src: set from header flag bit 0x20; false means rows are stored bottom-up
    uint bytes_pp;         // bytes per source pixel (header bits_pp / 8)
    bool rle;   // run length compressed
    _ColFmt src_fmt;       // layout of a source pixel, chosen from bytes_pp
    uint tgt_chans;        // number of channels in the decoded output
}
952 
// Reads the pixel data described by `dc` and returns it converted to
// dc.tgt_chans channels, with the first row of the result being the top row
// of the image.
//
// Uncompressed data is read one scanline at a time. RLE data is treated as
// one continuous stream of packets: a packet that is not finished at the end
// of a scanline simply continues on the next one. This holds for raw packets
// and for run-length packets alike.
//
// Params:
//   dc = decoder state; dc.stream must be positioned at the first byte of
//        the image data
// Returns: a new buffer of dc.w * dc.h * dc.tgt_chans bytes.
ubyte[] decode_tga(ref TGA_Decoder dc) {
    auto result = new ubyte[dc.w * dc.h * dc.tgt_chans];

    immutable size_t tgt_linesize = dc.w * dc.tgt_chans;
    immutable size_t src_linesize = dc.w * dc.bytes_pp;
    auto src_line = new ubyte[src_linesize];

    // When the source is stored bottom-up, start at the last target row and
    // walk backwards.
    immutable ptrdiff_t tgt_stride = (dc.origin_at_top) ? tgt_linesize : -tgt_linesize;
    ptrdiff_t ti                   = (dc.origin_at_top) ? 0 : (dc.h-1) * tgt_linesize;

    const LineConv convert = get_converter(dc.src_fmt, dc.tgt_chans);

    if (!dc.rle) {
        foreach (_j; 0 .. dc.h) {
            dc.stream.readExact(src_line, src_linesize);
            convert(src_line, result[ti .. ti + tgt_linesize]);
            ti += tgt_stride;
        }
        return result;
    }

    // ----- RLE  -----

    // rbuf first receives the packet header and then, for run-length
    // packets, the pixel value that is to be repeated.
    auto rbuf = new ubyte[src_linesize];
    size_t plen = 0;      // bytes of the current packet still to be produced
    bool its_rle = false;

    foreach (_j; 0 .. dc.h) {
        // fill src_line with uncompressed data (this works like a stream)
        size_t wanted = src_linesize;
        while (wanted) {
            if (plen == 0) {
                dc.stream.readExact(rbuf, 1);
                its_rle = cast(bool) (rbuf[0] & 0x80);
                plen = ((rbuf[0] & 0x7f) + 1) * dc.bytes_pp; // length in bytes
                // The repeated pixel is stored exactly once per packet, so
                // it is fetched together with the header. Reading it when
                // the packet is resumed on a later scanline would consume
                // bytes that belong to the next packet.
                if (its_rle)
                    dc.stream.readExact(rbuf, dc.bytes_pp);
            }
            const size_t gotten = src_linesize - wanted;
            const size_t copysize = min(plen, wanted);
            if (its_rle) {
                for (size_t p = gotten; p < gotten+copysize; p += dc.bytes_pp)
                    src_line[p .. p+dc.bytes_pp] = rbuf[0 .. dc.bytes_pp];
            } else {    // it's raw
                auto slice = src_line[gotten .. gotten+copysize];
                dc.stream.readExact(slice, copysize);
            }
            wanted -= copysize;
            plen -= copysize;
        }

        convert(src_line, result[ti .. ti + tgt_linesize]);
        ti += tgt_stride;
    }

    return result;
}
1009 
1010 // ----------------------------------------------------------------------
1011 // TGA encoder
1012 
// Signature bytes "TRUEVISION-XFILE." plus a terminating zero; write_tga
// copies them into the last 18 bytes of the 26-byte file footer.
immutable ubyte[18] tga_footer_sig =
    ['T','R','U','E','V','I','S','I','O','N','-','X','F','I','L','E','.', 0];
1015 
// Encoding state that the public write_tga functions fill in and the
// encoder functions (write_tga(ref TGA_Encoder), write_image_data) consume.
struct TGA_Encoder {
    Writer stream;          // destination of the encoded file
    ushort w, h;            // image dimensions in pixels
    int src_chans;          // channels per pixel in `data`
    int tgt_chans;          // channels per pixel written to the file
    bool rle;   // run length compression
    const(ubyte)[] data;    // source pixels, w * h * src_chans bytes
}
1024 
// Writes a complete TGA file for `ec`: the 18-byte header, the image data
// (through write_image_data) and the 26-byte footer.
//
// Throws: ImageIOException("internal error") when ec.tgt_chans is not 1-4.
void write_tga(ref TGA_Encoder ec) {
    if (ec.tgt_chans < 1 || 4 < ec.tgt_chans)
        throw new ImageIOException("internal error");

    // 1 and 2 channels are gray, 3 and 4 are true color;
    // the even counts carry an alpha channel
    immutable bool is_gray   = ec.tgt_chans <= 2;
    immutable bool has_alpha = (ec.tgt_chans % 2) == 0;

    TGA_DataType data_type;
    if (is_gray)
        data_type = ec.rle ? TGA_DataType.Gray_RLE : TGA_DataType.Gray;
    else
        data_type = ec.rle ? TGA_DataType.TrueColor_RLE : TGA_DataType.TrueColor;

    // Everything not set explicitly stays zero: id length, palette type,
    // palette start/len/bits per entry, x origin and y origin.
    ubyte[18] hdr = 0;
    hdr[2] = data_type;
    hdr[12..14] = nativeToLittleEndian(ec.w);
    hdr[14..16] = nativeToLittleEndian(ec.h);
    hdr[16] = cast(ubyte) (ec.tgt_chans * 8);     // bits per pixel
    hdr[17] = (has_alpha) ? 0x8 : 0x0;     // flags: attr_bits_pp = 8
    ec.stream.rawWrite(hdr);

    write_image_data(ec);

    // extension area offset (4) and developer directory offset (4) are
    // zero, followed by the signature
    ubyte[26] ftr = 0;
    ftr[8..26] = tga_footer_sig;
    ec.stream.rawWrite(ftr);
}
1056 
// Converts the source pixels of `ec` line by line to the target format and
// writes them to ec.stream, last source row first (origin at bottom).
// With ec.rle set, every line is run-length compressed before it is written.
//
// Throws: ImageIOException("internal error") when ec.tgt_chans is not 1-4.
void write_image_data(ref TGA_Encoder ec) {
    _ColFmt tgt_fmt;
    switch (ec.tgt_chans) {
        case 4:  tgt_fmt = _ColFmt.BGRA; break;
        case 3:  tgt_fmt = _ColFmt.BGR;  break;
        case 2:  tgt_fmt = _ColFmt.YA;   break;
        case 1:  tgt_fmt = _ColFmt.Y;    break;
        default: throw new ImageIOException("internal error");
    }

    const LineConv convert = get_converter(ec.src_chans, tgt_fmt);

    immutable size_t src_linesize = ec.w * ec.src_chans;
    immutable size_t tgt_linesize = ec.w * ec.tgt_chans;
    auto tgt_line = new ubyte[tgt_linesize];

    // The compressed line buffer has room for the line itself plus one
    // header byte per packet; it is only needed when compressing.
    immutable bytes_pp = ec.tgt_chans;
    immutable size_t max_packets_per_line = (tgt_linesize+127) / 128;
    ubyte[] tgt_cmp = null;
    if (ec.rle)
        tgt_cmp = new ubyte[tgt_linesize + max_packets_per_line];

    // origin at bottom: walk the source rows from the last to the first
    for (size_t row = ec.h; row-- > 0; ) {
        immutable size_t si = row * src_linesize;
        convert(ec.data[si .. si + src_linesize], tgt_line);
        if (ec.rle) {
            ubyte[] compressed_line = rle_compress(tgt_line, tgt_cmp, ec.w, bytes_pp);
            ec.stream.rawWrite(compressed_line);
        } else {
            ec.stream.rawWrite(tgt_line);
        }
    }
}
1096 
// Run-length compresses one line of pixels into TGA packets.
//
// Runs of equal pixels that are long enough become RLE packets (header
// 0x80 | (count-1) followed by one pixel); everything else is gathered into
// raw packets (header count-1 followed by the pixels). No packet holds more
// than 128 pixels.
//
// Params:
//   line     = the uncompressed pixels, w * bytes_pp bytes
//   tgt_cmp  = buffer that receives the packets
//   w        = number of pixels in the line
//   bytes_pp = bytes per pixel
// Returns: the used part of tgt_cmp.
ubyte[] rle_compress(in ubyte[] line, ubyte[] tgt_cmp, in size_t w, in int bytes_pp) pure {
    // shortest run that is worth an RLE packet
    immutable int min_run = (bytes_pp > 1) ? 2 : 3;

    size_t out_pos = 0;        // write position in tgt_cmp
    size_t literal_from = 0;   // offset in line of the first pending raw pixel
    size_t literal_count = 0;  // number of pending raw pixels

    size_t cursor = bytes_pp;  // offset just past the pixel that starts the run
    size_t todo = w;           // pixels not yet assigned to a packet
    while (todo) {
        const(ubyte)[] first = line[cursor - bytes_pp .. cursor];

        // measure the run that starts at `first`, at most 128 pixels
        size_t run = 1;
        while (cursor < line.length && line[cursor .. cursor + bytes_pp] == first[0..$] && run < 128) {
            run += 1;
            cursor += bytes_pp;
        }
        todo -= run;

        if (min_run <= run) {
            // pending raw pixels have to go out before the RLE packet
            if (literal_count != 0) {
                assert(literal_count < 128);
                immutable size_t nbytes = literal_count * bytes_pp;
                tgt_cmp[out_pos] = cast(ubyte) (literal_count - 1); // raw packet header
                out_pos += 1;
                tgt_cmp[out_pos .. out_pos + nbytes] = line[literal_from .. literal_from + nbytes];
                out_pos += nbytes;
                literal_count = 0;
            }

            tgt_cmp[out_pos] = cast(ubyte) (0x80 | (run - 1));      // RLE packet header
            out_pos += 1;
            tgt_cmp[out_pos .. out_pos + bytes_pp] = first[0..$];   // the repeated pixel
            out_pos += bytes_pp;
            literal_from = cursor;
        } else {
            // too short for an RLE packet: keep the pixels for a raw packet
            literal_count += run;
            if (literal_count >= 128) {
                // a raw packet is full, store it
                immutable size_t nbytes = 128 * bytes_pp;
                tgt_cmp[out_pos] = 0x7f;                            // raw packet header
                out_pos += 1;
                tgt_cmp[out_pos .. out_pos + nbytes] = line[literal_from .. literal_from + nbytes];
                out_pos += nbytes;
                literal_from += nbytes;
                literal_count -= 128;
            }
        }

        cursor += bytes_pp;
    }

    // whatever is still pending forms the last packet of the line
    if (literal_count != 0) {
        immutable size_t nbytes = literal_count * bytes_pp;
        tgt_cmp[out_pos] = cast(ubyte) (literal_count - 1);
        out_pos += 1;
        tgt_cmp[out_pos .. out_pos + nbytes] = line[literal_from .. literal_from + nbytes];
        out_pos += nbytes;
    }
    return tgt_cmp[0 .. out_pos];
}
1154 
// Values of the data type field of the TGA header.
// read_tga decodes the TrueColor and Gray types (plain and RLE) and rejects
// everything else; the Idx types are only looked at by read_tga_info.
enum TGA_DataType : ubyte {
    Idx           = 1,   // paletted, uncompressed
    TrueColor     = 2,
    Gray          = 3,
    Idx_RLE       = 9,   // paletted, RLE
    TrueColor_RLE = 10,
    Gray_RLE      = 11,
}
1163 
/// Reads width, height and channel count of the TGA file named `filename`.
/// chans is set to zero if the number of channels cannot be determined.
public void read_tga_info(in char[] filename, out long w, out long h, out long chans) {
    scope file_reader = new FileReader(filename);
    read_tga_info(file_reader, w, h, chans);
}
1169 
// Reads the TGA header from `stream` and reports the image dimensions and
// the number of channels. chans is left at zero when it cannot be told from
// the header.
void read_tga_info(Reader stream, out long w, out long h, out long chans) {
    TGA_Header hdr = read_tga_header(stream);
    w = hdr.width;
    h = hdr.height;
    chans = 0;  // unknown, unless one of the cases below applies

    // TGA is awkward...
    switch (hdr.data_type) with (TGA_DataType) {
        case TrueColor, Gray, TrueColor_RLE, Gray_RLE:
            // channel count follows from the pixel depth, whole bytes only
            if ((hdr.bits_pp % 8) == 0)
                chans = hdr.bits_pp / 8;
            break;
        case Idx, Idx_RLE:
            // for paletted images the palette entries decide
            immutable entry_bits = hdr.palette_bits;
            if (entry_bits == 15 || entry_bits == 24)
                chans = 3;
            else if (entry_bits == 16)
                chans = 3;  // one bit could be for some "interrupt control"
            else if (entry_bits == 32)
                chans = 4;
            break;
        default:
            break;
    }
}
1194 
1195 // --------------------------------------------------------------------------------
1196 // BMP
1197 
/// Reads a BMP image from the file named `filename`.
/// req_chans is passed on to the Reader-based read_bmp.
public IFImage read_bmp(in char[] filename, long req_chans = 0) {
    scope file_reader = new FileReader(filename);
    return read_bmp(file_reader, req_chans);
}
1203 
/// Reads a BMP image from the memory buffer `source`.
/// req_chans is passed on to the Reader-based read_bmp.
public IFImage read_bmp_from_mem(in ubyte[] source, long req_chans = 0) {
    scope mem_reader = new MemReader(source);
    return read_bmp(mem_reader, req_chans);
}
1209 
/// Reads the BMP file header and DIB header of the file named `filename`.
public BMP_Header read_bmp_header(in char[] filename) {
    scope file_reader = new FileReader(filename);
    return read_bmp_header(file_reader);
}
1215 
/// BMP file header together with the DIB header, as returned by
/// read_bmp_header. The dib_v* parts are only meaningful when dib_version
/// is at least the respective version; otherwise they keep their defaults.
public struct BMP_Header {
    size_t file_size;           // file size field of the file header
    size_t pixel_data_offset;   // offset of the pixel data, counted from the start of the file

    size_t dib_size;            // DIB header size in bytes; read_bmp checks pixel_data_offset against 14 + dib_size
    ptrdiff_t width;
    ptrdiff_t height;           // read_bmp treats a negative height as rows stored top-down
    ushort planes;              // read_bmp only accepts 1
    size_t bits_pp;             // bits per pixel
    uint dib_version;           // 0 to 5, derived from the DIB header size (12, 40, 52, 56, 108, 124)
    DibV1 dib_v1;
    DibV2 dib_v2;
    uint dib_v3_alpha_mask;
    DibV4 dib_v4;
    DibV5 dib_v5;
}
1233 
/// Part of BMP header, not always present.
/// Filled in by read_bmp_header when the DIB header is version 1 (40 bytes)
/// or later.
public struct DibV1 {
    uint compression;               // read_bmp accepts CMP_RGB and CMP_BITS only
    size_t idat_size;
    size_t pixels_per_meter_x;
    size_t pixels_per_meter_y;
    size_t palette_length;          // number of palette entries
    uint important_color_count;
}
1243 
/// Part of BMP header, not always present.
/// Filled in by read_bmp_header when the DIB header is version 2 (52 bytes)
/// or later. read_bmp uses the masks to locate the color bytes within a
/// pixel when the compression is CMP_BITS.
public struct DibV2 {
    uint red_mask;
    uint green_mask;
    uint blue_mask;
}
1250 
/// Part of BMP header, not always present.
/// Filled in by read_bmp_header when the DIB header is version 4 (108 bytes)
/// or later.
public struct DibV4 {
    uint color_space_type;
    ubyte[36] color_space_endpoints;    // copied from the header as raw bytes
    uint gamma_red;
    uint gamma_green;
    uint gamma_blue;
}
1259 
/// Part of BMP header, not always present.
/// Filled in by read_bmp_header when the DIB header is version 5 (124 bytes).
public struct DibV5 {
    uint icc_profile_data;
    uint icc_profile_size;
}
1265 
// Reads the 14-byte BMP file header and the DIB header that follows it.
//
// The DIB header version is derived from its size field; the version
// specific parts of the result (dib_v1, dib_v2, dib_v3_alpha_mask, dib_v4,
// dib_v5) are only filled in when the header is of that version or later.
// All multi-byte fields are little endian.
//
// Params:
//   stream = reader positioned at the start of the file
// Returns: the parsed header, including the size of the DIB header.
// Throws: ImageIOException when the file does not start with "BM" or when
//         the DIB header size is not one of the known ones.
BMP_Header read_bmp_header(Reader stream) {
    ubyte[18] tmp = void;  // bmp header + size of dib header
    stream.readExact(tmp[], tmp.length);

    if (tmp[0..2] != ['B', 'M'])
        throw new ImageIOException("corrupt header");

    size_t dib_size = littleEndianToNative!uint(tmp[14..18]);
    uint dib_version;
    switch (dib_size) {
        case 12: dib_version = 0; break;
        case 40: dib_version = 1; break;
        case 52: dib_version = 2; break;
        case 56: dib_version = 3; break;
        case 108: dib_version = 4; break;
        case 124: dib_version = 5; break;
        default: throw new ImageIOException("unsupported dib version");
    }
    // The size field itself has been consumed already, so offsets into
    // dib_header are 4 less than offsets into the DIB header.
    auto dib_header = new ubyte[dib_size-4];
    stream.readExact(dib_header[], dib_header.length);

    DibV1 dib_v1;
    DibV2 dib_v2;
    uint dib_v3_alpha_mask;
    DibV4 dib_v4;
    DibV5 dib_v5;

    if (1 <= dib_version) {
        DibV1 v1 = {
            compression           : littleEndianToNative!uint(dib_header[12..16]),
            idat_size             : cast(size_t) littleEndianToNative!uint(dib_header[16..20]),
            pixels_per_meter_x    : cast(size_t) littleEndianToNative!uint(dib_header[20..24]),
            pixels_per_meter_y    : cast(size_t) littleEndianToNative!uint(dib_header[24..28]),
            palette_length        : cast(size_t) littleEndianToNative!uint(dib_header[28..32]),
            important_color_count : littleEndianToNative!uint(dib_header[32..36]),
        };
        dib_v1 = v1;
    }

    if (2 <= dib_version) {
        DibV2 v2 = {
            red_mask              : littleEndianToNative!uint(dib_header[36..40]),
            green_mask            : littleEndianToNative!uint(dib_header[40..44]),
            blue_mask             : littleEndianToNative!uint(dib_header[44..48]),
        };
        dib_v2 = v2;
    }

    if (3 <= dib_version) {
        dib_v3_alpha_mask = littleEndianToNative!uint(dib_header[48..52]);
    }

    if (4 <= dib_version) {
        DibV4 v4 = {
            color_space_type      : littleEndianToNative!uint(dib_header[52..56]),
            color_space_endpoints : dib_header[56..92],
            gamma_red             : littleEndianToNative!uint(dib_header[92..96]),
            gamma_green           : littleEndianToNative!uint(dib_header[96..100]),
            gamma_blue            : littleEndianToNative!uint(dib_header[100..104]),
        };
        dib_v4 = v4;
    }

    if (5 <= dib_version) {
        DibV5 v5 = {
            icc_profile_data      : littleEndianToNative!uint(dib_header[108..112]),
            icc_profile_size      : littleEndianToNative!uint(dib_header[112..116]),
        };
        dib_v5 = v5;
    }

    // The 12-byte header stores the dimensions as 16-bit values, all later
    // versions as signed 32-bit values.
    ptrdiff_t width, height; ushort planes; size_t bits_pp;
    if (0 == dib_version) {
        width = littleEndianToNative!ushort(dib_header[0..2]);
        height = littleEndianToNative!ushort(dib_header[2..4]);
        planes = littleEndianToNative!ushort(dib_header[4..6]);
        bits_pp = cast(size_t) littleEndianToNative!ushort(dib_header[6..8]);
    } else {
        width = littleEndianToNative!int(dib_header[0..4]);
        height = littleEndianToNative!int(dib_header[4..8]);
        planes = littleEndianToNative!ushort(dib_header[8..10]);
        bits_pp = cast(size_t) littleEndianToNative!ushort(dib_header[10..12]);
    }

    BMP_Header header = {
        file_size             : cast(size_t) littleEndianToNative!uint(tmp[2..6]),
        pixel_data_offset     : cast(size_t) littleEndianToNative!uint(tmp[10..14]),
        // dib_size has to be stored: read_bmp validates the pixel data
        // offset against 14 + dib_size, which is pointless with a zero here
        dib_size              : dib_size,
        width                 : width,
        height                : height,
        planes                : planes,
        bits_pp               : bits_pp,
        dib_version           : dib_version,
        dib_v1                : dib_v1,
        dib_v2                : dib_v2,
        dib_v3_alpha_mask     : dib_v3_alpha_mask,
        dib_v4                : dib_v4,
        dib_v5                : dib_v5,
    };
    return header;
}
1366 
// Values of the compression field (DibV1.compression) that read_bmp accepts.
enum CMP_RGB  = 0;  // no compression
enum CMP_BITS = 3;  // channel positions are given by the color masks of the header
1369 
// Decodes a BMP image from `stream`.
//
// Supported are 8-bit paletted images and 24/32-bit images, uncompressed or
// with byte-aligned color masks (CMP_BITS). Rows are returned top row first
// regardless of how the file stores them.
//
// Params:
//   stream    = reader positioned at the start of the file
//   req_chans = number of channels wanted in the result (1-4); 0 selects
//               RGBA when the header carries a usable alpha mask, else RGB
// Returns: the decoded image.
// Throws: ImageIOException for an out-of-range req_chans, for malformed
//         headers and for every BMP variant rejected below.
IFImage read_bmp(Reader stream, long req_chans = 0) {
    if (req_chans < 0 || 4 < req_chans)
        throw new ImageIOException("unknown color format");

    BMP_Header hdr = read_bmp_header(stream);

    if (hdr.width < 1 || hdr.height == 0) { throw new ImageIOException("invalid dimensions"); }
    if (hdr.pixel_data_offset < (14 + hdr.dib_size)
    || hdr.pixel_data_offset > 0xffffff /* arbitrary */) {
        throw new ImageIOException("invalid pixel data offset");
    }
    if (hdr.planes != 1) { throw new ImageIOException("not supported"); }

    // Defaults describe the 12-byte (version 0) header: 3-byte palette
    // entries and no masks.
    auto bytes_pp       = 1;
    bool paletted       = true;
    size_t palette_length = 256;
    bool rgb_masked     = false;
    auto pe_bytes_pp    = 3;

    if (1 <= hdr.dib_version) {
        if (256 < hdr.dib_v1.palette_length)
            throw new ImageIOException("invalid palette length");
        if (hdr.bits_pp <= 8 && hdr.dib_v1.compression != CMP_RGB)
             throw new ImageIOException("invalid format");
        if (hdr.dib_v1.compression != CMP_RGB && hdr.dib_v1.compression != CMP_BITS)
             throw new ImageIOException("unsupported compression");
    }

    // The pixel depth decides the layout for every header version,
    // including the 12-byte one (which may hold 24-bit data as well).
    switch (hdr.bits_pp) {
        case 8  : bytes_pp = 1; paletted = true; break;
        case 24 : bytes_pp = 3; paletted = false; break;
        case 32 : bytes_pp = 4; paletted = false; break;
        default: throw new ImageIOException("not supported");
    }

    if (1 <= hdr.dib_version) {
        // A palette length of zero in the header stands for the full
        // palette, which is 256 entries for the 8-bit images handled here.
        palette_length = (hdr.dib_v1.palette_length != 0) ? hdr.dib_v1.palette_length
                                                          : 256;
        rgb_masked = hdr.dib_v1.compression == CMP_BITS;
        pe_bytes_pp = 4;
    }

    // Translates a byte-aligned channel mask to the index of that byte
    // within a little-endian pixel.
    size_t mask_to_idx(uint mask) {
        switch (mask) {
            case 0xff00_0000: return 3;
            case 0x00ff_0000: return 2;
            case 0x0000_ff00: return 1;
            case 0x0000_00ff: return 0;
            default: throw new ImageIOException("unsupported mask");
        }
    }

    size_t redi = 2;
    size_t greeni = 1;
    size_t bluei = 0;
    if (rgb_masked) {
        if (hdr.dib_version < 2)
            throw new ImageIOException("invalid format");
        redi = mask_to_idx(hdr.dib_v2.red_mask);
        greeni = mask_to_idx(hdr.dib_v2.green_mask);
        bluei = mask_to_idx(hdr.dib_v2.blue_mask);
        // a color byte outside of the pixel cannot be read
        if (bytes_pp <= redi || bytes_pp <= greeni || bytes_pp <= bluei)
            throw new ImageIOException("unsupported mask");
    }

    bool alpha_masked = false;
    size_t alphai = 0;
    if (3 <= hdr.dib_version && hdr.dib_v3_alpha_mask != 0) {
        alphai = mask_to_idx(hdr.dib_v3_alpha_mask);
        // Some writers store an alpha mask of 0xff000000 even for 24-bit
        // data. Such a mask points past the pixel and there is no alpha to
        // read, so it is ignored for non-paletted data.
        alpha_masked = paletted || alphai < bytes_pp;
    }

    ubyte[] depaletted_line = null;
    ubyte[] palette = null;
    if (paletted) {
        // the palette directly follows the DIB header
        depaletted_line = new ubyte[hdr.width * pe_bytes_pp];
        palette = new ubyte[palette_length * pe_bytes_pp];
        stream.readExact(palette[], palette.length);
    }

    stream.seek(hdr.pixel_data_offset, SEEK_SET);

    immutable tgt_chans = (0 < req_chans) ? req_chans
                                          : (alpha_masked) ? _ColFmt.RGBA
                                                           : _ColFmt.RGB;

    const src_fmt = (!paletted || pe_bytes_pp == 4) ? _ColFmt.BGRA : _ColFmt.BGR;
    const LineConv convert = get_converter(src_fmt, tgt_chans);

    // source lines are padded to a multiple of four bytes
    immutable size_t src_linesize = hdr.width * bytes_pp;  // without padding
    immutable size_t src_pad = 3 - ((src_linesize-1) % 4);
    immutable ptrdiff_t tgt_linesize = (hdr.width * cast(int) tgt_chans);

    // positive height: rows are stored bottom-up, so fill the result from
    // its last row backwards
    immutable ptrdiff_t tgt_stride = (hdr.height < 0) ? tgt_linesize : -tgt_linesize;
    ptrdiff_t ti                   = (hdr.height < 0) ? 0 : (hdr.height-1) * tgt_linesize;

    auto src_line_buf  = new ubyte[src_linesize + src_pad];
    auto bgra_line_buf = (paletted) ? null : new ubyte[hdr.width * 4];
    auto result        = new ubyte[hdr.width * abs(hdr.height) * cast(int) tgt_chans];

    foreach (_; 0 .. abs(hdr.height)) {
        stream.readExact(src_line_buf[], src_line_buf.length);
        auto src_line = src_line_buf[0..src_linesize];

        if (paletted) {
            size_t ps = pe_bytes_pp;
            size_t di = 0;
            foreach (idx; src_line[]) {
                // valid indices are 0 .. palette_length-1
                if (idx >= palette_length)
                    throw new ImageIOException("invalid palette index");
                size_t i = idx * ps;
                depaletted_line[di .. di+ps] = palette[i .. i+ps];
                if (ps == 4) {
                    // the fourth byte of a palette entry is not alpha
                    depaletted_line[di+3] = 255;
                }
                di += ps;
            }
            convert(depaletted_line[], result[ti .. (ti+tgt_linesize)]);
        } else {
            // reorder the channels of every pixel to BGRA
            for (size_t si, di;   si < src_line.length;   si+=bytes_pp, di+=4) {
                bgra_line_buf[di + 0] = src_line[si + bluei];
                bgra_line_buf[di + 1] = src_line[si + greeni];
                bgra_line_buf[di + 2] = src_line[si + redi];
                bgra_line_buf[di + 3] = (alpha_masked) ? src_line[si + alphai]
                                                       : 255;
            }
            convert(bgra_line_buf[], result[ti .. (ti+tgt_linesize)]);
        }

        ti += tgt_stride;
    }

    IFImage ret = {
        w      : hdr.width,
        h      : abs(hdr.height),
        c      : cast(ColFmt) tgt_chans,
        pixels : result,
    };
    return ret;
}
1506 
/// Reads width, height and channel count of the BMP file named `filename`.
public void read_bmp_info(in char[] filename, out long w, out long h, out long chans) {
    scope file_reader = new FileReader(filename);
    read_bmp_info(file_reader, w, h, chans);
}
1512 
// Reads the BMP header from `stream` and reports the absolute image
// dimensions. chans is RGBA when the header is version 3 or later and has a
// non-zero alpha mask, otherwise RGB.
void read_bmp_info(Reader stream, out long w, out long h, out long chans) {
    BMP_Header hdr = read_bmp_header(stream);
    w = abs(hdr.width);
    h = abs(hdr.height);

    immutable bool has_alpha_mask = hdr.dib_version >= 3 && hdr.dib_v3_alpha_mask != 0;
    if (has_alpha_mask)
        chans = ColFmt.RGBA;
    else
        chans = ColFmt.RGB;
}
1520 
1521 // --------------------------------------------------------------------------------
1522 // Baseline JPEG decoder
1523 
1524 import std.math;    // floor, ceil
1525 import core.stdc.stdlib : alloca;
1526 
1527 //debug = DebugJPEG;
1528 
/// Reads a JPEG image from the file named `filename`.
/// req_chans is passed on to the Reader-based read_jpeg.
public IFImage read_jpeg(in char[] filename, long req_chans = 0) {
    scope file_reader = new FileReader(filename);
    return read_jpeg(file_reader, req_chans);
}
1534 
/// Reads a JPEG image from the memory buffer `source`.
/// req_chans is passed on to the Reader-based read_jpeg.
public IFImage read_jpeg_from_mem(in ubyte[] source, long req_chans = 0) {
    scope mem_reader = new MemReader(source);
    return read_jpeg(mem_reader, req_chans);
}
1540 
// Decodes a JPEG image from `stream`.
//
// Params:
//   stream    = reader positioned at the start of the file
//   req_chans = number of channels wanted in the result (1-4); 0 keeps the
//               number of components of the image
// Returns: the decoded image.
// Throws: ImageIOException when req_chans is out of range, when the data
//         does not start with the SOI marker or when the end of the image
//         is reached before any scan.
IFImage read_jpeg(Reader stream, long req_chans = 0) {
    if (req_chans < 0 || 4 < req_chans)
        throw new ImageIOException("come on...");

    // SOI
    ubyte[2] soi = void;
    stream.readExact(soi, soi.length);
    if (soi[0] != 0xff || soi[1] != 0xd8)
        throw new ImageIOException("not JPEG");

    JPEG_Decoder dc;
    dc.stream = stream;

    read_markers(dc);   // reads until first scan header or eoi
    if (dc.eoi_reached)
        throw new ImageIOException("no image data");

    if (req_chans == 0)
        dc.tgt_chans = dc.num_comps;
    else
        dc.tgt_chans = cast(int) req_chans;

    IFImage img;
    img.w      = dc.width;
    img.h      = dc.height;
    img.c      = cast(ColFmt) dc.tgt_chans;
    img.pixels = decode_jpeg(dc);
    return img;
}
1567 
// State of the baseline JPEG decoder. read_jpeg creates it with only the
// stream set; the marker segment readers fill in the rest.
struct JPEG_Decoder {
    Reader stream;

    bool has_frame_header = false;  // set by read_markers once the SOF0 segment has been read
    bool eoi_reached = false;       // set by read_markers when the EOI marker is found

    ubyte[64][4] qtables;
    HuffTab[2] ac_tables;   // indexed by table slot (0 or 1), see read_huffman_tables
    HuffTab[2] dc_tables;   // indexed by table slot (0 or 1), see read_huffman_tables

    ubyte cb;  // current byte (next bit always at MSB)
    int bits_left;   // num of unused bits in cb

    bool correct_comp_ids;
    Component[3] comps;
    ubyte num_comps;    // read_jpeg uses it as the default number of target channels
    int[3] index_for;   // index_for[0] is index of comp that comes first in stream
    int tgt_chans;      // number of channels of the decoded output, set by read_jpeg

    size_t width, height;

    int hmax, vmax;

    ushort restart_interval;    // number of MCUs in restart interval

    // image component
    struct Component {
        ubyte id;
        ubyte sfx, sfy;   // sampling factors, aka. h and v
        size_t x, y;       // total num of samples, without fill samples
        ubyte qtable;
        ubyte ac_table;
        ubyte dc_table;
        int pred;                // dc prediction
        ubyte[] data;   // reconstructed samples
    }

    int num_mcu_x;
    int num_mcu_y;
}
1608 
// One huffman table as defined by a DHT segment.
struct HuffTab {
    // TODO where in the spec does it say 256 values/codes at most?
    ubyte[256] values;  // the symbol values, read from the stream by read_huffman_tables
    ubyte[257] sizes;   // code length of every code in order, terminated by a 0 entry (see derive_table)
    // The following are indexed by code length - 1 and are set by
    // derive_mincode_maxcode_valptr; -1 marks a length without codes.
    short[16] mincode, maxcode;
    short[16] valptr;
}
1616 
// Second byte of a JPEG marker (the first byte is always 0xff).
// read_markers handles DHT, DQT, SOF0, SOS, DRI, EOI, the APP0..APPf range
// and COM; every other marker makes it throw.
enum Marker : ubyte {
    SOI = 0xd8,     // start of image
    SOF0 = 0xc0,    // start of frame / baseline DCT
    //SOF1 = 0xc1,    // start of frame / extended seq.
    //SOF2 = 0xc2,    // start of frame / progressive DCT
    SOF3 = 0xc3,    // start of frame / lossless
    SOF9 = 0xc9,    // start of frame / extended seq., arithmetic
    SOF11 = 0xcb,    // start of frame / lossless, arithmetic
    DHT = 0xc4,     // define huffman tables
    DQT = 0xdb,     // define quantization tables
    DRI = 0xdd,     // define restart interval
    SOS = 0xda,     // start of scan
    DNL = 0xdc,     // define number of lines
    RST0 = 0xd0,    // restart entropy coded data
    // ...
    RST7 = 0xd7,    // restart entropy coded data
    APP0 = 0xe0,    // application 0 segment
    // ...
    APPf = 0xef,    // application f segment
    //DAC = 0xcc,     // define arithmetic conditioning table
    COM = 0xfe,     // comment
    EOI = 0xd9,     // end of image
}
1640 
// Processes marker segments from dc.stream until a scan header (SOS) has
// been read or the end of the image (EOI) is reached; the latter is
// reported through dc.eoi_reached.
//
// Throws: ImageIOException when no marker is found where one is expected,
//         on a second frame header, on a scan header without a preceding
//         frame header and on markers that are not handled.
void read_markers(ref JPEG_Decoder dc) {
    bool scan_header_read = false;
    while (!scan_header_read && !dc.eoi_reached) {
        ubyte[2] marker;
        dc.stream.readExact(marker, 2);

        if (marker[0] != 0xff)
            throw new ImageIOException("no marker");
        // any number of 0xff bytes may precede the marker code
        while (marker[1] == 0xff)
            dc.stream.readExact(marker[1..$], 1);

        immutable ubyte code = marker[1];

        debug(DebugJPEG) writefln("marker: %s (%1$x)\t", cast(Marker) code);
        switch (code) with (Marker) {
            case SOF0:
                if (dc.has_frame_header)
                    throw new ImageIOException("extra frame header");
                debug(DebugJPEG) writeln();
                dc.read_frame_header();
                dc.has_frame_header = true;
                break;
            case SOS:
                if (!dc.has_frame_header)
                    throw new ImageIOException("no frame header");
                dc.read_scan_header();
                scan_header_read = true;
                break;
            case DHT:
                dc.read_huffman_tables();
                break;
            case DQT:
                dc.read_quantization_tables();
                break;
            case DRI:
                dc.read_restart_interval();
                break;
            case EOI:
                dc.eoi_reached = true;
                break;
            case APP0: .. case APPf: goto case;
            case COM:
                // application and comment segments are skipped; the length
                // field counts itself, hence the - 2
                debug(DebugJPEG) writefln("-> skipping segment");
                ubyte[2] lenbuf = void;
                dc.stream.readExact(lenbuf, lenbuf.length);
                int len = bigEndianToNative!ushort(lenbuf) - 2;
                dc.stream.seek(len, SEEK_CUR);
                break;
            default:
                throw new ImageIOException("invalid / unsupported marker");
        }
    }
}
1683 
// DHT -- define huffman tables
//
// Reads all tables of one DHT segment from dc.stream into dc.dc_tables and
// dc.ac_tables and derives their decoding data.
//
// Throws: ImageIOException when a table slot or class is greater than 1 or
//         when a table defines more than 256 codes.
void read_huffman_tables(ref JPEG_Decoder dc) {
    ubyte[19] tmp = void;
    dc.stream.readExact(tmp, 2);
    // segment length without the two bytes of the length field itself
    int remaining = bigEndianToNative!ushort(tmp[0..2]) - 2;

    while (0 < remaining) {
        dc.stream.readExact(tmp, 17);   // info byte & the BITS
        immutable ubyte table_slot = tmp[0] & 0xf; // must be 0 or 1 for baseline
        immutable ubyte table_class = tmp[0] >> 4;  // 0 = dc table, 1 = ac table
        if (1 < table_slot || 1 < table_class)
            throw new ImageIOException("invalid / not supported");

        // compute total number of huffman codes
        int mt = 0;
        foreach (count; tmp[1 .. 17])
            mt += count;
        if (256 < mt)   // TODO where in the spec?
            throw new ImageIOException("invalid / not supported");

        // the values follow the BITS; they go to the dc or the ac table
        HuffTab* table = (table_class == 0) ? &dc.dc_tables[table_slot]
                                            : &dc.ac_tables[table_slot];
        dc.stream.readExact(table.values, mt);
        derive_table(*table, tmp[1..17]);

        remaining -= 17 + mt;
    }
}
1716 
// Builds the decoding tables of `table` from the BITS list.
//
// num_values is the BITS: num_values[i] is the number of codes of length i+1.
// The caller is expected to have verified that the counts sum to at most 256
// (read_huffman_tables does so before calling).
//
// Fills table.sizes with the code length of every code (terminated by 0),
// generates the canonical code words, and finally derives mincode, maxcode
// and valptr from them.
void derive_table(ref HuffTab table, in ref ubyte[16] num_values) {
    short[256] codes;

    // list of code sizes, one entry per code, in increasing length order
    int k = 0;
    foreach (i; 0..16) {
        foreach (j; 0..num_values[i]) {
            table.sizes[k] = cast(ubyte) (i + 1);
            ++k;
        }
    }
    table.sizes[k] = 0;

    // An empty table (all counts zero) has no codes to generate. Running the
    // generation loop anyway would start with si == 0, keep matching zero
    // size entries and walk past the end of `codes`.
    if (0 < k) {
        k = 0;
        short code = 0;
        ubyte si = table.sizes[k];
        while (true) {
            // consecutive codes of the same length
            do {
                codes[k] = code;
                ++code;
                ++k;
            } while (si == table.sizes[k]);

            if (table.sizes[k] == 0)
                break;

            // move on to the next used code length
            debug(DebugJPEG) assert(si < table.sizes[k]);
            do {
                code <<= 1;
                ++si;
            } while (si != table.sizes[k]);
        }
    }

    // with an empty table this marks every code length as unused (-1)
    derive_mincode_maxcode_valptr(
        table.mincode, table.maxcode, table.valptr,
        codes, num_values
    );
}
1755 
// F.15
//
// For every code length (index 0 == length 1) records the index of its first
// code in `codes` (valptr) together with the smallest and largest code word
// of that length (mincode, maxcode). Lengths without codes get -1 in all
// three tables.
void derive_mincode_maxcode_valptr(
        ref short[16] mincode, ref short[16] maxcode, ref short[16] valptr,
        in ref short[256] codes, in ref ubyte[16] num_values) pure
{
    mincode[] = -1;
    maxcode[] = -1;
    valptr[] = -1;

    int next = 0;   // index of the first code of the current length
    foreach (idx, count; num_values) {
        if (count == 0)
            continue;

        immutable int last = next + count - 1;
        valptr[idx] = cast(short) next;
        mincode[idx] = codes[next];
        maxcode[idx] = codes[last];
        next = last + 1;
    }
}
1776 
// DQT -- define quantization tables
//
// Parses one DQT segment from dc.stream and stores every contained table in
// dc.qtables. Only 8-bit tables in slots 0..3 are accepted, so the segment
// length must be 2 plus a multiple of 65 (info byte + 64 entries).
void read_quantization_tables(ref JPEG_Decoder dc) {
    ubyte[2] buf = void;
    dc.stream.readExact(buf, 2);
    immutable int seg_len = bigEndianToNative!ushort(buf[0..2]);
    if (seg_len % 65 != 2)
        throw new ImageIOException("invalid / not supported");

    immutable int table_count = (seg_len - 2) / 65;
    foreach (_; 0 .. table_count) {
        dc.stream.readExact(buf, 1);
        immutable ubyte info = buf[0];
        immutable ubyte slot = info & 0xf;
        immutable ubyte prec = info >> 4;   // 0 = 8 bit, 1 = 16 bit
        // baseline only knows 8 bit tables
        if (3 < slot || prec != 0)
            throw new ImageIOException("invalid / not supported");

        dc.stream.readExact(dc.qtables[slot], 64);
    }
}
1797 
// SOF0 -- start of frame
//
// Parses the frame header from dc.stream and fills in the image dimensions,
// the component count, the per-component sampling factors and quantization
// table selectors, the maximum sampling factors (hmax/vmax), the sample
// dimensions of every component and the number of MCUs in each direction.
//
// Throws ImageIOException if the header is not an 8-bit frame with one or
// three components, if a dimension is zero, if a component id, sampling
// factor or table selector is out of range, or if one MCU would contain more
// than 10 data units.
void read_frame_header(ref JPEG_Decoder dc) {
    ubyte[9] tmp = void;
    dc.stream.readExact(tmp, 8);
    int len = bigEndianToNative!ushort(tmp[0..2]);  // 8 + num_comps*3
    ubyte precision = tmp[2];
    dc.height = bigEndianToNative!ushort(tmp[3..5]);
    dc.width = bigEndianToNative!ushort(tmp[5..7]);
    dc.num_comps = tmp[7];

    if ( precision != 8 ||
         (dc.num_comps != 1 && dc.num_comps != 3) ||
         len != 8 + dc.num_comps*3 )
        throw new ImageIOException("invalid / not supported");

    // A zero width is never valid and a zero height means the line count is
    // supplied later by a DNL marker, which is not supported here. Letting
    // either through would make the later reconstruction index empty buffers.
    if (dc.width == 0 || dc.height == 0)
        throw new ImageIOException("invalid / not supported");

    dc.hmax = 0;
    dc.vmax = 0;
    int mcu_du = 0; // data units in one mcu
    dc.stream.readExact(tmp, dc.num_comps*3);
    foreach (i; 0..dc.num_comps) {
        ubyte ci = tmp[i*3];
        // JFIF says ci should be i+1, but there are images where ci is i. Normalize ids
        // so that ci == i, always. So much for standards...
        if (i == 0) { dc.correct_comp_ids = ci == i+1; }
        if ((dc.correct_comp_ids && ci != i+1)
        || (!dc.correct_comp_ids && ci != i))
            throw new ImageIOException("invalid component id");
        if (dc.correct_comp_ids) { ci -= 1; }

        dc.index_for[i] = ci;
        auto comp = &dc.comps[ci];
        comp.id = ci;
        // high nibble: horizontal factor, low nibble: vertical factor
        ubyte sampling_factors = tmp[i*3 + 1];
        comp.sfx = sampling_factors >> 4;
        comp.sfy = sampling_factors & 0xf;
        comp.qtable = tmp[i*3 + 2];
        if ( comp.sfy < 1 || 4 < comp.sfy ||
             comp.sfx < 1 || 4 < comp.sfx ||
             3 < comp.qtable )
            throw new ImageIOException("invalid / not supported");

        if (dc.hmax < comp.sfx) dc.hmax = comp.sfx;
        if (dc.vmax < comp.sfy) dc.vmax = comp.sfy;

        mcu_du += comp.sfx * comp.sfy;
    }
    if (10 < mcu_du)
        throw new ImageIOException("invalid / not supported");

    // sample dimensions of each component, scaled by its share of the
    // maximum sampling factors
    foreach (i; 0..dc.num_comps) {
        dc.comps[i].x = cast(size_t) ceil(dc.width * (cast(double) dc.comps[i].sfx / dc.hmax));
        dc.comps[i].y = cast(size_t) ceil(dc.height * (cast(double) dc.comps[i].sfy / dc.vmax));

        debug(DebugJPEG) writefln("%d comp %d sfx/sfy: %d/%d", i, dc.comps[i].id,
                                                                  dc.comps[i].sfx,
                                                                  dc.comps[i].sfy);
    }

    // one MCU covers hmax*8 by vmax*8 pixels; round the counts up
    size_t mcu_w = dc.hmax * 8;
    size_t mcu_h = dc.vmax * 8;
    dc.num_mcu_x = cast(int) ((dc.width + mcu_w-1) / mcu_w);
    dc.num_mcu_y = cast(int) ((dc.height + mcu_h-1) / mcu_h);

    debug(DebugJPEG) {
        writefln("\tlen: %s", len);
        writefln("\tprecision: %s", precision);
        writefln("\tdimensions: %s x %s", dc.width, dc.height);
        writefln("\tnum_comps: %s", dc.num_comps);
        writefln("\tnum_mcu_x: %s", dc.num_mcu_x);
        writefln("\tnum_mcu_y: %s", dc.num_mcu_y);
    }

}
1871 
// SOS -- start of scan
//
// Parses the scan header from dc.stream and records, for every component of
// the frame, which DC and AC huffman tables the scan uses. The scan must
// list exactly as many components as the frame header declared, and only
// table selectors 0 and 1 are accepted. The three trailing bytes (spectral
// selection start/end and successive approximation) are read but ignored.
void read_scan_header(ref JPEG_Decoder dc) {
    ubyte[3] head = void;
    dc.stream.readExact(head, head.length);
    immutable ushort seg_len = bigEndianToNative!ushort(head[0..2]);
    immutable ubyte scan_comps = head[2];

    if (scan_comps != dc.num_comps || seg_len != (6 + scan_comps*2))
        throw new ImageIOException("invalid / not supported");

    // remainder of the segment: two bytes per component plus three trailing bytes
    immutable int rest = seg_len - 3;
    auto buf = (cast(ubyte*) alloca(rest * ubyte.sizeof))[0 .. rest];
    dc.stream.readExact(buf, buf.length);

    foreach (n; 0 .. scan_comps) {
        // apply the same id normalization the frame header used
        uint wanted = buf[n*2];
        if (dc.correct_comp_ids)
            wanted -= 1;

        // look up the frame component carrying this id
        int ci = 0;
        for (; ci < dc.num_comps; ++ci) {
            if (dc.comps[ci].id == wanted)
                break;
        }
        if (dc.num_comps <= ci)
            throw new ImageIOException("invalid component id");

        // high nibble selects the dc table, low nibble the ac table
        immutable ubyte selectors = buf[n*2 + 1];
        auto comp = &dc.comps[ci];
        comp.dc_table = selectors >> 4;
        comp.ac_table = selectors & 0xf;
        if (1 < comp.dc_table || 1 < comp.ac_table)
            throw new ImageIOException("invalid / not supported");
    }
}
1906 
// DRI -- define restart interval
//
// Reads the four byte DRI segment (length field, which must be 4, followed
// by the interval) and stores the interval in dc.restart_interval.
void read_restart_interval(ref JPEG_Decoder dc) {
    ubyte[4] seg = void;
    dc.stream.readExact(seg, seg.length);

    immutable ushort seg_len = bigEndianToNative!ushort(seg[0..2]);
    if (seg_len != 4)
        throw new ImageIOException("invalid / not supported");

    dc.restart_interval = bigEndianToNative!ushort(seg[2..4]);
    debug(DebugJPEG) writeln("restart interval set to: ", dc.restart_interval);
}
1916 
// Reads the data that follows the SOS segment.
//
// Allocates one sample plane per component, sized to whole MCUs, decodes the
// single scan into those planes and returns the pixels converted to the
// target format. Multiple scans (E.7) belong to progressive images, which
// are not supported, so exactly one scan is decoded.
ubyte[] decode_jpeg(ref JPEG_Decoder dc) {
    foreach (idx; 0 .. dc.num_comps) {
        auto comp = &dc.comps[idx];
        comp.data = new ubyte[dc.num_mcu_x*comp.sfx*8*dc.num_mcu_y*comp.sfy*8];
    }

    // E.2.3
    decode_scan(dc);

    // drops the fill samples and converts to the target format
    return reconstruct(dc);
}
1931 
// E.2.3 and E.8 and E.9
//
// Decodes the entropy coded data of one scan into the component planes
// (comp.data). MCUs are visited row by row; inside an MCU every component
// contributes sfx * sfy data units, each of which is entropy decoded,
// dequantized and inverse transformed straight into its place in the plane.
// When a restart interval is set, an RSTn marker is expected after every
// `restart_interval` MCUs and the bit reader and DC predictors are reset.
void decode_scan(ref JPEG_Decoder dc) {
    debug(DebugJPEG) writeln("decode scan...");

    // intervals: restart intervals still to decode (including the current one)
    // mcus: MCUs left in the current interval
    int intervals, mcus;
    if (0 < dc.restart_interval) {
        int total_mcus = dc.num_mcu_x * dc.num_mcu_y;
        intervals = (total_mcus + dc.restart_interval-1) / dc.restart_interval;
        mcus = dc.restart_interval;
    } else {
        // no restart markers: the whole scan is one interval
        intervals = 1;
        mcus = dc.num_mcu_x * dc.num_mcu_y;
    }
    debug(DebugJPEG) writeln("intervals: ", intervals);

    foreach (mcu_j; 0 .. dc.num_mcu_y) {
        foreach (mcu_i; 0 .. dc.num_mcu_x) {

            // decode mcu
            foreach (_c; 0..dc.num_comps) {
                auto comp = &dc.comps[dc.index_for[_c]];
                foreach (du_j; 0 .. comp.sfy) {
                    foreach (du_i; 0 .. comp.sfx) {
                        // decode entropy, dequantize & dezigzag
                        short[64] data = decode_block(dc, *comp, dc.qtables[comp.qtable]);
                        // idct & level-shift
                        // top-left sample of this data unit within the plane
                        long outx = (mcu_i * comp.sfx + du_i) * 8;
                        long outy = (mcu_j * comp.sfy + du_j) * 8;
                        long dst_stride = dc.num_mcu_x * comp.sfx*8;
                        ubyte* dst = comp.data.ptr + outy*dst_stride + outx;
                        stbi__idct_block(dst, dst_stride, data);
                    }
                }
            }

            --mcus;

            if (!mcus) {
                // the current interval is exhausted
                --intervals;
                if (!intervals)
                    return;

                read_restart(dc.stream);    // RSTx marker

                if (intervals == 1) {
                    // last interval, may have fewer MCUs than defined by DRI
                    mcus = (dc.num_mcu_y - mcu_j - 1) * dc.num_mcu_x + dc.num_mcu_x - mcu_i - 1;
                } else {
                    mcus = dc.restart_interval;
                }

                // reset decoder
                // (drop any buffered bits and restart DC prediction from zero)
                dc.cb = 0;
                dc.bits_left = 0;
                foreach (k; 0..dc.num_comps)
                    dc.comps[k].pred = 0;
            }

        }
    }
}
1993 
// RST0-RST7
//
// Consumes the two bytes of a restart marker from `stream` and throws
// ImageIOException when they are not 0xff followed by one of RST0..RST7.
// The cyclic order of the markers (0 through 7) is not verified.
void read_restart(Reader stream) {
    ubyte[2] bytes = void;
    stream.readExact(bytes, bytes.length);

    immutable bool is_restart = bytes[0] == 0xff
                             && Marker.RST0 <= bytes[1]
                             && bytes[1] <= Marker.RST7;
    if (!is_restart)
        throw new ImageIOException("reset marker missing");
}
2002 
// Zigzag scan order lookup: dezigzag[k] is the row-major index (row*8 + column)
// inside an 8x8 block of the k-th coefficient in zigzag order. decode_block
// uses it to place the decoded coefficients.
immutable ubyte[64] dezigzag = [
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63,
];
2013 
// decode entropy, dequantize & dezigzag (see section F.2)
//
// Decodes one 8x8 block of coefficients for `comp` from the bit stream of
// `dc`. The DC coefficient is coded as a difference to the component's
// running predictor (comp.pred, updated here); the AC coefficients are coded
// as (zero run, size) pairs. Every coefficient is multiplied by its entry of
// `qtable` (given in zigzag order) and stored at its natural position.
//
// Returns the dequantized block in row-major order.
// Throws ImageIOException("corrupt block") when the DC size category is out
// of range or a zero run points past the end of the block.
short[64] decode_block(ref JPEG_Decoder dc, ref JPEG_Decoder.Component comp,
                                                    in ref ubyte[64] qtable)
{
    short[64] res = 0;

    ubyte t = decode_huff(dc, dc.dc_tables[comp.dc_table]);
    // t is the number of extra bits of the DC difference. It comes straight
    // out of the (untrusted) huffman table values, so it can be any byte;
    // nothing above 15 is meaningful and such a value would be used as a
    // shift count in receive_and_extend.
    if (15 < t)
        throw new ImageIOException("corrupt block");
    int diff = t ? dc.receive_and_extend(t) : 0;

    comp.pred = comp.pred + diff;
    res[0] = cast(short) (comp.pred * qtable[0]);

    int k = 1;
    do {
        ubyte rs = decode_huff(dc, dc.ac_tables[comp.ac_table]);
        ubyte rrrr = rs >> 4;   // run of zero coefficients
        ubyte ssss = rs & 0xf;  // bit size of the coefficient that follows

        if (ssss == 0) {
            if (rrrr != 0xf)
                break;      // end of block
            k += 16;    // run length is 16
            continue;
        }

        k += rrrr;

        if (63 < k)
            throw new ImageIOException("corrupt block");
        res[dezigzag[k]] = cast(short) (dc.receive_and_extend(ssss) * qtable[k]);
        k += 1;
    } while (k < 64);

    return res;
}
2049 
// Reads `s` bits from the stream, most significant bit first, and sign
// extends the result: values below 2^(s-1) stand for negative numbers and
// are mapped to value - 2^s + 1. Callers in this file only pass a non-zero s.
int receive_and_extend(ref JPEG_Decoder dc, ubyte s) {
    // receive
    int value = 0;
    for (ubyte left = s; left != 0; --left)
        value = (value << 1) + nextbit(dc);

    // extend
    immutable int threshold = 1 << (s-1);
    return (value < threshold) ? value + (-1 << s) + 1
                               : value;
}
2061 
// F.16 -- the DECODE
//
// Reads one huffman coded symbol from the bit stream: bits are appended to
// the code until it no longer exceeds the largest code of the current
// length, then the code is translated to an index into tab.values.
// Throws ImageIOException if no code of up to 16 bits matches or if the
// resulting value index is out of range.
ubyte decode_huff(ref JPEG_Decoder dc, in ref HuffTab tab) {
    short code = nextbit(dc);

    size_t len_idx = 0;     // code length minus one
    for (; tab.maxcode[len_idx] < code; ) {
        code = cast(short) ((code << 1) + nextbit(dc));
        len_idx += 1;
        if (len_idx >= tab.maxcode.length)
            throw new ImageIOException("corrupt huffman coding");
    }

    immutable int pos = tab.valptr[len_idx] + code - tab.mincode[len_idx];
    // the unsigned comparison also rejects negative positions
    if (cast(uint) pos >= tab.values.length)
        throw new ImageIOException("corrupt huffman coding");
    return tab.values[pos];
}
2078 
// F.2.2.5 and F.18
//
// Returns the next bit of the entropy coded data. A fresh byte is fetched
// from dc.stream whenever the current one (dc.cb) is used up. A 0xff data
// byte must be followed by a stuffed zero byte, which is skipped; anything
// else after 0xff is reported as an unexpected marker.
ubyte nextbit(ref JPEG_Decoder dc) {
    if (dc.bits_left == 0) {
        ubyte[1] one;
        dc.stream.readExact(one, 1);
        dc.cb = one[0];
        dc.bits_left = 8;

        if (dc.cb == 0xff) {
            dc.stream.readExact(one, 1);
            if (one[0] != 0x0)
                throw new ImageIOException("unexpected marker");
        }
    }

    // hand out the top bit and shift the next one into place
    dc.bits_left -= 1;
    ubyte top = dc.cb >> 7;
    dc.cb <<= 1;
    return top;
}
2100 
// Builds the final interleaved pixel buffer (width * height * tgt_chans
// bytes) from the decoded component planes. Fill samples outside the image
// are dropped, subsampled components are upsampled and YCbCr is converted to
// RGB where the target format asks for it.
//
// The switch key is num_comps * 10 + tgt_chans, e.g. 34 means a three
// component source converted to four channels.
ubyte[] reconstruct(in ref JPEG_Decoder dc) {
    auto result = new ubyte[dc.width * dc.height * dc.tgt_chans];

    switch (dc.num_comps * 10 + dc.tgt_chans) {
        // three components -> RGB(A)
        case 34, 33:
            // Use specialized bilinear filtering functions for the frequent cases where
            // Cb & Cr channels have half resolution.
            if ((dc.comps[0].sfx <= 2 && dc.comps[0].sfy <= 2)
            && (dc.comps[0].sfx + dc.comps[0].sfy >= 3)
            && dc.comps[1].sfx == 1 && dc.comps[1].sfy == 1
            && dc.comps[2].sfx == 1 && dc.comps[2].sfy == 1) {
                void function(in ubyte[], in ubyte[], ubyte[]) resample;
                switch (dc.comps[0].sfx * 10 + dc.comps[0].sfy) {
                    case 22: resample = &upsample_h2_v2; break;
                    case 21: resample = &upsample_h2_v1; break;
                    case 12: resample = &upsample_h1_v2; break;
                    default: throw new ImageIOException("bug");
                }

                // one upsampled line of each chroma component
                auto comp1 = new ubyte[](dc.width);
                auto comp2 = new ubyte[](dc.width);

                size_t s = 0;
                size_t di = 0;
                foreach (j; 0 .. dc.height) {
                    // mi: chroma line covering output row j
                    // si: second chroma line to blend with; alternates between
                    //     the line before and the line after mi, except at the
                    //     top and bottom where mi itself is used
                    size_t mi = j / dc.comps[0].sfy;
                    size_t si = (mi == 0 || mi >= (dc.height-1)/dc.comps[0].sfy)
                              ? mi : mi - 1 + s * 2;
                    s = s ^ 1;

                    // cs: stride of the chroma planes
                    size_t cs = dc.num_mcu_x * dc.comps[1].sfx * 8;
                    size_t cl0 = mi * cs;
                    size_t cl1 = si * cs;
                    resample(dc.comps[1].data[cl0 .. cl0 + dc.comps[1].x],
                             dc.comps[1].data[cl1 .. cl1 + dc.comps[1].x],
                             comp1[]);
                    resample(dc.comps[2].data[cl0 .. cl0 + dc.comps[2].x],
                             dc.comps[2].data[cl1 .. cl1 + dc.comps[2].x],
                             comp2[]);

                    foreach (i; 0 .. dc.width) {
                        result[di .. di+3] = ycbcr_to_rgb(
                            dc.comps[0].data[j * dc.num_mcu_x * dc.comps[0].sfx * 8 + i],
                            comp1[i],
                            comp2[i],
                        );
                        if (dc.tgt_chans == 4)
                            result[di+3] = 255;
                        di += dc.tgt_chans;
                    }
                }

                return result;
            }

            // any other subsampling layout goes through the generic
            // nearest neighbor upsampler
            foreach (const ref comp; dc.comps[0..dc.num_comps]) {
                if (comp.sfx != dc.hmax || comp.sfy != dc.vmax)
                    return dc.upsample(result);
            }

            // no subsampling at all: convert sample by sample
            size_t si, di;
            foreach (j; 0 .. dc.height) {
                foreach (i; 0 .. dc.width) {
                    result[di .. di+3] = ycbcr_to_rgb(
                        dc.comps[0].data[si+i],
                        dc.comps[1].data[si+i],
                        dc.comps[2].data[si+i],
                    );
                    if (dc.tgt_chans == 4)
                        result[di+3] = 255;
                    di += dc.tgt_chans;
                }
                si += dc.num_mcu_x * dc.comps[0].sfx * 8;
            }
            return result;
        // one or three components -> Y(A): only the first component is used
        case 32, 12, 31, 11:
            const comp = &dc.comps[0];
            if (comp.sfx == dc.hmax && comp.sfy == dc.vmax) {
                size_t si, di;
                if (dc.tgt_chans == 2) {
                    foreach (j; 0 .. dc.height) {
                        foreach (i; 0 .. dc.width) {
                            result[di++] = comp.data[si+i];
                            result[di++] = 255;
                        }
                        si += dc.num_mcu_x * comp.sfx * 8;
                    }
                } else {
                    // plain row copy, skipping the fill samples of each plane row
                    foreach (j; 0 .. dc.height) {
                        result[di .. di+dc.width] = comp.data[si .. si+dc.width];
                        si += dc.num_mcu_x * comp.sfx * 8;
                        di += dc.width;
                    }
                }
                return result;
            } else {
                // need to resample (haven't tested this...)
                return dc.upsample_luma(result);
            }
        // one component -> RGB(A): the sample is replicated into R, G and B
        case 14, 13:
            const comp = &dc.comps[0];
            size_t si, di;
            foreach (j; 0 .. dc.height) {
                foreach (i; 0 .. dc.width) {
                    result[di .. di+3] = comp.data[si+i];
                    if (dc.tgt_chans == 4)
                        result[di+3] = 255;
                    di += dc.tgt_chans;
                }
                si += dc.num_mcu_x * comp.sfx * 8;
            }
            return result;
        default: assert(0);
    }
}
2216 
// Upsamples one line of a component that has half the horizontal and half
// the vertical resolution of the output, using bilinear weights.
//
// line0 is the source line nearest to the output row, line1 the next nearest
// one (weights 3:1). Horizontally every source sample yields two output
// samples, each blended 3:1 with the neighbouring source sample; the first
// and the last output sample have no outer neighbour and are blended
// vertically only. result receives the output and may hold one sample less
// than 2 * line0.length (odd image width).
//
// Empty input or output leaves result untouched.
void upsample_h2_v2(in ubyte[] line0, in ubyte[] line1, ubyte[] result) {
    ubyte mix(ubyte mm, ubyte ms, ubyte sm, ubyte ss) {
       return cast(ubyte) (( cast(uint) mm * 3 * 3
                           + cast(uint) ms * 3 * 1
                           + cast(uint) sm * 1 * 3
                           + cast(uint) ss * 1 * 1
                           + 8) / 16);
    }

    if (line0.length == 0 || result.length == 0) return;

    result[0] = cast(ubyte) (( cast(uint) line0[0] * 3
                             + cast(uint) line1[0] * 1
                             + 2) / 4);
    if (line0.length == 1) {
        // A single source sample covers both output samples of a two pixel
        // wide image; without this the second one would keep a stale value.
        if (1 < result.length)
            result[1] = result[0];
        return;
    }
    result[1] = mix(line0[0], line0[1], line1[0], line1[1]);

    size_t di = 2;
    foreach (i; 1 .. line0.length) {
        // left output sample of source sample i
        result[di] = mix(line0[i], line0[i-1], line1[i], line1[i-1]);
        di += 1;
        if (i == line0.length-1) {
            // right edge: no neighbour on the right
            if (di < result.length) {
                result[di] = cast(ubyte) (( cast(uint) line0[i] * 3
                                          + cast(uint) line1[i] * 1
                                          + 2) / 4);
            }
            return;
        }
        // right output sample of source sample i
        result[di] = mix(line0[i], line0[i+1], line1[i], line1[i+1]);
        di += 1;
    }
}
2248 
// Upsamples one line of a component that has half the horizontal resolution
// of the output (full vertical resolution), using linear weights.
//
// Every source sample of line0 yields two output samples, each blended 3:1
// with the neighbouring source sample; the first and the last output sample
// have no outer neighbour and copy the source sample. _line1 is unused and
// only exists so that the signature matches the other upsamplers. result may
// hold one sample less than 2 * line0.length (odd image width).
//
// Empty input or output leaves result untouched.
void upsample_h2_v1(in ubyte[] line0, in ubyte[] _line1, ubyte[] result) {
    if (line0.length == 0 || result.length == 0) return;

    result[0] = line0[0];
    if (line0.length == 1) {
        // A single source sample covers both output samples of a two pixel
        // wide image; without this the second one would keep a stale value.
        if (1 < result.length)
            result[1] = line0[0];
        return;
    }
    result[1] = cast(ubyte) (( cast(uint) line0[0] * 3
                             + cast(uint) line0[1] * 1
                             + 2) / 4);
    size_t di = 2;
    foreach (i; 1 .. line0.length) {
        // left output sample of source sample i
        result[di] = cast(ubyte) (( cast(uint) line0[i-1] * 1
                                  + cast(uint) line0[i+0] * 3
                                  + 2) / 4);
        di += 1;
        if (i == line0.length-1) {
            // right edge: no neighbour on the right
            if (di < result.length) result[di] = line0[i];
            return;
        }
        // right output sample of source sample i
        result[di] = cast(ubyte) (( cast(uint) line0[i+0] * 3
                                  + cast(uint) line0[i+1] * 1
                                  + 2) / 4);
        di += 1;
    }
}
2271 
// Upsamples one line of a component that has full horizontal but half the
// vertical resolution: every output sample is the rounded 3:1 blend of the
// samples at the same position in line0 (nearer line) and line1.
void upsample_h1_v2(in ubyte[] line0, in ubyte[] line1, ubyte[] result) {
    foreach (pos, ref px; result) {
        immutable uint near = line0[pos];
        immutable uint far = line1[pos];
        px = cast(ubyte) ((near * 3 + far * 1 + 2) / 4);
    }
}
2279 
// Nearest neighbor
//
// Writes the first component, stretched to the full image size, into
// `result` as Y or YA pixels (alpha is set to 255 when tgt_chans is 2) and
// returns `result`. Fractional accumulators step through the source plane:
// each output pixel advances them by sampling factor / maximum factor, and
// the source index moves on whenever an accumulator reaches 1.
ubyte[] upsample_luma(in ref JPEG_Decoder dc, ubyte[] result) {
    const luma = &dc.comps[0];
    const size_t row_stride = dc.num_mcu_x * luma.sfx * 8;
    const step_y = cast(float) luma.sfy / cast(float) dc.vmax;
    const step_x = cast(float) luma.sfx / cast(float) dc.hmax;
    const bool with_alpha = dc.tgt_chans == 2;

    float frac_y = step_y * 0.5;
    size_t row_off = 0;     // offset of the current source row
    size_t out_pos = 0;     // write position in result

    foreach (_row; 0 .. dc.height) {
        float frac_x = step_x * 0.5;
        size_t col_off = 0;

        foreach (_col; 0 .. dc.width) {
            result[out_pos] = luma.data[row_off + col_off];
            if (with_alpha)
                result[out_pos+1] = 255;
            out_pos += dc.tgt_chans;

            frac_x += step_x;
            if (frac_x >= 1.0) {
                frac_x -= 1.0;
                col_off += 1;
            }
        }

        frac_y += step_y;
        if (frac_y >= 1.0) {
            frac_y -= 1.0;
            row_off += row_stride;
        }
    }
    return result;
}
2307 
// Nearest neighbor
//
// Generic upsampler for three component images: converts the planes to
// RGB(A) pixels in `result` (alpha is set to 255 when tgt_chans is 4) and
// returns `result`. Each component has its own pair of fractional
// accumulators; every output pixel advances them by the component's sampling
// factor divided by the maximum factor, and the component's source index
// moves on whenever an accumulator reaches 1.
ubyte[] upsample(in ref JPEG_Decoder dc, ubyte[] result) {
    // row strides of the three planes
    const size_t stride0 = dc.num_mcu_x * dc.comps[0].sfx * 8;
    const size_t stride1 = dc.num_mcu_x * dc.comps[1].sfx * 8;
    const size_t stride2 = dc.num_mcu_x * dc.comps[2].sfx * 8;

    // source step per output pixel, vertically and horizontally
    const y_step0 = cast(float) dc.comps[0].sfy / cast(float) dc.vmax;
    const y_step1 = cast(float) dc.comps[1].sfy / cast(float) dc.vmax;
    const y_step2 = cast(float) dc.comps[2].sfy / cast(float) dc.vmax;
    const x_step0 = cast(float) dc.comps[0].sfx / cast(float) dc.hmax;
    const x_step1 = cast(float) dc.comps[1].sfx / cast(float) dc.hmax;
    const x_step2 = cast(float) dc.comps[2].sfx / cast(float) dc.hmax;

    // the accumulators start half a step in
    float y0 = y_step0 * 0.5;
    float y1 = y_step1 * 0.5;
    float y2 = y_step2 * 0.5;
    // offsets of the current source row in each plane
    size_t y0i = 0;
    size_t y1i = 0;
    size_t y2i = 0;

    // write position in result
    size_t di;

    foreach (_j; 0 .. dc.height) {
        float x0 = x_step0 * 0.5;
        float x1 = x_step1 * 0.5;
        float x2 = x_step2 * 0.5;
        // current source column in each plane
        size_t x0i = 0;
        size_t x1i = 0;
        size_t x2i = 0;
        foreach (i; 0 .. dc.width) {
            result[di .. di+3] = ycbcr_to_rgb(
                dc.comps[0].data[y0i + x0i],
                dc.comps[1].data[y1i + x1i],
                dc.comps[2].data[y2i + x2i],
            );
            if (dc.tgt_chans == 4)
                result[di+3] = 255;
            di += dc.tgt_chans;
            x0 += x_step0;
            x1 += x_step1;
            x2 += x_step2;
            if (x0 >= 1.0) { x0 -= 1.0; x0i += 1; }
            if (x1 >= 1.0) { x1 -= 1.0; x1i += 1; }
            if (x2 >= 1.0) { x2 -= 1.0; x2i += 1; }
        }
        y0 += y_step0;
        y1 += y_step1;
        y2 += y_step2;
        if (y0 >= 1.0) { y0 -= 1.0; y0i += stride0; }
        if (y1 >= 1.0) { y1 -= 1.0; y1i += stride1; }
        if (y2 >= 1.0) { y2 -= 1.0; y2i += stride2; }
    }
    return result;
}
2362 
// Converts one YCbCr sample triple to RGB. The chroma values are centred on
// 128; every channel is clamped to 0..255 (fractions are cut off by clamp).
ubyte[3] ycbcr_to_rgb(ubyte y, ubyte cb, ubyte cr) pure {
    immutable int blue_diff = cb - 128;
    immutable int red_diff = cr - 128;

    ubyte[3] rgb = void;
    rgb[0] = clamp(y + 1.402*red_diff);
    rgb[1] = clamp(y - 0.34414*blue_diff - 0.71414*red_diff);
    rgb[2] = clamp(y + 1.772*blue_diff);
    return rgb;
}
2370 
// Limits x to the range 0..255 and converts it to ubyte; the fractional part
// of in-range values is cut off.
ubyte clamp(float x) pure {
    ubyte limited;
    if (x < 0)
        limited = 0;
    else if (255 < x)
        limited = 255;
    else
        limited = cast(ubyte) x;
    return limited;
}
2376 
2377 // ------------------------------------------------------------
2378 // The IDCT stuff here (to the next dashed line) is copied and adapted from
2379 // stb_image which is released under public domain.  Many thanks to stb_image
2380 // author, Sean Barrett.
2381 // Link: https://github.com/nothings/stb/blob/master/stb_image.h
2382 
// Converts x to fixed point with 12 fractional bits: x * 4096 plus 0.5, cut off towards zero.
pure int f2f(float x) {
    immutable scaled = x * 4096 + 0.5;
    return cast(int) scaled;
}
// Scales an integer to the 12 bit fixed point format used by f2f.
pure int fsh(int x) {
    enum fraction_bits = 12;
    return x << fraction_bits;
}
2385 
// from stb_image, derived from jidctint -- DCT_ISLOW
//
// One dimensional 8 point inverse DCT kernel working in fixed point with 12
// fractional bits (see f2f and fsh). s0..s7 are the input coefficients.
// The results are handed back through the ref parameters: x0..x3 hold the
// even part and t0..t3 the odd part. The callers in this file combine them
// as x0+t3, x1+t2, x2+t1, x3+t0 for outputs 0..3 and x3-t0, x2-t1, x1-t2,
// x0-t3 for outputs 4..7.
pure void STBI__IDCT_1D(ref int t0, ref int t1, ref int t2, ref int t3,
                        ref int x0, ref int x1, ref int x2, ref int x3,
        int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7)
{
   int p1,p2,p3,p4,p5;
   //int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3;
   // even part: inputs 0, 2, 4 and 6
   p2 = s2;
   p3 = s6;
   p1 = (p2+p3) * f2f(0.5411961f);
   t2 = p1 + p3 * f2f(-1.847759065f);
   t3 = p1 + p2 * f2f( 0.765366865f);
   p2 = s0;
   p3 = s4;
   t0 = fsh(p2+p3);
   t1 = fsh(p2-p3);
   x0 = t0+t3;
   x3 = t0-t3;
   x1 = t1+t2;
   x2 = t1-t2;
   // odd part: inputs 1, 3, 5 and 7; t0..t3 are reused from here on
   t0 = s7;
   t1 = s5;
   t2 = s3;
   t3 = s1;
   p3 = t0+t2;
   p4 = t1+t3;
   p1 = t0+t3;
   p2 = t1+t2;
   p5 = (p3+p4)*f2f( 1.175875602f);
   t0 = t0*f2f( 0.298631336f);
   t1 = t1*f2f( 2.053119869f);
   t2 = t2*f2f( 3.072711026f);
   t3 = t3*f2f( 1.501321110f);
   p1 = p5 + p1*f2f(-0.899976223f);
   p2 = p5 + p2*f2f(-2.562915447f);
   p3 = p3*f2f(-1.961570560f);
   p4 = p4*f2f(-0.390180644f);
   t3 += p1+p4;
   t2 += p2+p3;
   t1 += p2+p4;
   t0 += p1+p3;
}
2428 
// idct and level-shift
//
// Runs the two dimensional inverse DCT on the 64 coefficients in `data`
// (row-major 8x8) and writes the resulting 8x8 samples, level shifted by
// +128 and clamped to 0..255, to `dst`. Consecutive output rows are
// `dst_stride` bytes apart. The transform is done as a column pass into a
// temporary block followed by a row pass.
pure void stbi__idct_block(ubyte* dst, long dst_stride, in ref short[64] data) {
   int i;
   int[64] val;
   int* v = val.ptr;
   const(short)* d = data.ptr;

   // columns
   for (i=0; i < 8; ++i,++d, ++v) {
      // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
           && d[40]==0 && d[48]==0 && d[56]==0) {
         //    no shortcut                 0     seconds
         //    (1|2|3|4|5|6|7)==0          0     seconds
         //    all separate               -0.047 seconds
         //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
         // only the DC term is set: the whole column gets the same value,
         // scaled by the 2 extra bits the general path keeps
         int dcterm = d[0] << 2;
         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
      } else {
         int t0,t1,t2,t3,x0,x1,x2,x3;
         STBI__IDCT_1D(
             t0, t1, t2, t3,
             x0, x1, x2, x3,
             d[ 0], d[ 8], d[16], d[24],
             d[32], d[40], d[48], d[56]
         );
         // constants scaled things up by 1<<12; let's bring them back
         // down, but keep 2 extra bits of precision
         x0 += 512; x1 += 512; x2 += 512; x3 += 512;
         v[ 0] = (x0+t3) >> 10;
         v[56] = (x0-t3) >> 10;
         v[ 8] = (x1+t2) >> 10;
         v[48] = (x1-t2) >> 10;
         v[16] = (x2+t1) >> 10;
         v[40] = (x2-t1) >> 10;
         v[24] = (x3+t0) >> 10;
         v[32] = (x3-t0) >> 10;
      }
   }

   // rows: v walks the temporary block, o the output rows
   ubyte* o = dst;
   for (i=0, v=val.ptr; i < 8; ++i,v+=8,o+=dst_stride) {
      // no fast case since the first 1D IDCT spread components out
      int t0,t1,t2,t3,x0,x1,x2,x3;
      STBI__IDCT_1D(
          t0, t1, t2, t3,
          x0, x1, x2, x3,
          v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]
      );
      // constants scaled things up by 1<<12, plus we had 1<<2 from first
      // loop, plus horizontal and vertical each scale by sqrt(8) so together
      // we've got an extra 1<<3, so 1<<17 total we need to remove.
      // so we want to round that, which means adding 0.5 * 1<<17,
      // aka 65536. Also, we'll end up with -128 to 127 that we want
      // to encode as 0-255 by adding 128, so we'll add that before the shift
      x0 += 65536 + (128<<17);
      x1 += 65536 + (128<<17);
      x2 += 65536 + (128<<17);
      x3 += 65536 + (128<<17);
      // tried computing the shifts into temps, or'ing the temps to see
      // if any were out of range, but that was slower
      o[0] = stbi__clamp((x0+t3) >> 17);
      o[7] = stbi__clamp((x0-t3) >> 17);
      o[1] = stbi__clamp((x1+t2) >> 17);
      o[6] = stbi__clamp((x1-t2) >> 17);
      o[2] = stbi__clamp((x2+t1) >> 17);
      o[5] = stbi__clamp((x2-t1) >> 17);
      o[3] = stbi__clamp((x3+t0) >> 17);
      o[4] = stbi__clamp((x3-t0) >> 17);
   }
}
2500 
// clamp to 0-255
// Negative values become 0, values above 255 become 255, everything else is
// returned unchanged.
pure ubyte stbi__clamp(int x) {
   if (x < 0)
      return 0;
   if (255 < x)
      return 255;
   return cast(ubyte) x;
}
2509 
2510 // the above is adapted from stb_image
2511 // ------------------------------------------------------------
2512 
/// Reads the width, height and number of channels of the JPEG file `filename`.
public void read_jpeg_info(in char[] filename, out long w, out long h, out long chans) {
    scope source = new FileReader(filename);
    read_jpeg_info(source, w, h, chans);
}
2518 
// Reads the width, height and component count of a JPEG from `stream`.
//
// The stream must start with SOI. Segments are then skipped until a frame
// header (SOF0..SOF3 or SOF9..SOF11) is found, from which the values are
// taken. Throws ImageIOException if the data does not start like a JPEG, if
// SOS or EOI shows up before any frame header, or on any other marker that
// is not one of the skippable kinds (DRI, DHT, DQT, COM, APP0..APPf).
void read_jpeg_info(Reader stream, out long w, out long h, out long chans) {
    ubyte[2] marker = void;

    // SOI
    stream.readExact(marker, 2);
    if (marker[0..2] != [0xff, 0xd8])
        throw new ImageIOException("not JPEG");

    for (;;) {
        stream.readExact(marker, 2);
        if (marker[0] != 0xff)
            throw new ImageIOException("no frame header");
        // swallow fill bytes in front of the marker code
        while (marker[1] == 0xff)
            stream.readExact(marker[1..$], 1);

        immutable ubyte code = marker[1];

        immutable bool is_frame_header =
               (Marker.SOF0 <= code && code <= Marker.SOF3)
            || (Marker.SOF9 <= code && code <= Marker.SOF11);
        if (is_frame_header) {
            // length (2), precision (1), height (2), width (2), components (1)
            ubyte[8] frame;
            stream.readExact(frame[0..8], 8);
            w = bigEndianToNative!ushort(frame[5..7]);
            h = bigEndianToNative!ushort(frame[3..5]);
            chans = frame[7];
            return;
        }

        if (code == Marker.SOS || code == Marker.EOI)
            throw new ImageIOException("no frame header");

        immutable bool skippable =
               code == Marker.DRI || code == Marker.DHT
            || code == Marker.DQT || code == Marker.COM
            || (Marker.APP0 <= code && code <= Marker.APPf);
        if (!skippable)
            throw new ImageIOException("unsupported marker");

        // jump over the segment; its length field counts itself
        ubyte[2] lenbuf = void;
        stream.readExact(lenbuf, 2);
        int skiplen = bigEndianToNative!ushort(lenbuf) - 2;
        stream.seek(skiplen, SEEK_CUR);
    }
}
2560 
2561 // --------------------------------------------------------------------------------
2562 // Conversions
2563 
// Internal colour formats known to the line converters. Values 1..4
// match the channel counts of Y, YA, RGB and RGBA; BGR and BGRA take the
// next two values.
enum _ColFmt : int {
    Unknown = 0,
    Y       = 1,
    YA      = 2,
    RGB     = 3,
    RGBA    = 4,
    BGR     = 5,
    BGRA    = 6,
}
2573 
// Signature shared by all line converters: reads pixels from `src` and
// writes the converted pixels into `tgt`.
alias LineConv = void function(in ubyte[] src, ubyte[] tgt);
2575 
// Selects the line converter that turns `src_chans` formatted pixels
// into `tgt_chans` formatted pixels. Both arguments are _ColFmt values.
//
// Returns: &copy_line when both formats are equal, otherwise the
// matching X_to_Y converter.
// Throws: ImageIOException("internal error") when no converter exists
// for the requested pair or when either value is not a known format.
LineConv get_converter(long src_chans, long tgt_chans) pure {
    long combo(long a, long b) pure nothrow { return a*16 + b; }

    if (src_chans == tgt_chans)
        return &copy_line;

    // combo() only yields unique keys while both values are small; without
    // this check an out-of-range pair such as (0, 18) would alias a valid
    // pair and hand back an unrelated converter.
    if (src_chans < _ColFmt.Y || _ColFmt.BGRA < src_chans ||
        tgt_chans < _ColFmt.Y || _ColFmt.BGRA < tgt_chans)
        throw new ImageIOException("internal error");

    switch (combo(src_chans, tgt_chans)) with (_ColFmt) {
        case combo(Y, YA)      : return &Y_to_YA;
        case combo(Y, RGB)     : return &Y_to_RGB;
        case combo(Y, RGBA)    : return &Y_to_RGBA;
        case combo(Y, BGR)     : return &Y_to_BGR;
        case combo(Y, BGRA)    : return &Y_to_BGRA;
        case combo(YA, Y)      : return &YA_to_Y;
        case combo(YA, RGB)    : return &YA_to_RGB;
        case combo(YA, RGBA)   : return &YA_to_RGBA;
        case combo(YA, BGR)    : return &YA_to_BGR;
        case combo(YA, BGRA)   : return &YA_to_BGRA;
        case combo(RGB, Y)     : return &RGB_to_Y;
        case combo(RGB, YA)    : return &RGB_to_YA;
        case combo(RGB, RGBA)  : return &RGB_to_RGBA;
        case combo(RGB, BGR)   : return &RGB_to_BGR;
        case combo(RGB, BGRA)  : return &RGB_to_BGRA;
        case combo(RGBA, Y)    : return &RGBA_to_Y;
        case combo(RGBA, YA)   : return &RGBA_to_YA;
        case combo(RGBA, RGB)  : return &RGBA_to_RGB;
        case combo(RGBA, BGR)  : return &RGBA_to_BGR;
        case combo(RGBA, BGRA) : return &RGBA_to_BGRA;
        case combo(BGR, Y)     : return &BGR_to_Y;
        case combo(BGR, YA)    : return &BGR_to_YA;
        case combo(BGR, RGB)   : return &BGR_to_RGB;
        case combo(BGR, RGBA)  : return &BGR_to_RGBA;
        case combo(BGRA, Y)    : return &BGRA_to_Y;
        case combo(BGRA, YA)   : return &BGRA_to_YA;
        case combo(BGRA, RGB)  : return &BGRA_to_RGB;
        case combo(BGRA, RGBA) : return &BGRA_to_RGBA;
        default                : throw new ImageIOException("internal error");
    }
}
2614 
// Identity conversion: copies every byte of `src` into `tgt`.
// Both slices must have the same length.
void copy_line(in ubyte[] src, ubyte[] tgt) pure nothrow {
    tgt[] = src[];
}
2618 
// Collapses an RGB triple into a single grey value: a weighted sum of
// the three components (0.21 red, 0.64 green, 0.15 blue), with the
// floating-point result converted to ubyte by the cast.
ubyte luminance(ubyte r, ubyte g, ubyte b) pure nothrow {
    return cast(ubyte) (0.21*r + 0.64*g + 0.15*b); // somewhat arbitrary weights
}
2622 
// Grey -> grey+alpha: copies each grey value and appends an opaque
// alpha byte (255). `tgt` must hold two bytes per source pixel.
void Y_to_YA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    foreach (i, gray; src) {
        immutable size_t o = i * 2;
        tgt[o] = gray;
        tgt[o + 1] = 255;
    }
}
2629 
// Grey expands identically to RGB and BGR, so one routine serves both.
alias Y_to_BGR = Y_to_RGB;
// Grey -> three-channel colour: replicates each grey value into all
// three colour components.
void Y_to_RGB(in ubyte[] src, ubyte[] tgt) pure nothrow {
    foreach (i, gray; src) {
        immutable size_t o = i * 3;
        tgt[o .. o + 3] = gray;
    }
}
2635 
// Grey expands identically to RGBA and BGRA, so one routine serves both.
alias Y_to_BGRA = Y_to_RGBA;
// Grey -> four-channel colour: replicates each grey value into the three
// colour components and sets alpha to opaque (255).
void Y_to_RGBA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    foreach (i, gray; src) {
        immutable size_t o = i * 4;
        tgt[o .. o + 3] = gray;
        tgt[o + 3] = 255;
    }
}
2643 
// Grey+alpha -> grey: keeps the grey byte of each pixel and drops alpha.
void YA_to_Y(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t o = 0;
    for (size_t i = 0; i < src.length; i += 2) {
        tgt[o] = src[i];
        o += 1;
    }
}
2648 
// Grey expands identically to RGB and BGR, so one routine serves both.
alias YA_to_BGR = YA_to_RGB;
// Grey+alpha -> three-channel colour: replicates the grey byte into the
// three colour components; the alpha byte is ignored.
void YA_to_RGB(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t o = 0;
    for (size_t i = 0; i < src.length; i += 2) {
        tgt[o .. o + 3] = src[i];
        o += 3;
    }
}
2654 
// Grey expands identically to RGBA and BGRA, so one routine serves both.
alias YA_to_BGRA = YA_to_RGBA;
// Grey+alpha -> four-channel colour: replicates the grey byte into the
// three colour components and carries the source alpha over.
void YA_to_RGBA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t o = 0;
    for (size_t i = 0; i < src.length; i += 2) {
        tgt[o .. o + 3] = src[i];
        tgt[o + 3] = src[i + 1];
        o += 4;
    }
}
2662 
// RGB -> grey: each output byte is luminance(r, g, b) of one source pixel.
void RGB_to_Y(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t o = 0;
    for (size_t i = 0; i < src.length; i += 3) {
        tgt[o] = luminance(src[i], src[i + 1], src[i + 2]);
        o += 1;
    }
}
2667 
// RGB -> grey+alpha: grey is luminance(r, g, b); alpha is set to
// opaque (255) since the source has none.
void RGB_to_YA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t o = 0;
    for (size_t i = 0; i < src.length; i += 3) {
        tgt[o] = luminance(src[i], src[i + 1], src[i + 2]);
        tgt[o + 1] = 255;
        o += 2;
    }
}
2674 
// RGB -> RGBA: copies the three colour bytes of each pixel and appends
// an opaque alpha byte (255).
void RGB_to_RGBA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t o = 0;
    for (size_t i = 0; i < src.length; i += 3) {
        tgt[o .. o + 3] = src[i .. i + 3];
        tgt[o + 3] = 255;
        o += 4;
    }
}
2681 
// RGBA -> grey: each output byte is luminance(r, g, b) of one source
// pixel; alpha is dropped.
void RGBA_to_Y(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t o = 0;
    for (size_t i = 0; i < src.length; i += 4) {
        tgt[o] = luminance(src[i], src[i + 1], src[i + 2]);
        o += 1;
    }
}
2686 
// RGBA -> grey+alpha: grey is luminance(r, g, b); the source alpha byte
// is carried over unchanged.
void RGBA_to_YA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t o = 0;
    for (size_t i = 0; i < src.length; i += 4) {
        tgt[o] = luminance(src[i], src[i + 1], src[i + 2]);
        tgt[o + 1] = src[i + 3];
        o += 2;
    }
}
2693 
// RGBA -> RGB: copies the three colour bytes of each pixel and drops alpha.
void RGBA_to_RGB(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t o = 0;
    for (size_t i = 0; i < src.length; i += 4) {
        tgt[o .. o + 3] = src[i .. i + 3];
        o += 3;
    }
}
2698 
// BGR -> grey: each output byte is luminance(r, g, b) of one source
// pixel. In BGR order blue is at offset 0, green at 1 and red at 2, so
// the components are passed to luminance() in reverse.
void BGR_to_Y(in ubyte[] src, ubyte[] tgt) pure nothrow {
    for (size_t k, t;   k < src.length;   k+=3, t+=1)
        tgt[t] = luminance(src[k+2], src[k+1], src[k]);
}
2703 
// BGR -> grey+alpha: grey is luminance(r, g, b) with the components
// taken from BGR order (blue at offset 0, green at 1, red at 2); alpha
// is set to opaque (255) since the source has none.
void BGR_to_YA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    for (size_t k, t;   k < src.length;   k+=3, t+=2) {
        tgt[t] = luminance(src[k+2], src[k+1], src[k]);
        tgt[t+1] = 255;
    }
}
2710 
// Swapping the outer components is its own inverse, so the same routine
// converts in both directions.
alias RGB_to_BGR = BGR_to_RGB;
// BGR -> RGB: reverses the three components of every pixel.
void BGR_to_RGB(in ubyte[] src, ubyte[] tgt) pure nothrow {
    for (size_t px = 0; px < src.length; px += 3) {
        // c runs 0,1,2 so writes happen in the order first, middle, last.
        foreach (c; 0 .. 3)
            tgt[px + c] = src[px + 2 - c];
    }
}
2719 
// Swapping the outer components is its own inverse, so the same routine
// converts in both directions.
alias RGB_to_BGRA = BGR_to_RGBA;
// BGR -> RGBA: reverses the three colour components of every pixel and
// appends an opaque alpha byte (255).
void BGR_to_RGBA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t o = 0;
    for (size_t i = 0; i < src.length; i += 3) {
        // c runs 0,1,2 so writes happen in the order first, middle, last.
        foreach (c; 0 .. 3)
            tgt[o + c] = src[i + 2 - c];
        tgt[o + 3] = 255;
        o += 4;
    }
}
2729 
// BGRA -> grey: each output byte is luminance(r, g, b) with the
// components taken from BGRA order (blue at offset 0, green at 1, red
// at 2); alpha is dropped.
void BGRA_to_Y(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t o = 0;
    for (size_t i = 0; i < src.length; i += 4) {
        tgt[o] = luminance(src[i + 2], src[i + 1], src[i]);
        o += 1;
    }
}
2734 
// BGRA -> grey+alpha: grey is luminance(r, g, b) with the components
// taken from BGRA order (blue at offset 0, green at 1, red at 2). The
// source alpha byte is carried over, matching RGBA_to_YA.
void BGRA_to_YA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    for (size_t k, t;   k < src.length;   k+=4, t+=2) {
        tgt[t] = luminance(src[k+2], src[k+1], src[k]);
        tgt[t+1] = src[k+3];
    }
}
2741 
// Swapping the outer components is its own inverse, so the same routine
// converts in both directions.
alias RGBA_to_BGR = BGRA_to_RGB;
// BGRA -> RGB: reverses the three colour components of every pixel and
// drops alpha.
void BGRA_to_RGB(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t o = 0;
    for (size_t i = 0; i < src.length; i += 4) {
        // c runs 0,1,2 so writes happen in the order first, middle, last.
        foreach (c; 0 .. 3)
            tgt[o + c] = src[i + 2 - c];
        o += 3;
    }
}
2750 
// Swapping the outer components is its own inverse, so the same routine
// converts in both directions.
alias RGBA_to_BGRA = BGRA_to_RGBA;
// BGRA -> RGBA: reverses the three colour components of every pixel and
// keeps alpha in the fourth position.
void BGRA_to_RGBA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    for (size_t px = 0; px < src.length; px += 4) {
        // c runs 0,1,2 so writes happen in the order first, middle, last.
        foreach (c; 0 .. 3)
            tgt[px + c] = src[px + 2 - c];
        tgt[px + 3] = src[px + 3];
    }
}
2760 
2761 // --------------------------------------------------------------------------------
2762 
// Minimal input abstraction used by the decoders.
interface Reader {
    // Fills buffer[0 .. bytes]; the implementations in this module throw
    // when fewer than `bytes` bytes are available.
    void readExact(ubyte[] buffer, size_t bytes);
    // Repositions the read cursor; `origin` is one of SEEK_SET, SEEK_CUR
    // or SEEK_END.
    void seek(ptrdiff_t offset, int origin);
}
2767 
// Minimal output abstraction used by the encoders.
interface Writer {
    // Appends the bytes of `block` to the output.
    void rawWrite(in ubyte[] block);
    // Flushes the output; a no-op for MemWriter.
    void flush();
}
2772 
// Reader backed by a std.stdio.File.
class FileReader : Reader {
    private File f;

    // Opens `filename` for binary reading.
    this(in char[] filename) {
        this(File(filename.idup, "rb"));
    }

    // Wraps an already opened File; throws ImageIOException if it is not open.
    this(File f) {
        if (!f.isOpen)
            throw new ImageIOException("File not open");
        this.f = f;
    }

    // Reads exactly `bytes` bytes into the front of `buffer`; throws if the
    // file yields fewer.
    void readExact(ubyte[] buffer, size_t bytes) {
        immutable size_t got = this.f.rawRead(buffer[0..bytes]).length;
        if (got != bytes)
            throw new Exception("not enough data");
    }

    // Forwards to File.seek.
    void seek(ptrdiff_t offset, int origin) {
        this.f.seek(offset, origin);
    }
}
2793 
// Reader over an in-memory byte slice.
class MemReader : Reader {
    private const ubyte[] source;
    private ptrdiff_t cursor;

    this(in ubyte[] source) {
        this.source = source;
    }

    // Copies the next `bytes` bytes into the front of `buffer` and advances
    // the cursor; throws if fewer than `bytes` bytes remain.
    void readExact(ubyte[] buffer, size_t bytes) {
        immutable size_t remaining = source.length - cursor;
        if (remaining < bytes)
            throw new Exception("not enough data");
        immutable size_t stop = cursor + bytes;
        buffer[0..bytes] = source[cursor .. stop];
        cursor = stop;
    }

    // Moves the cursor relative to the start, the current position or the
    // end of the data. The resulting position must lie inside the data
    // (0 <= position < length); otherwise "seek error" is thrown and the
    // cursor is left untouched.
    void seek(ptrdiff_t offset, int origin) {
        immutable ptrdiff_t length = cast(ptrdiff_t) source.length;
        ptrdiff_t target;
        switch (origin) {
            case SEEK_SET: target = offset;          break;
            case SEEK_CUR: target = cursor + offset; break;
            case SEEK_END: target = length + offset; break;
            default: assert(0);
        }
        if (target < 0 || length <= target)
            throw new Exception("seek error");
        cursor = target;
    }
}
2831 
// Writer backed by a std.stdio.File.
class FileWriter : Writer {
    private File f;

    // Opens `filename` for binary writing.
    this(in char[] filename) {
        this(File(filename.idup, "wb"));
    }

    // Wraps an already opened File; throws ImageIOException if it is not open.
    this(File f) {
        if (!f.isOpen)
            throw new ImageIOException("File not open");
        this.f = f;
    }

    // Forwards to File.rawWrite.
    void rawWrite(in ubyte[] block) {
        this.f.rawWrite(block);
    }

    // Forwards to File.flush.
    void flush() {
        this.f.flush();
    }
}
2847 
// Writer that accumulates everything written to it in memory.
class MemWriter : Writer {
    private ubyte[] buffer;

    this() { }

    // Returns the bytes written so far.
    ubyte[] result() {
        return buffer;
    }

    // Appends `block` to the accumulated bytes.
    void rawWrite(in ubyte[] block) {
        this.buffer ~= block;
    }

    // Nothing to flush for an in-memory buffer.
    void flush() { }
}
2858 
// Returns the lower-cased text after the last '.' in `filename`.
// Yields an empty string when there is no dot, when the only dot is the
// first character, or when nothing follows the last dot.
const(char)[] extract_extension_lowercase(in char[] filename) {
    immutable ptrdiff_t dot = filename.lastIndexOf('.');
    if (dot <= 0)
        return "";
    if (dot + 1 >= filename.length)
        return "";
    return filename[dot + 1 .. $].toLower();
}