1 // Copyright (c) 2014 Tero Hänninen
2 // Boost Software License - Version 1.0 - August 17th, 2003
3 module imageformats;
4 
5 import std.algorithm;   // min, reverse
6 import std.bitmanip;   // endianness stuff
7 import std.stdio;    // File
8 import std.string;  // toLower, lastIndexOf
9 
/// A decoded image as returned by the readers in this module.
struct IFImage {
    long    w;        /// width in pixels
    long    h;        /// height in pixels
    ColFmt  c;        /// channel layout of `pixels`
    ubyte[] pixels;   /// pixel data; the PNG reader fills it with w * h * c bytes
}
15 
/// Channel layouts. The numeric value of each member is its channel count;
/// the readers rely on that when they cast a channel count to ColFmt.
enum ColFmt : int {
    Y    = 1,   /// gray
    YA   = 2,   /// gray + alpha
    RGB  = 3,
    RGBA = 4,
}
22 
/// Reads the image stored in `file`. The decoder is selected from the
/// extension returned by extract_extension_lowercase (png, tga, bmp, jpg/jpeg).
/// `req_chans` is handed to the decoder unchanged; the PNG and TGA readers
/// keep the source channel count when it is 0.
/// Throws: ImageIOException if the extension is not one of the above.
IFImage read_image(in char[] file, long req_chans = 0) {
    const(char)[] extension = extract_extension_lowercase(file);

    IFImage function(Reader, long) decode;
    switch (extension) {
        case "png":         decode = &read_png;  break;
        case "tga":         decode = &read_tga;  break;
        case "bmp":         decode = &read_bmp;  break;
        case "jpg", "jpeg": decode = &read_jpeg; break;
        default:
            throw new ImageIOException("unknown image extension/type");
    }

    // the file is opened only after the extension has been accepted
    scope source = new Reader(file);
    return decode(source, req_chans);
}
38 
/// Writes `data` as a `w` x `h` image to `file`. The encoder (PNG or TGA) is
/// selected from the extension returned by extract_extension_lowercase.
/// `req_chans` is passed on as the encoder's target channel count; both
/// encoders use the source channel count when it is 0.
/// Throws: ImageIOException for any other extension.
void write_image(in char[] file, long w, long h, in ubyte[] data, long req_chans = 0) {
    const(char)[] extension = extract_extension_lowercase(file);

    void function(Writer, long, long, in ubyte[], long) encode;
    switch (extension) {
        case "png": encode = &write_png; break;
        case "tga": encode = &write_tga; break;
        default:
            throw new ImageIOException("unknown image extension/type");
    }

    // the file is opened only after the extension has been accepted
    scope sink = new Writer(file);
    encode(sink, w, h, data, req_chans);
}
51 
/// Reads the dimensions and channel count of the image in `file` into the
/// out parameters, using the format-specific info reader selected from the
/// extension returned by extract_extension_lowercase.
/// chans is set to zero if num of channels is unknown
/// Throws: ImageIOException if the extension is not png, tga, bmp, jpg or jpeg.
void read_image_info(in char[] file, out long w, out long h, out long chans) {
    const(char)[] extension = extract_extension_lowercase(file);

    void function(Reader, out long, out long, out long) probe;
    switch (extension) {
        case "png":         probe = &read_png_info;  break;
        case "tga":         probe = &read_tga_info;  break;
        case "bmp":         probe = &read_bmp_info;  break;
        case "jpg", "jpeg": probe = &read_jpeg_info; break;
        default:
            throw new ImageIOException("unknown image extension/type");
    }

    scope source = new Reader(file);
    probe(source, w, h, chans);
}
68 
/// Exception type thrown by the readers and writers in this module.
class ImageIOException : Exception {
   /// Params:
   ///   msg  = description of the failure
   ///   file = source file of the throw site (defaults to the caller's)
   ///   line = source line of the throw site (defaults to the caller's)
   // The constructor is deliberately not `const`: a const constructor makes
   // `new ImageIOException(..)` a const object, and throwing qualified
   // objects is deprecated because `catch (Exception e)` would receive it
   // as mutable.
   @safe pure
   this(string msg, string file = __FILE__, size_t line = __LINE__) {
       super(msg, file, line);
   }
}
75 
76 // From here, things are private by default and public only explicitly.
77 private:
78 
79 // --------------------------------------------------------------------------------
80 // PNG
81 
82 import std.digest.crc;
83 import std.zlib;
84 
/// The fields of a PNG IHDR chunk, in the order they appear in the file.
public struct PNG_Header {
    int   width, height;        /// image dimensions as stored (read as signed big-endian)
    ubyte bit_depth;
    ubyte color_type;           /// raw value; compare against PNG_ColorType
    ubyte compression_method;
    ubyte filter_method;
    ubyte interlace_method;     /// raw value; compare against InterlaceMethod
}
94 
/// Opens `filename` and returns its PNG header; see the Reader overload for
/// the checks performed.
public PNG_Header read_png_header(in char[] filename) {
    scope file_reader = new Reader(filename);
    return read_png_header(file_reader);
}
99 
/// Reads the first 33 bytes of `stream` (PNG signature plus the complete IHDR
/// chunk) and returns the IHDR fields.
/// Throws: ImageIOException("corrupt header") if the signature, the IHDR
/// length/type or the IHDR CRC do not match. Field values are not validated.
PNG_Header read_png_header(Reader stream) {
    // layout: signature 0..8 | IHDR length 8..12 | type 12..16 | data 16..29 | crc 29..33
    ubyte[33] raw = void;
    stream.readExact(raw, raw.length);

    // the CRC covers chunk type and data; reversed to match the byte order in the file
    ubyte[4] computed_crc = crc32Of(raw[12..29]);
    reverse(computed_crc[]);

    const bool signature_ok = raw[0..8] == png_file_header[0..$];
    const bool ihdr_ok      = raw[8..16] == [0x0,0x0,0x0,0xd,'I','H','D','R'];
    const bool crc_ok       = computed_crc == raw[29..33];
    if (!(signature_ok && ihdr_ok && crc_ok))
        throw new ImageIOException("corrupt header");

    PNG_Header hdr;
    hdr.width              = bigEndianToNative!int(raw[16..20]);
    hdr.height             = bigEndianToNative!int(raw[20..24]);
    hdr.bit_depth          = raw[24];
    hdr.color_type         = raw[25];
    hdr.compression_method = raw[26];
    hdr.filter_method      = raw[27];
    hdr.interlace_method   = raw[28];
    return hdr;
}
122 
/// Opens `filename` and decodes it as a PNG; see the Reader overload for the
/// meaning of `req_chans` and the possible errors.
public IFImage read_png(in char[] filename, long req_chans = 0) {
    scope file_reader = new Reader(filename);
    return read_png(file_reader, req_chans);
}
127 
/// Decodes a PNG held in the memory buffer `source`; see the Reader overload
/// of read_png for the meaning of `req_chans` and the possible errors.
public IFImage read_png_from_mem(in ubyte[] source, long req_chans = 0) {
    scope mem_reader = new Reader(source);
    return read_png(mem_reader, req_chans);
}
132 
/// Decodes a PNG from `stream`.
///
/// `req_chans` selects the number of channels in the result (1..4); 0 keeps
/// the channel count of the file (3 for indexed images).
///
/// Throws: ImageIOException if `req_chans` is out of range, the header is
/// corrupt, or the image uses a feature this decoder does not handle (only
/// bit depth 8, compression/filter method 0 and interlace method 0 or 1).
IFImage read_png(Reader stream, long req_chans = 0) {
    if (req_chans < 0 || 4 < req_chans)
        throw new ImageIOException("come on...");

    PNG_Header hdr = read_png_header(stream);

    // the pixel count must also fit in an int
    const bool dims_ok = 1 <= hdr.width && 1 <= hdr.height
                      && cast(ulong) hdr.width * hdr.height <= int.max;
    if (!dims_ok)
        throw new ImageIOException("invalid dimensions");

    if (hdr.bit_depth != 8)
        throw new ImageIOException("only 8-bit images supported");

    switch (hdr.color_type) with (PNG_ColorType) {
        case Y, RGB, Idx, YA, RGBA:
            break;
        default:
            throw new ImageIOException("color type not supported");
    }

    const bool interlace_ok = hdr.interlace_method == 0 || hdr.interlace_method == 1;
    if (hdr.compression_method != 0 || hdr.filter_method != 0 || !interlace_ok)
        throw new ImageIOException("not supported");

    PNG_Decoder dc;
    dc.stream      = stream;
    dc.src_indexed = (hdr.color_type == PNG_ColorType.Idx);
    dc.src_chans   = channels(cast(PNG_ColorType) hdr.color_type);
    dc.ilace       = hdr.interlace_method;
    dc.w           = hdr.width;
    dc.h           = hdr.height;
    if (req_chans == 0)
        dc.tgt_chans = dc.src_chans;
    else
        dc.tgt_chans = cast(int) req_chans;

    IFImage image;
    image.w      = dc.w;
    image.h      = dc.h;
    image.c      = cast(ColFmt) dc.tgt_chans;
    image.pixels = decode_png(dc);
    return image;
}
171 
/// Encodes `data` as a `w` x `h` PNG and writes it to `file`; see the Writer
/// overload for the meaning of `tgt_chans` and the possible errors.
public void write_png(in char[] file, long w, long h, in ubyte[] data, long tgt_chans = 0)
{
    scope file_writer = new Writer(file);
    write_png(file_writer, w, h, data, tgt_chans);
}
177 
/// Encodes `data` as a `w` x `h` PNG and returns the writer's result buffer;
/// see the Writer overload of write_png for `tgt_chans` and the possible errors.
public ubyte[] write_png_to_mem(long w, long h, in ubyte[] data, long tgt_chans = 0) {
    scope mem_writer = new Writer();
    write_png(mem_writer, w, h, data, tgt_chans);
    return mem_writer.result;
}
183 
/// The 8-byte signature every PNG file starts with: 0x89 "PNG" CR LF 0x1a LF.
immutable ubyte[8] png_file_header =
    [0x89, 'P', 'N', 'G', '\r', '\n', 0x1a, '\n'];
186 
/// Number of channels the decoder produces for a PNG color type. Indexed
/// images count as 3 because they are expanded through the palette.
int channels(PNG_ColorType ct) pure nothrow {
    final switch (ct) {
        case PNG_ColorType.Y:    return 1;
        case PNG_ColorType.YA:   return 2;
        case PNG_ColorType.RGB:  return 3;
        case PNG_ColorType.Idx:  return 3;
        case PNG_ColorType.RGBA: return 4;
    }
}
195 
/// PNG color type used when writing an image with `channels` channels.
/// `channels` must be 1..4; any other value hits assert(0).
PNG_ColorType color_type(long channels) pure nothrow {
    if (channels == 1) return PNG_ColorType.Y;
    if (channels == 2) return PNG_ColorType.YA;
    if (channels == 3) return PNG_ColorType.RGB;
    if (channels == 4) return PNG_ColorType.RGBA;
    assert(0);
}
205 
/// State shared by the PNG decoding functions.
struct PNG_Decoder {
    Reader stream;          // input
    bool   src_indexed;     // file uses a palette
    int    src_chans;       // channels in the file (3 for indexed)
    int    tgt_chans;       // channels in the decoded result
    size_t w;               // image width in pixels
    size_t h;               // image height in pixels
    ubyte  ilace;           // interlace method from the header

    UnCompress uc;          // inflater fed with the IDAT data
    CRC32 crc;              // running CRC of the chunk being read
    ubyte[12] chunkmeta;    // crc | length and type
    ubyte[] read_buf;       // scratch buffer for raw chunk data
    ubyte[] uc_buf;         // uncompressed bytes not yet consumed
    ubyte[] palette;        // PLTE contents, 3 bytes per entry
}
221 
/// Walks the chunks that follow IHDR until IEND and returns the decoded
/// pixels produced by the IDAT stream.
///
/// Chunk order is enforced: an optional PLTE directly after IHDR, then IDAT,
/// then IEND; a second IHDR is an error. Chunks of any other type are skipped
/// after their CRC has been verified.
///
/// Throws: ImageIOException on a negative chunk length, a CRC mismatch or a
/// chunk appearing at the wrong stage.
ubyte[] decode_png(ref PNG_Decoder dc) {
    enum Stage {
        IHDR_parsed,
        PLTE_parsed,
        IDAT_parsed,
        IEND_parsed,
    }

    // Reads the CRC of the chunk just consumed together with the length and
    // type of the next chunk, then checks that CRC against the running digest.
    void close_chunk() {
        dc.stream.readExact(dc.chunkmeta, 12); // crc | len, type
        ubyte[4] computed = dc.crc.finish;
        reverse(computed[]);
        if (computed != dc.chunkmeta[0..4])
            throw new ImageIOException("corrupt chunk");
    }

    dc.uc = new UnCompress(HeaderFormat.deflate);
    dc.read_buf = new ubyte[4096];

    ubyte[] pixels;
    Stage stage = Stage.IHDR_parsed;
    dc.stream.readExact(dc.chunkmeta[4..$], 8);  // first chunk's len and type

    while (stage != Stage.IEND_parsed) {
        int remaining = bigEndianToNative!int(dc.chunkmeta[4..8]);
        if (remaining < 0)
            throw new ImageIOException("chunk too long");

        // the CRC of a chunk covers its type and its data
        dc.crc.put(dc.chunkmeta[8..12]);

        // standard allows PLTE chunk for non-indexed images too but we don't
        switch (cast(char[]) dc.chunkmeta[8..12]) {
            case "IHDR":
                throw new ImageIOException("corrupt chunk stream");

            case "PLTE":
                if (stage != Stage.IHDR_parsed)
                    throw new ImageIOException("corrupt chunk stream");
                int entries = remaining / 3;
                if (remaining % 3 != 0 || 256 < entries)
                    throw new ImageIOException("corrupt chunk");
                dc.palette = new ubyte[remaining];
                dc.stream.readExact(dc.palette, dc.palette.length);
                dc.crc.put(dc.palette);
                close_chunk();
                stage = Stage.PLTE_parsed;
                break;

            case "IDAT":
                const bool idat_allowed =
                    stage == Stage.IHDR_parsed ||
                    (stage == Stage.PLTE_parsed && dc.src_indexed);
                if (!idat_allowed)
                    throw new ImageIOException("corrupt chunk stream");
                // leaves the next chunk's length and type in dc.chunkmeta
                pixels = read_IDAT_stream(dc, remaining);
                stage = Stage.IDAT_parsed;
                break;

            case "IEND":
                if (stage != Stage.IDAT_parsed)
                    throw new ImageIOException("corrupt chunk stream");
                dc.stream.readExact(dc.chunkmeta, 4); // crc
                // IEND has no data, so its CRC is a fixed value
                if (remaining != 0 || dc.chunkmeta[0..4] != [0xae, 0x42, 0x60, 0x82])
                    throw new ImageIOException("corrupt chunk");
                stage = Stage.IEND_parsed;
                break;

            default:
                // unknown chunk, ignore but check crc
                while (0 < remaining) {
                    size_t bytes = min(remaining, dc.read_buf.length);
                    dc.stream.readExact(dc.read_buf, bytes);
                    remaining -= bytes;
                    dc.crc.put(dc.read_buf[0..bytes]);
                }
                close_chunk();
        }
    }

    return pixels;
}
296 
/// Color type values of the PNG IHDR chunk. The value is a bit field:
/// bit 0 = palette used, bit 1 = color used, bit 2 = alpha channel present.
enum PNG_ColorType : ubyte {
    Y    = 0b000,   // gray
    RGB  = 0b010,
    Idx  = 0b011,   // palette indices
    YA   = 0b100,   // gray + alpha
    RGBA = 0b110,
}
304 
/// Scanline filter types; the value is the filter byte that precedes each
/// scanline in the uncompressed image data.
enum PNG_FilterType : ubyte {
    None,       // 0
    Sub,        // 1
    Up,         // 2
    Average,    // 3
    Paeth,      // 4
}
312 
/// Interlace method values of the PNG IHDR chunk.
enum InterlaceMethod {
    None,     // 0
    Adam7,    // 1
}
316 
/// Decodes the image data carried by the IDAT chunk(s) into a newly allocated
/// buffer of dc.w * dc.h * dc.tgt_chans bytes (rows top to bottom).
///
/// On entry the stream is positioned at the data of an IDAT chunk whose data
/// length is `len`. On return dc.chunkmeta holds the CRC of the last IDAT
/// chunk followed by the length and type of the first non-IDAT chunk.
///
/// Throws: ImageIOException on a CRC mismatch, on missing image data, on an
/// unsupported filter type and on a palette index outside the palette.
ubyte[] read_IDAT_stream(ref PNG_Decoder dc, int len) {
    bool metaready = false;     // chunk len, type, crc

    immutable uint filter_step = dc.src_indexed ? 1 : dc.src_chans;
    immutable size_t tgt_linesize = cast(size_t) (dc.w * dc.tgt_chans);

    ubyte[] depaletted_line = dc.src_indexed ? new ubyte[cast(size_t)dc.w * 3] : null;
    ubyte[] result = new ubyte[cast(size_t)(dc.w * dc.h * dc.tgt_chans)];

    const LineConv chan_convert = get_converter(dc.src_chans, dc.tgt_chans);

    // Expands palette indices to RGB, then converts to the target channels.
    void depalette_convert(in ubyte[] src_line, ubyte[] tgt_line) {
        for (size_t s, d;  s < src_line.length;  s+=1, d+=3) {
            size_t pidx = src_line[s] * 3;
            if (dc.palette.length < pidx + 3)
                throw new ImageIOException("palette idx wrong");
            depaletted_line[d .. d+3] = dc.palette[pidx .. pidx+3];
        }
        chan_convert(depaletted_line[0 .. src_line.length*3], tgt_line);
    }

    void simple_convert(in ubyte[] src_line, ubyte[] tgt_line) {
        chan_convert(src_line, tgt_line);
    }

    const convert = dc.src_indexed ? &depalette_convert : &simple_convert;

    if (dc.ilace == InterlaceMethod.None) {
        immutable size_t src_sl_size = cast(size_t) dc.w * filter_step;
        auto cline = new ubyte[src_sl_size+1];   // current line + filter byte
        auto pline = new ubyte[src_sl_size+1];   // previous line, inited to 0
        debug(DebugPNG) assert(pline[0] == 0);

        size_t tgt_si = 0;    // scanline index in target buffer
        foreach (j; 0 .. dc.h) {
            uncompress_line(dc, len, metaready, cline);
            ubyte filter_type = cline[0];

            recon(cline[1..$], pline[1..$], filter_type, filter_step);
            convert(cline[1 .. $], result[tgt_si .. tgt_si + tgt_linesize]);
            tgt_si += tgt_linesize;

            swap(pline, cline);
        }
    } else {
        // Adam7 interlacing

        // width and height of the reduced image of each pass
        immutable size_t[7] redw = [
            (dc.w + 7) / 8,
            (dc.w + 3) / 8,
            (dc.w + 3) / 4,
            (dc.w + 1) / 4,
            (dc.w + 1) / 2,
            (dc.w + 0) / 2,
            (dc.w + 0) / 1,
        ];
        immutable size_t[7] redh = [
            (dc.h + 7) / 8,
            (dc.h + 7) / 8,
            (dc.h + 3) / 8,
            (dc.h + 3) / 4,
            (dc.h + 1) / 4,
            (dc.h + 1) / 2,
            (dc.h + 0) / 2,
        ];

        const size_t max_scanline_size = cast(size_t) (dc.w * filter_step);
        auto linebuf0 = new ubyte[max_scanline_size+1]; // +1 for filter type byte
        auto linebuf1 = new ubyte[max_scanline_size+1]; // +1 for filter type byte
        auto redlinebuf = new ubyte[cast(size_t) dc.w * dc.tgt_chans];

        foreach (pass; 0 .. 7) {
            // An empty pass contributes nothing to the stream, not even
            // filter bytes, so it must not consume any data.
            if (redw[pass] == 0 || redh[pass] == 0)
                continue;

            const A7_Catapult tgt_px = a7_catapults[pass];   // target pixel
            const size_t src_linesize = redw[pass] * filter_step;
            auto cline = linebuf0[0 .. src_linesize+1];
            auto pline = linebuf1[0 .. src_linesize+1];

            // Every pass is filtered as an image of its own: the line above
            // its first scanline is all zeros, not the previous pass's data.
            pline[] = 0;

            foreach (j; 0 .. redh[pass]) {
                uncompress_line(dc, len, metaready, cline);
                ubyte filter_type = cline[0];

                recon(cline[1..$], pline[1..$], filter_type, filter_step);
                convert(cline[1 .. $], redlinebuf[0 .. redw[pass]*dc.tgt_chans]);

                // scatter the pixels of the reduced line to their final places
                for (size_t i, redi; i < redw[pass]; ++i, redi += dc.tgt_chans) {
                    size_t tgt = tgt_px(i, j, dc.w) * dc.tgt_chans;
                    result[tgt .. tgt + dc.tgt_chans] =
                        redlinebuf[redi .. redi + dc.tgt_chans];
                }

                swap(pline, cline);
            }
        }
    }

    // All scanlines are decoded, but the IDAT stream may still hold unread
    // bytes: the rest of the current chunk and/or further IDAT chunks (for
    // example the tail of the compressed stream). Consume them so that the
    // chunk CRCs stay in sync and dc.chunkmeta ends up describing the first
    // non-IDAT chunk.
    while (!metaready) {
        while (0 < len) {
            const size_t bytes = min(len, dc.read_buf.length);
            dc.stream.readExact(dc.read_buf, bytes);
            len -= bytes;
            dc.crc.put(dc.read_buf[0 .. bytes]);
        }

        dc.stream.readExact(dc.chunkmeta, 12);   // crc | len & type
        ubyte[4] crc = dc.crc.finish;
        reverse(crc[]);
        if (crc != dc.chunkmeta[0..4])
            throw new ImageIOException("corrupt chunk");

        if (dc.chunkmeta[8..12] != "IDAT") {
            metaready = true;
        } else {
            len = bigEndianToNative!int(dc.chunkmeta[4..8]);
            if (len < 0)
                throw new ImageIOException("chunk too long");
            dc.crc.put(dc.chunkmeta[8..12]);  // type
        }
    }
    return result;
}
425 
/// Maps pixel (redx, redy) of an Adam7 reduced image to the index of the
/// corresponding pixel in the full image of width dstw.
alias A7_Catapult = size_t function(size_t redx, size_t redy, size_t dstw);

/// The mapping function of each of the seven Adam7 passes, indexed by pass
/// number minus one.
immutable A7_Catapult[7] a7_catapults = [
    &a7_red1_to_dst, &a7_red2_to_dst, &a7_red3_to_dst, &a7_red4_to_dst,
    &a7_red5_to_dst, &a7_red6_to_dst, &a7_red7_to_dst,
];
436 
// Adam7 pixel placement, one function per pass. Each computes the row and
// column in the full image for pixel (redx, redy) of the pass's reduced image
// and returns the pixel index row * dstw + col.
pure nothrow {
  size_t a7_red1_to_dst(size_t redx, size_t redy, size_t dstw) {
      const size_t row = redy * 8;
      const size_t col = redx * 8;
      return row * dstw + col;
  }

  size_t a7_red2_to_dst(size_t redx, size_t redy, size_t dstw) {
      const size_t row = redy * 8;
      const size_t col = redx * 8 + 4;
      return row * dstw + col;
  }

  size_t a7_red3_to_dst(size_t redx, size_t redy, size_t dstw) {
      const size_t row = redy * 8 + 4;
      const size_t col = redx * 4;
      return row * dstw + col;
  }

  size_t a7_red4_to_dst(size_t redx, size_t redy, size_t dstw) {
      const size_t row = redy * 4;
      const size_t col = redx * 4 + 2;
      return row * dstw + col;
  }

  size_t a7_red5_to_dst(size_t redx, size_t redy, size_t dstw) {
      const size_t row = redy * 4 + 2;
      const size_t col = redx * 2;
      return row * dstw + col;
  }

  size_t a7_red6_to_dst(size_t redx, size_t redy, size_t dstw) {
      const size_t row = redy * 2;
      const size_t col = redx * 2 + 1;
      return row * dstw + col;
  }

  size_t a7_red7_to_dst(size_t redx, size_t redy, size_t dstw) {
      const size_t row = redy * 2 + 1;
      const size_t col = redx;
      return row * dstw + col;
  }
}
446 
/// Fills `dst` completely with the next dst.length bytes of uncompressed image
/// data, reading and inflating further IDAT data from the stream as needed.
///
/// `length` is the number of unread data bytes in the current IDAT chunk and
/// is updated as data is read. When the chunk following an IDAT chunk is not
/// an IDAT, `metaready` is set to true; dc.chunkmeta then holds that chunk's
/// length and type.
///
/// Throws: ImageIOException on a CRC mismatch or when the IDAT stream ends
/// before `dst` is full.
void uncompress_line(ref PNG_Decoder dc, ref int length, ref bool metaready, ubyte[] dst) {
    size_t filled = 0;   // bytes of dst written so far

    // Moves the first `count` bytes of dc.uc_buf to the unfilled part of dst.
    void take(size_t count) {
        dst[filled .. filled + count] = dc.uc_buf[0 .. count];
        dc.uc_buf = dc.uc_buf[count .. $];
        filled += count;
    }

    // start with whatever is left over from the previous call
    take(min(dst.length, dc.uc_buf.length));

    while (filled < dst.length) {
        // need new data for dc.uc_buf...
        if (length <= 0) {  // IDAT is read -> read next chunks meta
            dc.stream.readExact(dc.chunkmeta, 12);   // crc | len & type
            ubyte[4] computed = dc.crc.finish;
            reverse(computed[]);
            if (computed != dc.chunkmeta[0..4])
                throw new ImageIOException("corrupt chunk");

            length = bigEndianToNative!int(dc.chunkmeta[4..8]);
            if (dc.chunkmeta[8..12] != "IDAT") {
                // no new IDAT chunk so flush, this is the end of the IDAT stream
                metaready = true;
                dc.uc_buf = cast(ubyte[]) dc.uc.flush();
                const size_t missing = dst.length - filled;
                if (dc.uc_buf.length < missing)
                    throw new ImageIOException("not enough data");
                take(missing);
                return;
            }
            if (length <= 0)    // empty IDAT chunk
                throw new ImageIOException("not enough data");
            dc.crc.put(dc.chunkmeta[8..12]);  // type
        }

        const size_t bytes = min(length, dc.read_buf.length);
        dc.stream.readExact(dc.read_buf, bytes);
        length -= bytes;
        dc.crc.put(dc.read_buf[0..bytes]);

        if (bytes <= 0)
            throw new ImageIOException("not enough data");

        // the inflater gets its own copy because read_buf is reused
        dc.uc_buf = cast(ubyte[]) dc.uc.uncompress(dc.read_buf[0..bytes].dup);
        take(min(dst.length - filled, dc.uc_buf.length));
    }
}
497 
/// Undoes the PNG scanline filter `ftype` on `cline` in place.
///
/// `pline` is the already reconstructed previous scanline (same length as
/// `cline`), and `fstep` is the distance in bytes between a byte and the
/// corresponding byte of the pixel to its left.
///
/// Throws: ImageIOException if `ftype` is not a known filter type.
void recon(ubyte[] cline, in ubyte[] pline, ubyte ftype, int fstep) pure {
    switch (ftype) {
        case PNG_FilterType.None:
            break;

        case PNG_FilterType.Sub:
            for (size_t i = fstep; i < cline.length; ++i)
                cline[i] += cline[i - fstep];
            break;

        case PNG_FilterType.Up:
            for (size_t i = 0; i < cline.length; ++i)
                cline[i] += pline[i];
            break;

        case PNG_FilterType.Average:
            // the first pixel has no left neighbour, so it counts as 0
            for (int i = 0; i < fstep; ++i)
                cline[i] += pline[i] / 2;
            for (size_t i = fstep; i < cline.length; ++i) {
                const uint left  = cline[i - fstep];
                const uint above = pline[i];
                cline[i] += cast(ubyte) ((left + above) / 2);
            }
            break;

        case PNG_FilterType.Paeth:
            // the first pixel has neither a left nor an upper-left neighbour
            for (int i = 0; i < fstep; ++i)
                cline[i] += paeth(0, pline[i], 0);
            for (size_t i = fstep; i < cline.length; ++i)
                cline[i] += paeth(cline[i - fstep], pline[i], pline[i - fstep]);
            break;

        default:
            throw new ImageIOException("filter type not supported");
    }
}
527 
/// Paeth predictor: returns whichever of `a` (left), `b` (above) and `c`
/// (upper left) is closest to a + b - c; ties are resolved in the order a, b, c.
ubyte paeth(ubyte a, ubyte b, ubyte c) pure nothrow {
    // with p = a + b - c:  p - a == b - c,  p - b == a - c,  p - c == both added
    const int to_a = cast(int) b - cast(int) c;
    const int to_b = cast(int) a - cast(int) c;
    const int to_c = to_a + to_b;

    const int dist_a = (to_a < 0) ? -to_a : to_a;
    const int dist_b = (to_b < 0) ? -to_b : to_b;
    const int dist_c = (to_c < 0) ? -to_c : to_c;

    if (dist_a <= dist_b && dist_a <= dist_c)
        return a;
    return (dist_b <= dist_c) ? b : c;
}
544 
545 // ----------------------------------------------------------------------
546 // PNG encoder
547 
/// Encodes `data` as a `w` x `h` PNG and writes it to `stream`.
///
/// The number of source channels is derived from data.length / w / h and must
/// be 1..4. `tgt_chans` is the channel count of the written image (1..4); 0
/// means the same as the source.
///
/// Throws: ImageIOException if the dimensions are out of range, a channel
/// count is invalid or data.length is not w * h * channels.
void write_png(Writer stream, long w, long h, in ubyte[] data, long tgt_chans = 0) {
    const bool dims_ok = 1 <= w && 1 <= h && w <= int.max && h <= int.max;
    if (!dims_ok)
        throw new ImageIOException("invalid dimensions");

    uint src_chans = cast(uint) (data.length / w / h);
    const bool src_ok = 1 <= src_chans && src_chans <= 4;
    const bool tgt_ok = 0 <= tgt_chans && tgt_chans <= 4;
    if (!(src_ok && tgt_ok))
        throw new ImageIOException("invalid channel count");

    // catches data that is not a whole number of pixels
    if (src_chans * w * h != data.length)
        throw new ImageIOException("mismatching dimensions and length");

    PNG_Encoder ec;
    ec.stream    = stream;
    ec.w         = cast(size_t) w;
    ec.h         = cast(size_t) h;
    ec.src_chans = src_chans;
    ec.tgt_chans = tgt_chans ? cast(uint) tgt_chans : src_chans;
    ec.data      = data;

    write_png(ec);
    stream.flush();
}
569 
/// State shared by the PNG encoding functions.
struct PNG_Encoder {
    Writer stream;              // output
    size_t w;                   // image width in pixels
    size_t h;                   // image height in pixels
    uint src_chans, tgt_chans;  // channels in `data` / in the written file
    const(ubyte)[] data;        // source pixels

    CRC32 crc;                  // running CRC of the chunk being written

    uint writelen;      // how much written of current idat data
    ubyte[] chunk_buf;  // len type data crc
    ubyte[] data_buf;   // slice of chunk_buf, for just chunk data
}
583 
/// Writes a complete PNG file to ec.stream: signature and IHDR, the IDAT
/// chunks, and IEND. The image is written with bit depth 8 and without
/// interlacing.
void write_png(ref PNG_Encoder ec) {
    // layout: signature 0..8 | IHDR length 8..12 | type 12..16 | data 16..29 | crc 29..33
    ubyte[33] head = void;
    head[ 0 ..  8] = png_file_header;
    head[ 8 .. 16] = [0x0, 0x0, 0x0, 0xd, 'I','H','D','R'];
    head[16 .. 20] = nativeToBigEndian(cast(uint) ec.w);
    head[20 .. 24] = nativeToBigEndian(cast(uint) ec.h);
    head[24      ] = 8;  // bit depth
    head[25      ] = color_type(ec.tgt_chans);
    head[26 .. 29] = 0;  // compression, filter and interlace methods

    // the CRC covers chunk type and data and is stored in reversed byte order
    ec.crc.start();
    ec.crc.put(head[12 .. 29]);
    ubyte[4] checksum = ec.crc.finish();
    reverse(checksum[]);
    head[29 .. 33] = checksum;
    ec.stream.rawWrite(head);

    write_IDATs(ec);

    // IEND never has data, so the whole chunk including its CRC is constant
    static immutable ubyte[12] end_chunk =
        [0, 0, 0, 0, 'I','E','N','D', 0xae, 0x42, 0x60, 0x82];
    ec.stream.rawWrite(end_chunk);
}
606 
/// Converts every source line to the target channel count, applies the Paeth
/// filter to it, compresses the filtered image as a whole and writes the
/// result as IDAT chunks of at most 4 * 4096 data bytes each.
void write_IDATs(ref PNG_Encoder ec) {
    static immutable ubyte[4] IDAT_type = ['I','D','A','T'];
    enum size_t max_chunk_data = 4 * 4096;

    ec.writelen = 0;
    ec.chunk_buf = new ubyte[8 + max_chunk_data + 4];         // len type data crc
    ec.data_buf = ec.chunk_buf[8 .. 8 + max_chunk_data];
    ec.chunk_buf[4 .. 8] = IDAT_type;

    immutable size_t linesize = ec.w * ec.tgt_chans + 1; // +1 for filter type
    ubyte[] current = new ubyte[linesize];
    ubyte[] previous = new ubyte[linesize];     // all zero for the first line
    debug(DebugPNG) assert(previous[0] == 0);

    ubyte[] filtered = new ubyte[linesize];
    ubyte[] all_filtered;

    const LineConv convert = get_converter(ec.src_chans, ec.tgt_chans);

    immutable size_t step = ec.tgt_chans;   // step between pixels, in bytes
    immutable size_t src_linesize = ec.w * ec.src_chans;

    for (size_t row = 0, src_pos = 0; row < ec.h; ++row, src_pos += src_linesize) {
        convert(ec.data[src_pos .. src_pos + src_linesize], current[1..$]);

        filtered[0] = PNG_FilterType.Paeth;

        // first pixel: no left and no upper-left neighbour
        for (size_t i = 1; i < step + 1; ++i)
            filtered[i] = cast(ubyte) (current[i] - paeth(0, previous[i], 0));

        for (size_t i = step + 1; i < current.length; ++i) {
            const ubyte predicted =
                paeth(current[i - step], previous[i], previous[i - step]);
            filtered[i] = cast(ubyte) (current[i] - predicted);
        }

        all_filtered ~= filtered;

        swap(previous, current);
    }

    const (void)[] compressed = compress(all_filtered, 6);

    ec.write_to_IDAT_stream(compressed);
    // a partially filled chunk is still pending
    if (0 < ec.writelen)
        ec.write_IDAT_chunk();
}
654 
/// Appends `_compressed` to the IDAT data buffer, writing out a chunk every
/// time the buffer becomes full. Data that does not fill the buffer stays in
/// it (ec.writelen bytes) for the caller to flush.
void write_to_IDAT_stream(ref PNG_Encoder ec, in void[] _compressed) {
    const(ubyte)[] pending = cast(const(ubyte)[]) _compressed;
    size_t offset = 0;   // bytes of `pending` already buffered

    while (offset < pending.length) {
        const size_t room = ec.data_buf.length - ec.writelen;
        const size_t count = min(room, pending.length - offset);

        ec.data_buf[ec.writelen .. ec.writelen + count] =
            pending[offset .. offset + count];
        ec.writelen += count;
        offset += count;

        if (ec.writelen == ec.data_buf.length)
            ec.write_IDAT_chunk();
    }
}
668 
/// Writes the first ec.writelen bytes of the data buffer as one IDAT chunk
/// and resets ec.writelen to 0.
// chunk: len type data crc, type is already in buf
void write_IDAT_chunk(ref PNG_Encoder ec) {
    const size_t data_end = 8 + ec.writelen;   // offset of the crc field

    ec.chunk_buf[0 .. 4] = nativeToBigEndian!uint(ec.writelen);

    ec.crc.put(ec.chunk_buf[4 .. data_end]);   // crc of type and data
    ubyte[4] checksum = ec.crc.finish();
    reverse(checksum[]);
    ec.chunk_buf[data_end .. data_end + 4] = checksum;

    ec.stream.rawWrite(ec.chunk_buf[0 .. data_end + 4]);
    ec.writelen = 0;
}
679 
/// Reads the PNG header from `stream` and reports the image's width, height
/// and channel count (3 for indexed images).
///
/// `chans` is set to 0 when the header carries a color type this module does
/// not know. The header's dimensions are passed on as stored, without range
/// checks.
///
/// Throws: ImageIOException if the header is corrupt.
void read_png_info(Reader stream, out long w, out long h, out long chans) {
    PNG_Header hdr = read_png_header(stream);
    w = hdr.width;
    h = hdr.height;

    // color_type comes straight from the file; channels() uses a final switch
    // and must only ever see valid enum members.
    switch (hdr.color_type) with (PNG_ColorType) {
        case Y, RGB, Idx, YA, RGBA:
            chans = channels(cast(PNG_ColorType) hdr.color_type);
            break;
        default:
            chans = 0;
            break;
    }
}
686 
687 // --------------------------------------------------------------------------------
688 // TGA
689 
/// The fields of the 18-byte TGA file header, in file order. Multi-byte
/// fields are little-endian in the file.
public struct TGA_Header {
   ubyte  id_length;        /// number of ID bytes that follow the header
   ubyte  palette_type;     /// non-zero if the file has a palette
   ubyte  data_type;        /// compare against TGA_DataType
   ushort palette_start, palette_length;
   ubyte  palette_bits;
   ushort x_origin, y_origin;
   ushort width, height;
   ubyte  bits_pp;          /// bits per pixel
   ubyte  flags;            /// bits 0-3: attribute bits, 0x10: right-to-left,
                            /// 0x20: origin at top, 0xc0: interlacing
}
704 
/// Opens `filename` and returns its TGA header (fields are not validated).
public TGA_Header read_tga_header(in char[] filename) {
    scope file_reader = new Reader(filename);
    return read_tga_header(file_reader);
}
709 
/// Reads the 18-byte TGA header from `stream` and returns its fields.
/// No validation is done here.
TGA_Header read_tga_header(Reader stream) {
    ubyte[18] raw = void;
    stream.readExact(raw, raw.length);

    TGA_Header hdr;
    hdr.id_length      = raw[0];
    hdr.palette_type   = raw[1];
    hdr.data_type      = raw[2];
    hdr.palette_start  = littleEndianToNative!ushort(raw[3..5]);
    hdr.palette_length = littleEndianToNative!ushort(raw[5..7]);
    hdr.palette_bits   = raw[7];
    hdr.x_origin       = littleEndianToNative!ushort(raw[8..10]);
    hdr.y_origin       = littleEndianToNative!ushort(raw[10..12]);
    hdr.width          = littleEndianToNative!ushort(raw[12..14]);
    hdr.height         = littleEndianToNative!ushort(raw[14..16]);
    hdr.bits_pp        = raw[16];
    hdr.flags          = raw[17];
    return hdr;
}
730 
/// Opens `filename` and decodes it as a TGA; see the Reader overload for the
/// meaning of `req_chans` and the possible errors.
public IFImage read_tga(in char[] filename, long req_chans = 0) {
    scope file_reader = new Reader(filename);
    return read_tga(file_reader, req_chans);
}
735 
/// Decodes a TGA held in the memory buffer `source`; see the Reader overload
/// of read_tga for the meaning of `req_chans` and the possible errors.
public IFImage read_tga_from_mem(in ubyte[] source, long req_chans = 0) {
    scope mem_reader = new Reader(source);
    return read_tga(mem_reader, req_chans);
}
740 
/// Decodes a TGA from `stream`.
///
/// `req_chans` selects the number of channels in the result (1..4); 0 keeps
/// the channel count of the file (bits per pixel / 8).
///
/// Supported are true-color images with 24 or 32 bits per pixel and gray
/// images with 8 bits, or 16 bits including 8 attribute bits, each either
/// uncompressed or RLE-compressed.
///
/// Throws: ImageIOException if `req_chans` is out of range or the file uses
/// anything else (interlacing, right-to-left order, a palette, ...).
IFImage read_tga(Reader stream, long req_chans = 0) {
    if (req_chans < 0 || 4 < req_chans)
        throw new ImageIOException("come on...");

    TGA_Header hdr = read_tga_header(stream);

    if (hdr.width < 1 || hdr.height < 1)
        throw new ImageIOException("invalid dimensions");
    if (hdr.flags & 0xc0)   // two bits
        throw new ImageIOException("interlaced TGAs not supported");
    if (hdr.flags & 0x10)
        throw new ImageIOException("right-to-left TGAs not supported");

    ubyte attr_bits_pp = (hdr.flags & 0xf);
    // some set it 0 although data has 8
    if (attr_bits_pp != 0 && attr_bits_pp != 8)
        throw new ImageIOException("only 8-bit alpha/attribute(s) supported");
    if (hdr.palette_type)
        throw new ImageIOException("paletted TGAs not supported");

    const bool color_depth_ok = hdr.bits_pp == 24 || hdr.bits_pp == 32;
    const bool gray_depth_ok  = hdr.bits_pp == 8 ||
                                (hdr.bits_pp == 16 && attr_bits_pp == 8);

    // paletted data types (1 and 9) end up in the default branch
    bool rle;
    bool depth_ok;
    switch (hdr.data_type) with (TGA_DataType) {
        case TrueColor:     rle = false; depth_ok = color_depth_ok; break;
        case Gray:          rle = false; depth_ok = gray_depth_ok;  break;
        case TrueColor_RLE: rle = true;  depth_ok = color_depth_ok; break;
        case Gray_RLE:      rle = true;  depth_ok = gray_depth_ok;  break;
        default: throw new ImageIOException("data type not supported");
    }
    if (!depth_ok)
        throw new ImageIOException("not supported");

    int src_chans = hdr.bits_pp / 8;

    // skip the image ID field
    if (hdr.id_length)
        stream.seek(hdr.id_length, SEEK_CUR);

    TGA_Decoder dc;
    dc.stream        = stream;
    dc.w             = hdr.width;
    dc.h             = hdr.height;
    dc.origin_at_top = cast(bool) (hdr.flags & 0x20);
    dc.bytes_pp      = hdr.bits_pp / 8;
    dc.rle           = rle;
    dc.tgt_chans     = (req_chans == 0) ? src_chans : cast(int) req_chans;

    switch (dc.bytes_pp) {
        case 1: dc.src_fmt = _ColFmt.Y; break;
        case 2: dc.src_fmt = _ColFmt.YA; break;
        case 3: dc.src_fmt = _ColFmt.BGR; break;
        case 4: dc.src_fmt = _ColFmt.BGRA; break;
        default: throw new ImageIOException("TGA: format not supported");
    }

    IFImage image;
    image.w      = dc.w;
    image.h      = dc.h;
    image.c      = cast(ColFmt) dc.tgt_chans;
    image.pixels = decode_tga(dc);
    return image;
}
815 
/// Encodes `data` as a `w` x `h` TGA and writes it to `file`; see the Writer
/// overload for the meaning of `tgt_chans` and the possible errors.
public void write_tga(in char[] file, long w, long h, in ubyte[] data, long tgt_chans = 0)
{
    scope file_writer = new Writer(file);
    write_tga(file_writer, w, h, data, tgt_chans);
}
821 
/// Encodes `data` as a `w` x `h` TGA and returns the writer's result buffer;
/// see the Writer overload of write_tga for `tgt_chans` and the possible errors.
public ubyte[] write_tga_to_mem(long w, long h, in ubyte[] data, long tgt_chans = 0) {
    scope mem_writer = new Writer();
    write_tga(mem_writer, w, h, data, tgt_chans);
    return mem_writer.result;
}
827 
/// Encodes `data` as a `w` x `h` TGA and writes it to `stream`; the encoder is
/// set up with RLE enabled.
///
/// The number of source channels is derived from data.length / w / h and must
/// be 1..4. `tgt_chans` is the channel count of the written image (1..4); 0
/// means the same as the source.
///
/// Throws: ImageIOException if a dimension is outside 1 .. ushort.max, a
/// channel count is invalid or data.length is not w * h * channels.
void write_tga(Writer stream, long w, long h, in ubyte[] data, long tgt_chans = 0) {
    const bool dims_ok = 1 <= w && 1 <= h && w <= ushort.max && h <= ushort.max;
    if (!dims_ok)
        throw new ImageIOException("invalid dimensions");

    ulong src_chans = data.length / w / h;
    const bool src_ok = 1 <= src_chans && src_chans <= 4;
    const bool tgt_ok = 0 <= tgt_chans && tgt_chans <= 4;
    if (!(src_ok && tgt_ok))
        throw new ImageIOException("invalid channel count");

    // catches data that is not a whole number of pixels
    if (src_chans * w * h != data.length)
        throw new ImageIOException("mismatching dimensions and length");

    TGA_Encoder ec = {
        stream    : stream,
        w         : cast(ushort) w,
        h         : cast(ushort) h,
        src_chans : cast(int) src_chans,
        tgt_chans : cast(int) ((tgt_chans) ? tgt_chans : src_chans),
        rle       : true,
        data      : data,
    };

    write_tga(ec);
    stream.flush();
}
850 
// Everything decode_tga needs to decode the pixel data of one TGA image.
struct TGA_Decoder {
    Reader stream;         // source the pixel data is read from
    size_t w, h;           // pixels per line, number of lines
    bool origin_at_top;    // src
    uint bytes_pp;         // bytes per pixel in the source data
    bool rle;   // run length compressed
    _ColFmt src_fmt;       // source pixel layout, handed to get_converter
    uint tgt_chans;        // channels per pixel in the decoded result
}
860 
// Decodes the pixel data described by `dc` and returns it as a newly allocated
// buffer of dc.w * dc.h * dc.tgt_chans bytes, with the first line of the
// buffer being the top line of the image.
//
// Each source line is read (and RLE-expanded if dc.rle is set) into a
// temporary buffer, then converted from dc.src_fmt to dc.tgt_chans channels.
// If the source is not stored top-down, lines are written into the result
// from the last line backwards.
ubyte[] decode_tga(ref TGA_Decoder dc) {
    auto result = new ubyte[dc.w * dc.h * dc.tgt_chans];

    immutable size_t tgt_linesize = dc.w * dc.tgt_chans;
    immutable size_t src_linesize = dc.w * dc.bytes_pp;
    auto src_line = new ubyte[src_linesize];

    // negative stride walks the result bottom-up when the source is bottom-up
    immutable ptrdiff_t tgt_stride = (dc.origin_at_top) ? tgt_linesize : -tgt_linesize;
    ptrdiff_t ti                   = (dc.origin_at_top) ? 0 : (dc.h-1) * tgt_linesize;

    const LineConv convert = get_converter(dc.src_fmt, dc.tgt_chans);

    if (!dc.rle) {
        foreach (_j; 0 .. dc.h) {
            dc.stream.readExact(src_line, src_linesize);
            convert(src_line, result[ti .. ti + tgt_linesize]);
            ti += tgt_stride;
        }
        return result;
    }

    // ----- RLE  -----

    // rbuf first receives the packet header byte; for a run packet it then
    // holds the pixel value to repeat for as long as the packet lasts.
    auto rbuf = new ubyte[src_linesize];
    size_t plen = 0;      // bytes still to be produced by the current packet
    bool its_rle = false;

    foreach (_j; 0 .. dc.h) {
        // fill src_line with uncompressed data (this works like a stream)
        size_t wanted = src_linesize;
        while (wanted) {
            if (plen == 0) {
                dc.stream.readExact(rbuf, 1);
                its_rle = cast(bool) (rbuf[0] & 0x80);
                plen = ((rbuf[0] & 0x7f) + 1) * dc.bytes_pp; // length in bytes
                // The pixel value of a run packet is stored exactly once,
                // right after the header. Read it here so that a packet
                // continuing onto the next line does not consume more input.
                if (its_rle)
                    dc.stream.readExact(rbuf, dc.bytes_pp);
            }
            const size_t gotten = src_linesize - wanted;
            const size_t copysize = min(plen, wanted);
            if (its_rle) {
                for (size_t p = gotten; p < gotten+copysize; p += dc.bytes_pp)
                    src_line[p .. p+dc.bytes_pp] = rbuf[0 .. dc.bytes_pp];
            } else {    // it's raw
                auto slice = src_line[gotten .. gotten+copysize];
                dc.stream.readExact(slice, copysize);
            }
            wanted -= copysize;
            plen -= copysize;
        }

        convert(src_line, result[ti .. ti + tgt_linesize]);
        ti += tgt_stride;
    }

    return result;
}
917 
918 // ----------------------------------------------------------------------
919 // TGA encoder
920 
// Signature bytes ("TRUEVISION-XFILE." plus a terminating zero) that
// write_tga places in the last 18 bytes of the 26-byte footer.
immutable ubyte[18] tga_footer_sig =
    ['T','R','U','E','V','I','S','I','O','N','-','X','F','I','L','E','.', 0];
923 
// Everything write_tga / write_image_data need to encode one TGA image.
struct TGA_Encoder {
    Writer stream;          // destination of the encoded bytes
    ushort w, h;            // image dimensions in pixels
    int src_chans;          // channels per pixel in `data`
    int tgt_chans;          // channels per pixel to store in the file
    bool rle;   // run length compression
    const(ubyte)[] data;    // source pixels, h lines of w * src_chans bytes
}
932 
// Writes a complete TGA file to ec.stream: the 18-byte header, the pixel data
// (via write_image_data) and the 26-byte footer.
//
// 1 or 2 target channels are stored as a gray image, 3 or 4 as true color;
// 2 and 4 channels additionally declare 8 attribute (alpha) bits.
// Throws ImageIOException("internal error") for any other channel count.
void write_tga(ref TGA_Encoder ec) {
    bool gray;
    switch (ec.tgt_chans) {
        case 1, 2: gray = true;  break;
        case 3, 4: gray = false; break;
        default: throw new ImageIOException("internal error");
    }
    immutable bool with_alpha = (ec.tgt_chans == 2 || ec.tgt_chans == 4);

    ubyte image_type;
    if (gray)
        image_type = ec.rle ? TGA_DataType.Gray_RLE : TGA_DataType.Gray;
    else
        image_type = ec.rle ? TGA_DataType.TrueColor_RLE : TGA_DataType.TrueColor;

    // Zero-initialised, which already covers: id length [0], palette type [1],
    // palette start/len/bits [3..8] and x/y origin [8..12].
    ubyte[18] header;
    header[2] = image_type;
    header[12..14] = nativeToLittleEndian(ec.w);
    header[14..16] = nativeToLittleEndian(ec.h);
    header[16] = cast(ubyte) (ec.tgt_chans * 8);     // bits per pixel
    header[17] = with_alpha ? 0x8 : 0x0;             // flags: attr_bits_pp = 8
    ec.stream.rawWrite(header);

    write_image_data(ec);

    // Zero-initialised: extension area offset [0..4] and developer directory
    // offset [4..8] stay 0.
    ubyte[26] footer;
    footer[8..26] = tga_footer_sig;
    ec.stream.rawWrite(footer);
}
964 
// Writes the pixel rows of ec.data to ec.stream, last source row first
// (the file's origin is at the bottom). Each row is converted from
// ec.src_chans channels to the target layout and, if ec.rle is set,
// run-length compressed before being written.
// Throws ImageIOException("internal error") if ec.tgt_chans is not 1 .. 4.
void write_image_data(ref TGA_Encoder ec) {
    _ColFmt out_fmt;
    switch (ec.tgt_chans) {
        case 1: out_fmt = _ColFmt.Y; break;
        case 2: out_fmt = _ColFmt.YA; break;
        case 3: out_fmt = _ColFmt.BGR; break;
        case 4: out_fmt = _ColFmt.BGRA; break;
        default: throw new ImageIOException("internal error");
    }

    const LineConv to_target = get_converter(ec.src_chans, out_fmt);

    immutable size_t src_row_bytes = ec.w * ec.src_chans;
    immutable size_t tgt_row_bytes = ec.w * ec.tgt_chans;
    auto row = new ubyte[tgt_row_bytes];

    // Scratch space for one compressed row: the row itself plus one header
    // byte per 128 bytes. Only needed when compressing.
    ubyte[] packed = null;
    if (ec.rle) {
        immutable size_t max_packets_per_row = (tgt_row_bytes+127) / 128;
        packed = new ubyte[tgt_row_bytes + max_packets_per_row];
    }

    ptrdiff_t src_off = (ec.h-1) * src_row_bytes;     // start at the last row
    foreach (_; 0 .. ec.h) {
        to_target(ec.data[src_off .. src_off + src_row_bytes], row);
        if (ec.rle) {
            ubyte[] packed_row = rle_compress(row, packed, ec.w, ec.tgt_chans);
            ec.stream.rawWrite(packed_row);
        } else {
            ec.stream.rawWrite(row);
        }
        src_off -= src_row_bytes;
    }
}
1004 
// Run-length compresses one line of pixels into TGA packets.
//
// line      the uncompressed pixels, w * bytes_pp bytes
// tgt_cmp   caller-provided output buffer; must be large enough for the result
// w         number of pixels in the line
// bytes_pp  bytes per pixel
//
// Returns the used prefix of tgt_cmp. A packet is one header byte followed by
// data: header 0x80 | (n-1) plus one pixel repeats it n times; header (n-1)
// plus n pixels stores them verbatim. Packets hold at most 128 pixels.
ubyte[] rle_compress(in ubyte[] line, ubyte[] tgt_cmp, in size_t w, in int bytes_pp) pure {
    // shortest run that is stored as a run packet rather than as raw pixels
    immutable int min_run = (bytes_pp > 1) ? 2 : 3;

    size_t pending = 0;      // pixels collected for the next raw packet
    size_t pending_at = 0;   // byte offset in line where those pixels start
    size_t out_i = 0;        // write position in tgt_cmp
    size_t remaining = w;    // pixels not yet consumed
    size_t next = 0;         // byte offset of the next unread pixel

    while (remaining) {
        const(ubyte)[] px = line[next .. next + bytes_pp];
        next += bytes_pp;

        // extend the run while the following pixels equal px
        size_t run = 1;
        while (next < line.length && line[next .. next+bytes_pp] == px && run < 128) {
            ++run;
            next += bytes_pp;
        }
        remaining -= run;

        if (min_run <= run) {
            // flush the raw pixels that precede this run, if any
            if (pending) {
                assert(pending < 128);
                immutable size_t nbytes = pending * bytes_pp;
                tgt_cmp[out_i++] = cast(ubyte) (pending-1);
                tgt_cmp[out_i .. out_i+nbytes] = line[pending_at .. pending_at+nbytes];
                out_i += nbytes;
                pending = 0;
            }

            // the run packet itself: header plus a single pixel
            tgt_cmp[out_i++] = cast(ubyte) (0x80 | (run-1));
            tgt_cmp[out_i .. out_i+bytes_pp] = px[];
            out_i += bytes_pp;
            pending_at = next;
        } else {
            // too short for a run packet; add to the raw pixels
            pending += run;
            if (128 <= pending) {
                // a full raw packet is ready, emit it
                immutable size_t nbytes = 128 * bytes_pp;
                tgt_cmp[out_i++] = 0x7f;
                tgt_cmp[out_i .. out_i+nbytes] = line[pending_at .. pending_at+nbytes];
                out_i += nbytes;
                pending_at += nbytes;
                pending -= 128;
            }
        }
    }

    // raw pixels left over at the end of the line
    if (pending) {
        immutable size_t nbytes = pending * bytes_pp;
        tgt_cmp[out_i++] = cast(ubyte) (pending-1);
        tgt_cmp[out_i .. out_i+nbytes] = line[pending_at .. pending_at+nbytes];
        out_i += nbytes;
    }
    return tgt_cmp[0 .. out_i];
}
1062 
// Image type codes of the TGA header's data type byte (written to hdr[2] by
// write_tga). The *_RLE variants mark run-length compressed pixel data;
// read_tga_info treats the Idx variants as palette-based.
enum TGA_DataType : ubyte {
    Idx           = 1,
    TrueColor     = 2,
    Gray          = 3,
    Idx_RLE       = 9,
    TrueColor_RLE = 10,
    Gray_RLE      = 11,
}
1071 
// Reads the TGA header from `stream` and reports the image dimensions and
// channel count. `chans` is set to 0 when the channel count cannot be derived:
//  - true color / gray (plain or RLE): bits per pixel / 8, if a multiple of 8
//  - indexed (plain or RLE): 3 for 15/16/24-bit palette entries, 4 for 32-bit
void read_tga_info(Reader stream, out long w, out long h, out long chans) {
    TGA_Header hdr = read_tga_header(stream);
    w = hdr.width;
    h = hdr.height;

    // TGA is awkward...
    const kind = hdr.data_type;
    immutable bool direct =
        kind == TGA_DataType.TrueColor     || kind == TGA_DataType.Gray ||
        kind == TGA_DataType.TrueColor_RLE || kind == TGA_DataType.Gray_RLE;
    immutable bool indexed =
        kind == TGA_DataType.Idx || kind == TGA_DataType.Idx_RLE;

    if (direct && (hdr.bits_pp % 8) == 0) {
        chans = hdr.bits_pp / 8;
    } else if (indexed) {
        switch (hdr.palette_bits) {
            case 15:
            case 16:    // one bit could be for some "interrupt control"
            case 24:
                chans = 3;
                break;
            case 32:
                chans = 4;
                break;
            default:
                chans = 0;  // unknown
                break;
        }
    } else {
        chans = 0;  // unknown
    }
}
1096 
1097 // --------------------------------------------------------------------------------
1098 // BMP
1099 
// Reads a BMP image from the file named by `filename`.
// Constructs a Reader from the path and forwards to the Reader-based
// read_bmp overload; req_chans is passed through unchanged.
public IFImage read_bmp(in char[] filename, long req_chans = 0) {
    scope file_src = new Reader(filename);
    return read_bmp(file_src, req_chans);
}
1104 
// Reads a BMP image from the bytes in `source`.
// Constructs a Reader over the buffer and forwards to the Reader-based
// read_bmp overload; req_chans is passed through unchanged.
public IFImage read_bmp_from_mem(in ubyte[] source, long req_chans = 0) {
    scope mem_src = new Reader(source);
    return read_bmp(mem_src, req_chans);
}
1109 
// Reads only the BMP file and DIB headers of the file named by `filename`.
// Constructs a Reader from the path and forwards to the Reader-based
// read_bmp_header overload.
public BMP_Header read_bmp_header(in char[] filename) {
    scope file_src = new Reader(filename);
    return read_bmp_header(file_src);
}
1114 
// Parsed BMP file header plus DIB header, as returned by read_bmp_header.
// The dib_v* members are only filled in when dib_version is at least the
// corresponding version; otherwise they keep their default values.
public struct BMP_Header {
    size_t file_size;            // file header bytes 2..6
    size_t pixel_data_offset;    // file header bytes 10..14

    size_t dib_size;             // size of the DIB header, in bytes
    ptrdiff_t width;
    ptrdiff_t height;            // read_bmp treats a negative height as top-down
    ushort planes;
    uint dib_version;            // 0..5, chosen from the DIB header size
    DibV1 dib_v1;
    DibV2 dib_v2;
    uint dib_v3_alpha_mask;
    DibV4 dib_v4;
    DibV5 dib_v5;
}
1130 
// Fields read_bmp_header fills in for DIB header version 1 (40 bytes) and up.
public struct DibV1 {
    size_t bits_pp;              // bits per pixel
    uint compression;            // read_bmp accepts CMP_RGB and CMP_BITS only
    size_t idat_size;
    size_t pixels_per_meter_x;
    size_t pixels_per_meter_y;
    size_t palette_length;       // number of palette entries
    uint important_color_count;
}
1140 
// Channel bit masks read_bmp_header fills in for DIB header version 2
// (52 bytes) and up. read_bmp uses them when compression is CMP_BITS.
public struct DibV2 {
    uint red_mask;
    uint green_mask;
    uint blue_mask;
}
1146 
// Fields read_bmp_header fills in for DIB header version 4 (108 bytes) and
// up. They are stored as read; read_bmp does not use them.
public struct DibV4 {
    uint color_space_type;
    ubyte[36] color_space_endpoints;
    uint gamma_red;
    uint gamma_green;
    uint gamma_blue;
    uint intent;
}
1155 
// Fields read_bmp_header fills in for DIB header version 5 (124 bytes).
// They are stored as read; read_bmp does not use them.
public struct DibV5 {
    uint icc_profile_data;
    uint icc_profile_size;
}
1160 
// Reads the 14-byte BMP file header and the DIB header that follows it from
// `stream` and returns them parsed. All multi-byte fields are little endian.
//
// The DIB header version is chosen from its declared size:
// 12 -> 0 (core header), 40 -> 1, 52 -> 2, 56 -> 3, 108 -> 4, 124 -> 5.
// The dib_v* members of the result are only filled in for headers of at least
// that version.
//
// Throws ImageIOException if the file does not start with "BM" or the DIB
// header size is not one of the sizes above.
BMP_Header read_bmp_header(Reader stream) {
    ubyte[18] tmp = void;  // bmp header + size of dib header
    stream.readExact(tmp[], tmp.length);

    if (tmp[0..2] != ['B', 'M'])
        throw new ImageIOException("corrupt header");

    size_t dib_size = littleEndianToNative!uint(tmp[14..18]);
    uint dib_version;
    switch (dib_size) {
        case 12: dib_version = 0; break;
        case 40: dib_version = 1; break;
        case 52: dib_version = 2; break;
        case 56: dib_version = 3; break;
        case 108: dib_version = 4; break;
        case 124: dib_version = 5; break;
        default: throw new ImageIOException("unsupported dib version");
    }
    // the 4-byte size field has been consumed already
    auto dib_header = new ubyte[dib_size-4];
    stream.readExact(dib_header[], dib_header.length);

    ptrdiff_t width;
    ptrdiff_t height;
    ushort planes;
    if (dib_version == 0) {
        // The 12-byte core header stores width, height and planes as 16-bit
        // fields; only 8 bytes follow the size field, so the 32-bit layout
        // used by the later versions must not be applied here.
        width  = littleEndianToNative!ushort(dib_header[0..2]);
        height = littleEndianToNative!ushort(dib_header[2..4]);
        planes = littleEndianToNative!ushort(dib_header[4..6]);
    } else {
        width  = littleEndianToNative!int(dib_header[0..4]);
        height = littleEndianToNative!int(dib_header[4..8]);
        planes = littleEndianToNative!ushort(dib_header[8..10]);
    }

    DibV1 dib_v1;
    DibV2 dib_v2;
    uint dib_v3_alpha_mask;
    DibV4 dib_v4;
    DibV5 dib_v5;

    if (1 <= dib_version) {
        DibV1 v1 = {
            bits_pp               : cast(size_t) littleEndianToNative!ushort(dib_header[10..12]),
            compression           : littleEndianToNative!uint(dib_header[12..16]),
            idat_size             : cast(size_t) littleEndianToNative!uint(dib_header[16..20]),
            pixels_per_meter_x    : cast(size_t) littleEndianToNative!uint(dib_header[20..24]),
            pixels_per_meter_y    : cast(size_t) littleEndianToNative!uint(dib_header[24..28]),
            palette_length        : cast(size_t) littleEndianToNative!uint(dib_header[28..32]),
            important_color_count : littleEndianToNative!uint(dib_header[32..36]),
        };
        dib_v1 = v1;
    }

    if (2 <= dib_version) {
        DibV2 v2 = {
            red_mask              : littleEndianToNative!uint(dib_header[36..40]),
            green_mask            : littleEndianToNative!uint(dib_header[40..44]),
            blue_mask             : littleEndianToNative!uint(dib_header[44..48]),
        };
        dib_v2 = v2;
    }

    if (3 <= dib_version) {
        dib_v3_alpha_mask = littleEndianToNative!uint(dib_header[48..52]);
    }

    if (4 <= dib_version) {
        DibV4 v4 = {
            color_space_type      : littleEndianToNative!uint(dib_header[52..56]),
            color_space_endpoints : dib_header[56..92],
            gamma_red             : littleEndianToNative!uint(dib_header[92..96]),
            gamma_green           : littleEndianToNative!uint(dib_header[96..100]),
            gamma_blue            : littleEndianToNative!uint(dib_header[100..104]),
            intent                : littleEndianToNative!uint(dib_header[104..108]),
        };
        dib_v4 = v4;
    }

    if (5 <= dib_version) {
        DibV5 v5 = {
            icc_profile_data      : littleEndianToNative!uint(dib_header[108..112]),
            icc_profile_size      : littleEndianToNative!uint(dib_header[112..116]),
        };
        dib_v5 = v5;
    }

    BMP_Header header = {
        file_size             : cast(size_t) littleEndianToNative!uint(tmp[2..6]),
        pixel_data_offset     : cast(size_t) littleEndianToNative!uint(tmp[10..14]),
        dib_size              : dib_size,   // callers validate the pixel data offset against it
        width                 : width,
        height                : height,
        planes                : planes,
        dib_version           : dib_version,
        dib_v1                : dib_v1,
        dib_v2                : dib_v2,
        dib_v3_alpha_mask     : dib_v3_alpha_mask,
        dib_v4                : dib_v4,
        dib_v5                : dib_v5,
    };
    return header;
}
1249 
// Values of DibV1.compression that read_bmp accepts; anything else is
// rejected as unsupported. CMP_BITS makes read_bmp locate the channels via
// the bit masks of the DIB header.
enum CMP_RGB  = 0;
enum CMP_BITS = 3;
1252 
// Decodes a BMP image from `stream`.
//
// req_chans  number of channels wanted in the result (1 .. 4); 0 selects
//            RGBA if the header carries a non-zero alpha mask, else RGB
//
// Supported: 8-bit paletted, 24-bit and 32-bit images, either uncompressed
// (CMP_RGB) or with whole-byte channel masks (CMP_BITS). A negative header
// height means the lines are stored top-down. The returned pixels always
// start with the top line.
//
// Throws ImageIOException for invalid or unsupported files.
IFImage read_bmp(Reader stream, long req_chans = 0) {
    if (req_chans < 0 || 4 < req_chans)
        throw new ImageIOException("unknown color format");

    BMP_Header hdr = read_bmp_header(stream);

    if (hdr.width < 1 || hdr.height == 0) { throw new ImageIOException("invalid dimensions"); }
    if (hdr.pixel_data_offset < (14 + hdr.dib_size)
    || hdr.pixel_data_offset > 0xffffff /* arbitrary */) {
        throw new ImageIOException("invalid pixel data offset");
    }
    if (hdr.planes != 1) { throw new ImageIOException("not supported"); }

    // Defaults used for DIB version 0, whose header fields beyond the
    // dimensions are not available here: 8-bit paletted with a full palette
    // of 3-byte entries.
    auto bytes_pp       = 1;
    bool paletted       = true;
    size_t palette_length = 256;
    bool rgb_masked     = false;
    auto pe_bytes_pp    = 3;

    if (1 <= hdr.dib_version) {
        if (256 < hdr.dib_v1.palette_length)
            throw new ImageIOException("invalid palette length");
        if (hdr.dib_v1.bits_pp <= 8 &&
           (hdr.dib_v1.palette_length == 0 || hdr.dib_v1.compression != CMP_RGB))
             throw new ImageIOException("invalid format");
        if (hdr.dib_v1.compression != CMP_RGB && hdr.dib_v1.compression != CMP_BITS)
             throw new ImageIOException("unsupported compression");

        switch (hdr.dib_v1.bits_pp) {
            case 8  : bytes_pp = 1; paletted = true; break;
            case 24 : bytes_pp = 3; paletted = false; break;
            case 32 : bytes_pp = 4; paletted = false; break;
            default: throw new ImageIOException("not supported");
        }

        palette_length = hdr.dib_v1.palette_length;
        rgb_masked = hdr.dib_v1.compression == CMP_BITS;
        pe_bytes_pp = 4;
    }

    // maps a whole-byte channel mask to the byte index within a pixel
    size_t mask_to_idx(uint mask) {
        switch (mask) {
            case 0xff00_0000: return 3;
            case 0x00ff_0000: return 2;
            case 0x0000_ff00: return 1;
            case 0x0000_00ff: return 0;
            default: throw new ImageIOException("unsupported mask");
        }
    }

    // byte positions of the channels within a source pixel (BGR by default)
    size_t redi = 2;
    size_t greeni = 1;
    size_t bluei = 0;
    if (rgb_masked) {
        if (hdr.dib_version < 2)
            throw new ImageIOException("invalid format");
        redi = mask_to_idx(hdr.dib_v2.red_mask);
        greeni = mask_to_idx(hdr.dib_v2.green_mask);
        bluei = mask_to_idx(hdr.dib_v2.blue_mask);
    }

    bool alpha_masked = false;
    size_t alphai = 0;
    if (3 <= hdr.dib_version && hdr.dib_v3_alpha_mask != 0) {
        alpha_masked = true;
        alphai = mask_to_idx(hdr.dib_v3_alpha_mask);
    }

    ubyte[] depaletted_line = null;
    ubyte[] palette = null;
    if (paletted) {
        // Always expanded to 4 bytes per pixel, because the line converter
        // below is a BGRA converter regardless of the palette entry size.
        depaletted_line = new ubyte[hdr.width * 4];
        palette = new ubyte[palette_length * pe_bytes_pp];
        stream.readExact(palette[], palette.length);
    }

    stream.seek(hdr.pixel_data_offset, SEEK_SET);

    immutable tgt_chans = (0 < req_chans) ? req_chans
                                          : (alpha_masked) ? _ColFmt.RGBA
                                                           : _ColFmt.RGB;

    const LineConv convert = get_converter(_ColFmt.BGRA, tgt_chans);

    immutable size_t src_linesize = hdr.width * bytes_pp;  // without padding
    // Every stored line is padded to a multiple of 4 bytes. This applies to
    // paletted images as well.
    immutable size_t src_pad = 3 - ((src_linesize-1) % 4);
    immutable ptrdiff_t tgt_linesize = (hdr.width * cast(int) tgt_chans);

    // bottom-up files (positive height) fill the result from the last line
    immutable ptrdiff_t tgt_stride = (hdr.height < 0) ? tgt_linesize : -tgt_linesize;
    ptrdiff_t ti                   = (hdr.height < 0) ? 0 : (hdr.height-1) * tgt_linesize;

    auto src_line_buf  = new ubyte[src_linesize + src_pad];
    auto bgra_line_buf = (paletted) ? null : new ubyte[hdr.width * 4];
    auto result        = new ubyte[hdr.width * abs(hdr.height) * cast(int) tgt_chans];

    foreach (_; 0 .. abs(hdr.height)) {
        stream.readExact(src_line_buf[], src_line_buf.length);
        auto src_line = src_line_buf[0..src_linesize];

        if (paletted) {
            immutable size_t ps = pe_bytes_pp;
            size_t di = 0;
            foreach (idx; src_line[]) {
                // an index beyond the palette is a file error, not a bug
                if (palette_length <= idx)
                    throw new ImageIOException("invalid palette index");
                immutable size_t i = idx * ps;
                // first three bytes of an entry are B, G, R; alpha is opaque
                depaletted_line[di .. di+3] = palette[i .. i+3];
                depaletted_line[di+3] = 255;
                di += 4;
            }
            convert(depaletted_line[], result[ti .. (ti+tgt_linesize)]);
        } else {
            for (size_t si, di;   si < src_line.length;   si+=bytes_pp, di+=4) {
                bgra_line_buf[di + 0] = src_line[si + bluei];
                bgra_line_buf[di + 1] = src_line[si + greeni];
                bgra_line_buf[di + 2] = src_line[si + redi];
                bgra_line_buf[di + 3] = (alpha_masked) ? src_line[si + alphai]
                                                       : 255;
            }
            convert(bgra_line_buf[], result[ti .. (ti+tgt_linesize)]);
        }

        ti += tgt_stride;
    }

    IFImage ret = {
        w      : hdr.width,
        h      : abs(hdr.height),
        c      : cast(ColFmt) tgt_chans,
        pixels : result,
    };
    return ret;
}
1386 
// Reads the BMP headers from `stream` and reports the image dimensions (as
// absolute values) and the channel count: RGBA if the header is at least
// version 3 and has a non-zero alpha mask, otherwise RGB.
void read_bmp_info(Reader stream, out long w, out long h, out long chans) {
    BMP_Header info = read_bmp_header(stream);
    immutable bool has_alpha = 3 <= info.dib_version && info.dib_v3_alpha_mask != 0;

    w = abs(info.width);
    h = abs(info.height);
    if (has_alpha)
        chans = ColFmt.RGBA;
    else
        chans = ColFmt.RGB;
}
1394 
1395 // --------------------------------------------------------------------------------
1396 // Baseline JPEG decoder
1397 
1398 import std.math;    // floor, ceil
1399 import core.stdc.stdlib : alloca;
1400 
1401 //debug = DebugJPEG;
1402 
// Summary of a JPEG/JFIF file as returned by read_jpeg_header: values from
// the JFIF APP0 segment plus the start of the first frame header found.
public struct JPEG_Header {    // JFIF
    ubyte version_major;
    ubyte version_minor;
    ushort width, height;
    ubyte num_comps;    // number of image components given in the frame header
    ubyte precision;    // sample precision
    ubyte density_unit;     // 0 = no units but aspect ratio, 1 = dots/inch, 2 = dots/cm
    ushort density_x;
    ushort density_y;
    ubyte type; // 0xc0 = baseline, 0xc2 = progressive, ..., see Marker
}
1414 
// Reads the JPEG/JFIF header information of the file named by `filename`.
// Constructs a Reader from the path and forwards to the Reader-based
// read_jpeg_header overload.
public JPEG_Header read_jpeg_header(in char[] filename) {
    scope file_src = new Reader(filename);
    return read_jpeg_header(file_src);
}
1419 
// Reads the JFIF APP0 segment and then scans the following segments until a
// frame header (SOF0..SOF3 or SOF9..SOF11) is found, returning the collected
// information. Other known segments (DRI, DHT, DQT, COM, APPn) are skipped.
//
// Throws ImageIOException if the stream does not start with a JFIF APP0
// segment, if the thumbnail size disagrees with the segment length, if a scan
// or the end of image comes before any frame header, or on an unknown marker.
JPEG_Header read_jpeg_header(Reader stream) {
    ubyte[20 + 8] tmp = void;   // SOI, APP0 + SOF0
    stream.readExact(tmp, 20);

    // tmp[0..2] SOI, tmp[2..4] APP0 marker, tmp[4..6] segment length,
    // tmp[6..11] "JFIF\0"
    ushort len = bigEndianToNative!ushort(tmp[4..6]);
    if ( tmp[0..4] != [0xff,0xd8,0xff,0xe0] ||
         tmp[6..11] != ['J','F','I','F',0]  ||
         len < 16 )
        throw new ImageIOException("not JPEG/JFIF");

    // tmp[18], tmp[19] are the thumbnail dimensions, 3 bytes per pixel;
    // the thumbnail must account for everything beyond the 16 fixed bytes
    int thumbsize = tmp[18] * tmp[19] * 3;
    if (thumbsize != cast(int) len - 16)
        throw new ImageIOException("corrupt header");
    if (thumbsize)
        stream.seek(thumbsize, SEEK_CUR);

    JPEG_Header header = {
        version_major      : tmp[11],
        version_minor      : tmp[12],
        density_unit       : tmp[13],
        density_x          : bigEndianToNative!ushort(tmp[14..16]),
        density_y          : bigEndianToNative!ushort(tmp[16..18]),
    };

    while (true) {
        ubyte[2] marker;
        stream.readExact(marker, 2);

        if (marker[0] != 0xff)
            throw new ImageIOException("no frame header");
        // skip any additional 0xff bytes in front of the marker code
        while (marker[1] == 0xff)
            stream.readExact(marker[1..$], 1);

        // 0xff can no longer be a marker code after the loop above, so it is
        // free to serve as the label of the shared "skip this segment" case
        enum SKIP = 0xff;
        switch (marker[1]) with (Marker) {
            case SOF0: .. case SOF3: goto case;
            case SOF9: .. case SOF11:
                header.type = marker[1];
                // tmp[20..22] length, tmp[22] precision, tmp[23..25] height,
                // tmp[25..27] width, tmp[27] number of components
                stream.readExact(tmp[20..28], 8);
                //int len = bigEndianToNative!ushort(tmp[20..22]);
                header.precision = tmp[22];
                header.height = bigEndianToNative!ushort(tmp[23..25]);
                header.width = bigEndianToNative!ushort(tmp[25..27]);
                header.num_comps = tmp[27];
                // ignore the rest
                return header;
            case SOS, EOI: throw new ImageIOException("no frame header");
            case DRI, DHT, DQT, COM: goto case SKIP;
            case APP0: .. case APPf: goto case SKIP;
            case SKIP:
                // the segment length includes its own two bytes
                ubyte[2] lenbuf = void;
                stream.readExact(lenbuf, 2);
                int skiplen = bigEndianToNative!ushort(lenbuf) - 2;
                stream.seek(skiplen, SEEK_CUR);
                break;
            default: throw new ImageIOException("unsupported marker");
        }
    }
    assert(0);
}
1480 
// Reads a JPEG image from the file named by `filename`.
// Constructs a Reader from the path and forwards to the Reader-based
// read_jpeg overload; req_chans is passed through unchanged.
public IFImage read_jpeg(in char[] filename, long req_chans = 0) {
    scope file_src = new Reader(filename);
    return read_jpeg(file_src, req_chans);
}
1485 
// Reads a JPEG image from the bytes in `source`.
// Constructs a Reader over the buffer and forwards to the Reader-based
// read_jpeg overload; req_chans is passed through unchanged.
public IFImage read_jpeg_from_mem(in ubyte[] source, long req_chans = 0) {
    scope mem_src = new Reader(source);
    return read_jpeg(mem_src, req_chans);
}
1490 
// Decodes a JPEG/JFIF image from `stream`.
//
// req_chans  number of channels wanted in the result (1 .. 4); 0 means the
//            number of components found in the file
//
// Checks the SOI marker and the JFIF APP0 segment (major version 1 only),
// skips the thumbnail, reads the segments up to the first scan header and
// then decodes the image data.
// Throws ImageIOException if any of those checks fails or the file ends
// before a scan is found.
IFImage read_jpeg(Reader stream, long req_chans = 0) {
    if (req_chans < 0 || 4 < req_chans)
        throw new ImageIOException("come on...");

    ubyte[20] head = void;   // SOI, APP0, len, data
    stream.readExact(head, head.length);

    immutable ushort app0_len = bigEndianToNative!ushort(head[4..6]);
    immutable bool markers_ok = head[0..4] == [0xff,0xd8,0xff,0xe0];
    immutable bool jfif_tag_ok = head[6..11] == ['J','F','I','F',0];
    if (!markers_ok || !jfif_tag_ok || app0_len < 16)
        throw new ImageIOException("not JPEG/JFIF");

    // head[11] is the major version, head[12] the minor one;
    // head[13..18] hold the density unit and x/y density (unused here)
    if (head[11] != 1)
        throw new ImageIOException("version not supported");

    // thumbnail of head[18] x head[19] pixels, 3 bytes each; it must account
    // for everything in the segment beyond the 16 fixed bytes
    immutable int thumb_bytes = head[18] * head[19] * 3;
    if (thumb_bytes != cast(int) app0_len - 16)
        throw new ImageIOException("corrupt header");
    if (thumb_bytes != 0)
        stream.seek(thumb_bytes, SEEK_CUR);

    JPEG_Decoder dc = { stream: stream };

    read_markers(dc);   // reads until first scan header or eoi
    if (dc.eoi_reached)
        throw new ImageIOException("no image data");

    if (req_chans == 0)
        dc.tgt_chans = dc.num_comps;
    else
        dc.tgt_chans = cast(int) req_chans;

    IFImage image;
    image.w = dc.width;
    image.h = dc.height;
    image.c = cast(ColFmt) dc.tgt_chans;
    image.pixels = decode_jpeg(dc);
    return image;
}
1533 
// Decoder state for one JPEG image: the input stream, the tables read from
// the marker segments, the bit reader state and the per-component data.
struct JPEG_Decoder {
    Reader stream;

    bool has_frame_header = false;  // set by read_markers once SOF0 was read
    bool eoi_reached = false;       // set by read_markers on the EOI marker

    ubyte[64][4] qtables;
    HuffTab[2] ac_tables;   // indexed by the table slot given in the DHT segment
    HuffTab[2] dc_tables;   // indexed by the table slot given in the DHT segment

    ubyte cb;  // current byte (next bit always at MSB)
    int bits_left;   // num of unused bits in cb

    Component[3] comps;
    ubyte num_comps;
    int[3] index_for;   // index_for[0] is index of comp that comes first in stream
    int tgt_chans;      // channels in the decoded result, set by read_jpeg

    size_t width, height;

    int hmax, vmax;

    ushort restart_interval;    // number of MCUs in restart interval

    // image component
    struct Component {
        ubyte id;
        ubyte sfx, sfy;   // sampling factors, aka. h and v
        long x, y;       // total num of samples, without fill samples
        ubyte qtable;
        ubyte ac_table;
        ubyte dc_table;
        int pred;                // dc prediction
        ubyte[] data;   // reconstructed samples
    }

    int num_mcu_x;
    int num_mcu_y;
}
1573 
// One Huffman table as read from a DHT segment and prepared by derive_table.
struct HuffTab {
    // TODO where in the spec does it say 256 values/codes at most?
    ubyte[256] values;              // symbol values, in the order stored in the DHT segment
    ubyte[257] sizes;               // code length of each code, terminated by a 0 entry
    short[16] mincode, maxcode;     // filled by derive_mincode_maxcode_valptr; -1 initially
    short[16] valptr;               // filled by derive_mincode_maxcode_valptr; -1 initially
}
1581 
// Marker codes, i.e. the byte that follows the 0xff byte of a JPEG marker.
// Members that are commented out are listed for reference only; APP0/APPf
// and RST0/RST7 are the ends of ranges.
enum Marker : ubyte {
    SOI = 0xd8,     // start of image
    SOF0 = 0xc0,    // start of frame / baseline DCT
    //SOF1 = 0xc1,    // start of frame / extended seq.
    //SOF2 = 0xc2,    // start of frame / progressive DCT
    SOF3 = 0xc3,    // start of frame / lossless
    SOF9 = 0xc9,    // start of frame / extended seq., arithmetic
    SOF11 = 0xcb,    // start of frame / lossless, arithmetic
    DHT = 0xc4,     // define huffman tables
    DQT = 0xdb,     // define quantization tables
    DRI = 0xdd,     // define restart interval
    SOS = 0xda,     // start of scan
    DNL = 0xdc,     // define number of lines
    RST0 = 0xd0,    // restart entropy coded data
    // ...
    RST7 = 0xd7,    // restart entropy coded data
    APP0 = 0xe0,    // application 0 segment
    // ...
    APPf = 0xef,    // application f segment
    //DAC = 0xcc,     // define arithmetic conditioning table
    COM = 0xfe,     // comment
    EOI = 0xd9,     // end of image
}
1605 
// Processes marker segments from dc.stream until a scan header has been read
// or the end-of-image marker is reached (dc.eoi_reached is set then).
// Does nothing if dc.eoi_reached is already set.
//
// DHT, DQT, DRI, SOF0 and SOS segments are parsed into `dc`; APPn and COM
// segments are skipped. Throws ImageIOException on a missing marker, a second
// frame header, a scan header without a frame header, or any other marker.
void read_markers(ref JPEG_Decoder dc) {
    while (!dc.eoi_reached) {
        ubyte[2] mk;
        dc.stream.readExact(mk, 2);

        if (mk[0] != 0xff)
            throw new ImageIOException("no marker");
        // skip any additional 0xff bytes in front of the marker code
        while (mk[1] == 0xff)
            dc.stream.readExact(mk[1..$], 1);

        debug(DebugJPEG) writefln("marker: %s (%1$x)\t", cast(Marker) mk[1]);

        immutable ubyte code = mk[1];
        if (code == Marker.DHT) {
            dc.read_huffman_tables();
        } else if (code == Marker.DQT) {
            dc.read_quantization_tables();
        } else if (code == Marker.SOF0) {
            if (dc.has_frame_header)
                throw new ImageIOException("extra frame header");
            debug(DebugJPEG) writeln();
            dc.read_frame_header();
            dc.has_frame_header = true;
        } else if (code == Marker.SOS) {
            if (!dc.has_frame_header)
                throw new ImageIOException("no frame header");
            dc.read_scan_header();
            return;     // the scan data follows; the caller takes over
        } else if (code == Marker.DRI) {
            dc.read_restart_interval();
        } else if (code == Marker.EOI) {
            dc.eoi_reached = true;
        } else if ((Marker.APP0 <= code && code <= Marker.APPf) || code == Marker.COM) {
            debug(DebugJPEG) writefln("-> skipping segment");
            // the segment length includes its own two bytes
            ubyte[2] lenbuf = void;
            dc.stream.readExact(lenbuf, lenbuf.length);
            immutable int skip = bigEndianToNative!ushort(lenbuf) - 2;
            dc.stream.seek(skip, SEEK_CUR);
        } else {
            throw new ImageIOException("invalid / unsupported marker");
        }
    }
}
1648 
// DHT -- define huffman tables
//
// Reads one DHT segment from dc.stream. The segment may define several
// tables; each is stored in dc.dc_tables or dc.ac_tables at the slot named in
// its info byte and prepared for decoding with derive_table.
// Throws ImageIOException if a table uses a slot or class other than 0 or 1,
// or declares more than 256 codes.
void read_huffman_tables(ref JPEG_Decoder dc) {
    ubyte[19] buf = void;
    dc.stream.readExact(buf, 2);
    // the segment length includes its own two bytes
    int remaining = bigEndianToNative!ushort(buf[0..2]) - 2;

    while (0 < remaining) {
        dc.stream.readExact(buf, 17);   // info byte & the BITS
        immutable ubyte slot = buf[0] & 0xf;     // must be 0 or 1 for baseline
        immutable ubyte klass = buf[0] >> 4;     // 0 = dc table, 1 = ac table
        if (1 < slot || 1 < klass)
            throw new ImageIOException("invalid / not supported");

        // total number of huffman codes = sum of the 16 BITS counts
        int num_codes = 0;
        foreach (count; buf[1..17])
            num_codes += count;
        if (256 < num_codes)   // TODO where in the spec?
            throw new ImageIOException("invalid / not supported");

        HuffTab[] tables = (klass == 0) ? dc.dc_tables[] : dc.ac_tables[];
        dc.stream.readExact(tables[slot].values, num_codes);
        derive_table(tables[slot], buf[1..17]);

        remaining -= 17 + num_codes;
    }
}
1681 
// Builds the decoding data of one huffman table from its BITS counts.
//
// num_values is the BITS: num_values[i] is the number of codes that are
// i+1 bits long.  The function fills table.sizes with one code length per
// code (terminated by a 0 entry), generates the code words in increasing
// order and finally fills table.mincode / table.maxcode / table.valptr via
// derive_mincode_maxcode_valptr.
//
// The caller is expected to have verified that the counts sum to at most 256.
void derive_table(ref HuffTab table, in ref ubyte[16] num_values) {
    short[256] codes;

    // one code size per code, in order of increasing size
    int k = 0;
    foreach (i; 0..16) {
        foreach (j; 0..num_values[i]) {
            table.sizes[k] = cast(ubyte) (i + 1);
            ++k;
        }
    }
    table.sizes[k] = 0;     // terminator

    // A table without any codes must skip code generation: the do/while
    // below always consumes at least one entry and would otherwise walk past
    // the terminator and index out of bounds.
    if (k != 0) {
        k = 0;
        short code = 0;
        ubyte si = table.sizes[k];
        while (true) {
            // consecutive codes of the same size
            do {
                codes[k] = code;
                ++code;
                ++k;
            } while (si == table.sizes[k]);

            if (table.sizes[k] == 0)
                break;

            // move on to the next used code size
            debug(DebugJPEG) assert(si < table.sizes[k]);
            do {
                code <<= 1;
                ++si;
            } while (si != table.sizes[k]);
        }
    }

    // for an empty table this leaves mincode/maxcode/valptr at -1
    derive_mincode_maxcode_valptr(
        table.mincode, table.maxcode, table.valptr,
        codes, num_values
    );
}
1720 
// F.15
//
// For every code length i+1 that has at least one code (num_values[i] != 0)
// this stores the index of its first code in valptr[i], the first code in
// mincode[i] and the last code in maxcode[i].  `codes` holds the code words
// in the same order as the counts in num_values.  Entries for lengths
// without codes are set to -1 in all three output tables.
void derive_mincode_maxcode_valptr(
        ref short[16] mincode, ref short[16] maxcode, ref short[16] valptr,
        in ref short[256] codes, in ref ubyte[16] num_values) pure
{
    mincode[] = -1;
    maxcode[] = -1;
    valptr[] = -1;

    int first = 0;      // index of the first code of the current length
    foreach (len_idx, count; num_values) {
        if (count == 0)
            continue;

        valptr[len_idx] = cast(short) first;
        mincode[len_idx] = codes[first];
        const int last = first + count - 1;
        maxcode[len_idx] = codes[last];
        first = last + 1;
    }
}
1741 
// DQT -- define quantization tables
//
// Parses one DQT segment from dc.stream.  After the 16-bit big-endian length
// the segment must consist of whole 65-byte entries: an info byte (precision
// in the high nibble, table slot in the low nibble) followed by 64 table
// bytes, which are stored in dc.qtables[slot] in file order.
// Throws ImageIOException if the length does not fit that layout, if the
// slot is greater than 3 or if the precision is not 0 (8 bit).
void read_quantization_tables(ref JPEG_Decoder dc) {
    ubyte[2] head = void;
    dc.stream.readExact(head, 2);
    const int seg_len = bigEndianToNative!ushort(head[0..2]);
    if (seg_len % 65 != 2)
        throw new ImageIOException("invalid / not supported");

    for (int remaining = seg_len - 2; remaining > 0; remaining -= 1 + 64) {
        dc.stream.readExact(head, 1);
        const ubyte info = head[0];
        const ubyte slot = info & 0xf;
        const ubyte precision = info >> 4;  // 0 = 8 bit, 1 = 16 bit
        if (slot > 3 || precision != 0)    // only 8 bit for baseline
            throw new ImageIOException("invalid / not supported");

        dc.stream.readExact(dc.qtables[slot], 64);
    }
}
1762 
// SOF0 -- start of frame
//
// Reads the frame header from dc.stream and fills in dc.width, dc.height,
// dc.num_comps, the per-component id / sampling factors / quantization table
// index, dc.hmax / dc.vmax, the per-component sample dimensions and the
// number of MCUs in each direction.
// Throws ImageIOException unless the precision is 8, there are 1 or 3
// components, the segment length matches, every component id minus one is a
// valid component index, all sampling factors are in 1..4, every
// quantization table index is at most 3 and one MCU holds at most 10 data
// units.
void read_frame_header(ref JPEG_Decoder dc) {
    ubyte[9] buf = void;
    dc.stream.readExact(buf, 8);
    const int len = bigEndianToNative!ushort(buf[0..2]);  // 8 + num_comps*3
    const ubyte precision = buf[2];
    dc.height = bigEndianToNative!ushort(buf[3..5]);
    dc.width = bigEndianToNative!ushort(buf[5..7]);
    dc.num_comps = buf[7];

    const bool comp_count_ok = (dc.num_comps == 1 || dc.num_comps == 3);
    if (precision != 8 || !comp_count_ok || len != 8 + dc.num_comps*3)
        throw new ImageIOException("invalid / not supported");

    dc.hmax = 0;
    dc.vmax = 0;
    int units_per_mcu = 0;  // data units in one mcu
    dc.stream.readExact(buf, dc.num_comps*3);
    foreach (i; 0..dc.num_comps) {
        const ubyte id = buf[i*3];
        const ubyte sampling = buf[i*3 + 1];
        const ubyte qtable = buf[i*3 + 2];

        // component ids are used as 1-based indices; id 0 wraps around to a
        // huge value and is rejected by the range check
        const uint slot = id - 1;
        if (dc.num_comps <= slot)
            throw new ImageIOException("invalid / not supported");
        dc.index_for[i] = slot;

        auto comp = &dc.comps[slot];
        comp.id = id;
        comp.sfx = sampling >> 4;
        comp.sfy = sampling & 0xf;
        comp.qtable = qtable;

        const bool factors_ok = 1 <= comp.sfx && comp.sfx <= 4
                             && 1 <= comp.sfy && comp.sfy <= 4;
        if (!factors_ok || 3 < comp.qtable)
            throw new ImageIOException("invalid / not supported");

        if (dc.hmax < comp.sfx) dc.hmax = comp.sfx;
        if (dc.vmax < comp.sfy) dc.vmax = comp.sfy;

        units_per_mcu += comp.sfx * comp.sfy;
    }
    if (units_per_mcu > 10)
        throw new ImageIOException("invalid / not supported");

    // sample dimensions of each component, scaled by its sampling factors
    foreach (i; 0..dc.num_comps) {
        auto comp = &dc.comps[i];
        comp.x = cast(long) ceil(dc.width * (cast(double) comp.sfx / dc.hmax));
        comp.y = cast(long) ceil(dc.height * (cast(double) comp.sfy / dc.vmax));

        debug(DebugJPEG) writefln("%d comp %d sfx/sfy: %d/%d", i, comp.id,
                                                                  comp.sfx,
                                                                  comp.sfy);
    }

    // number of MCUs needed to cover the image, rounding up
    const long mcu_w = dc.hmax * 8;
    const long mcu_h = dc.vmax * 8;
    dc.num_mcu_x = cast(int) ((dc.width + mcu_w-1) / mcu_w);
    dc.num_mcu_y = cast(int) ((dc.height + mcu_h-1) / mcu_h);

    debug(DebugJPEG) {
        writefln("\tlen: %s", len);
        writefln("\tprecision: %s", precision);
        writefln("\tdimensions: %s x %s", dc.width, dc.height);
        writefln("\tnum_comps: %s", dc.num_comps);
        writefln("\tnum_mcu_x: %s", dc.num_mcu_x);
        writefln("\tnum_mcu_y: %s", dc.num_mcu_y);
    }
}
1830 
// SOS -- start of scan
//
// Reads the scan header from dc.stream and stores, for every component
// listed in it, the indices of the dc and ac huffman tables to use.
// Throws ImageIOException if the number of scan components differs from
// dc.num_comps, if the segment length does not match, if a component id is
// not known from the frame header or if a table index is greater than 1.
void read_scan_header(ref JPEG_Decoder dc) {
    ubyte[3] head = void;
    dc.stream.readExact(head, head.length);
    const ushort len = bigEndianToNative!ushort(head[0..2]);
    const ubyte num_scan_comps = head[2];

    if (num_scan_comps != dc.num_comps || len != (6+num_scan_comps*2))
        throw new ImageIOException("invalid / not supported");

    // remainder of the segment: two bytes per component plus three bytes
    // that are ignored below
    auto buf = (cast(ubyte*) alloca((len-3) * ubyte.sizeof))[0..len-3];
    dc.stream.readExact(buf, buf.length);

    foreach (i; 0..num_scan_comps) {
        const ubyte comp_id = buf[i*2];

        // find the component with this id
        int ci = 0;
        for (; ci < dc.num_comps; ++ci) {
            if (dc.comps[ci].id == comp_id)
                break;
        }
        if (ci >= dc.num_comps)
            throw new ImageIOException("invalid / not supported");

        const ubyte tables = buf[i*2+1];
        auto comp = &dc.comps[ci];
        comp.dc_table = tables >> 4;
        comp.ac_table = tables & 0xf;
        if (comp.dc_table > 1 || comp.ac_table > 1)
            throw new ImageIOException("invalid / not supported");
    }

    // ignore these
    //ubyte spectral_start = buf[$-3];
    //ubyte spectral_end = buf[$-2];
    //ubyte approx = buf[$-1];
}
1865 
// DRI -- reads the 4-byte restart interval segment (16-bit length, which
// must be 4, followed by the 16-bit interval) and stores the interval in
// dc.restart_interval.  Throws ImageIOException on any other length.
void read_restart_interval(ref JPEG_Decoder dc) {
    ubyte[4] seg = void;
    dc.stream.readExact(seg, seg.length);

    const ushort seg_len = bigEndianToNative!ushort(seg[0..2]);
    if (seg_len != 4)
        throw new ImageIOException("invalid / not supported");

    dc.restart_interval = bigEndianToNative!ushort(seg[2..4]);
    debug(DebugJPEG) writeln("restart interval set to: ", dc.restart_interval);
}
1875 
// reads data after the SOS segment
//
// Allocates one sample plane per component, sized to whole MCUs
// (num_mcu_x*sfx*8 by num_mcu_y*sfy*8 samples), decodes the single scan into
// those planes and returns the image converted to the target format by
// dc.reconstruct().
// Throws ImageIOException if a plane size cannot be represented in size_t.
ubyte[] decode_jpeg(ref JPEG_Decoder dc) {
    foreach (ref comp; dc.comps[0..dc.num_comps]) {
        // Compute the plane size in size_t: in plain int arithmetic the
        // product can overflow for large images, which would allocate a
        // buffer that is too small for the writes done by decode_scan.
        const size_t plane_w = cast(size_t) dc.num_mcu_x * comp.sfx * 8;
        const size_t plane_h = cast(size_t) dc.num_mcu_y * comp.sfy * 8;
        if (plane_h != 0 && size_t.max / plane_h < plane_w)
            throw new ImageIOException("invalid / not supported");
        comp.data = new ubyte[plane_w * plane_h];
    }

    // E.7 -- Multiple scans are for progressive images which are not supported
    //while (!dc.eoi_reached) {
        decode_scan(dc);    // E.2.3
        //read_markers(dc);   // reads until next scan header or eoi
    //}

    // throw away fill samples and convert to target format
    return dc.reconstruct();
}
1890 
// E.2.3 and E.8 and E.9
//
// Decodes the entropy-coded data of one scan into the component planes
// (comp.data).  MCUs are processed row by row; within one MCU every
// component contributes sfy rows of sfx data units, in the order given by
// dc.index_for.  If a restart interval is set, an RSTx marker is expected
// after every dc.restart_interval MCUs (except after the last interval),
// after which the bit buffer and the dc predictors are reset.
void decode_scan(ref JPEG_Decoder dc) {
    debug(DebugJPEG) writeln("decode scan...");

    // intervals: restart intervals left (including the current one)
    // mcus: MCUs left in the current interval
    int intervals, mcus;
    if (0 < dc.restart_interval) {
        int total_mcus = dc.num_mcu_x * dc.num_mcu_y;
        intervals = (total_mcus + dc.restart_interval-1) / dc.restart_interval;
        mcus = dc.restart_interval;
    } else {
        intervals = 1;
        mcus = dc.num_mcu_x * dc.num_mcu_y;
    }
    debug(DebugJPEG) writeln("intervals: ", intervals);

    foreach (mcu_j; 0 .. dc.num_mcu_y) {
        foreach (mcu_i; 0 .. dc.num_mcu_x) {

            // decode mcu
            foreach (_c; 0..dc.num_comps) {
                auto comp = &dc.comps[dc.index_for[_c]];
                foreach (du_j; 0 .. comp.sfy) {
                    foreach (du_i; 0 .. comp.sfx) {
                        // decode entropy, dequantize & dezigzag
                        short[64] data = decode_block(dc, *comp, dc.qtables[comp.qtable]);
                        // idct & level-shift
                        // (outx, outy) is the top-left sample of this data
                        // unit within the component plane
                        long outx = (mcu_i * comp.sfx + du_i) * 8;
                        long outy = (mcu_j * comp.sfy + du_j) * 8;
                        long dst_stride = dc.num_mcu_x * comp.sfx*8;
                        ubyte* dst = comp.data.ptr + outy*dst_stride + outx;
                        stbi__idct_block(dst, dst_stride, data);
                    }
                }
            }

            --mcus;

            if (!mcus) {
                // end of the current restart interval
                --intervals;
                if (!intervals)
                    return;

                read_restart(dc.stream);    // RSTx marker

                if (intervals == 1) {
                    // last interval, may have fewer MCUs than defined by DRI
                    mcus = (dc.num_mcu_y - mcu_j - 1) * dc.num_mcu_x + dc.num_mcu_x - mcu_i - 1;
                } else {
                    mcus = dc.restart_interval;
                }

                // reset decoder
                dc.cb = 0;
                dc.bits_left = 0;
                foreach (k; 0..dc.num_comps)
                    dc.comps[k].pred = 0;
            }

        }
    }
}
1952 
// RST0-RST7
//
// Consumes two bytes from the stream and verifies that they form a restart
// marker (0xff followed by a byte in Marker.RST0 .. Marker.RST7).
// Throws ImageIOException otherwise.
void read_restart(Reader stream) {
    ubyte[2] marker = void;
    stream.readExact(marker, marker.length);

    const bool is_restart = marker[0] == 0xff
                         && Marker.RST0 <= marker[1]
                         && marker[1] <= Marker.RST7;
    if (!is_restart)
        throw new ImageIOException("reset marker missing");
    // the markers should cycle 0 through 7, could check that here...
}
1961 
// Maps a coefficient's position k in zigzag order to its index in the
// 64-element block; decode_block stores coefficient k at res[dezigzag[k]].
immutable ubyte[64] dezigzag = [
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63,
];
1972 
// decode entropy, dequantize & dezigzag (see section F.2)
//
// Decodes one 8x8 block of coefficients for `comp`.  The dc coefficient is
// coded as a difference to comp.pred, which is updated here.  The ac
// coefficients are coded as (run of zeroes, size) pairs; a pair with size 0
// is either a run of 16 zeroes (run == 0xf) or the end of the block.
// qtable is indexed in zigzag order, the result is in dezigzagged order.
// Throws ImageIOException if a coefficient would land past index 63.
short[64] decode_block(ref JPEG_Decoder dc, ref JPEG_Decoder.Component comp,
                                                    in ref ubyte[64] qtable)
{
    short[64] block = 0;

    // dc coefficient
    const ubyte dc_size = decode_huff(dc, dc.dc_tables[comp.dc_table]);
    int diff = 0;
    if (dc_size)
        diff = dc.receive_and_extend(dc_size);

    comp.pred = comp.pred + diff;
    block[0] = cast(short) (comp.pred * qtable[0]);

    // ac coefficients
    int k = 1;
    while (true) {
        const ubyte rs = decode_huff(dc, dc.ac_tables[comp.ac_table]);
        const ubyte run = rs >> 4;
        const ubyte size = rs & 0xf;

        if (size == 0) {
            if (run != 0xf)
                break;      // end of block
            k += 16;    // run length is 16
        } else {
            k += run;

            if (k > 63)
                throw new ImageIOException("corrupt block");
            block[dezigzag[k]] = cast(short) (dc.receive_and_extend(size) * qtable[k]);
            k += 1;
        }

        if (k >= 64)
            break;
    }

    return block;
}
2008 
// Reads an s-bit value from the bit stream (most significant bit first) and
// sign-extends it: values below 2^(s-1) stand for negative numbers and are
// mapped to value - 2^s + 1.
//
// s comes straight from the decoded huffman values, i.e. from file data, so
// it is validated before it is used as a shift count: s == 0 yields 0
// without reading any bits, s > 16 throws ImageIOException.
int receive_and_extend(ref JPEG_Decoder dc, ubyte s) {
    if (s == 0)
        return 0;   // avoids the shift by -1 below
    if (16 < s)     // shifting an int by 32 or more bits is not defined
        throw new ImageIOException("corrupt block");

    // receive
    int symbol = 0;
    foreach (_; 0..s)
        symbol = (symbol << 1) + nextbit(dc);
    // extend
    int vt = 1 << (s-1);
    if (symbol < vt)
        return symbol + (-1 << s) + 1;
    return symbol;
}
2020 
// F.16 -- the DECODE
//
// Reads bits from the stream one at a time until the accumulated code is
// not greater than the largest code of the current length, then looks up
// and returns the value for that code.
// Throws ImageIOException if no code matches within tab.maxcode.length bits
// or if the computed value index lies outside tab.values.
ubyte decode_huff(ref JPEG_Decoder dc, in ref HuffTab tab) {
    short code = nextbit(dc);

    int len_idx = 0;    // code length minus one
    for (; tab.maxcode[len_idx] < code; ) {
        code = cast(short) ((code << 1) + nextbit(dc));
        ++len_idx;
        if (len_idx >= tab.maxcode.length)
            throw new ImageIOException("corrupt huffman coding");
    }

    const int value_idx = tab.valptr[len_idx] + code - tab.mincode[len_idx];
    // the cast also turns negative indices into out-of-range values
    if (cast(uint) value_idx >= tab.values.length)
        throw new ImageIOException("corrupt huffman coding");
    return tab.values[value_idx];
}
2037 
// F.2.2.5 and F.18
//
// Returns the next bit of the entropy-coded data.  dc.cb holds the current
// byte with the next bit in its most significant position, dc.bits_left the
// number of unread bits in it.  When a fresh byte is 0xff, the byte after it
// is consumed as well and must be 0x00; anything else throws
// ImageIOException.
ubyte nextbit(ref JPEG_Decoder dc) {
    if (dc.bits_left == 0) {
        // refill the bit buffer
        ubyte[1] one;
        dc.stream.readExact(one, 1);
        dc.cb = one[0];
        dc.bits_left = 8;

        if (dc.cb == 0xff) {
            dc.stream.readExact(one, 1);
            if (one[0] != 0x0) {
                throw new ImageIOException("unexpected marker");
            }
        }
    }

    const ubyte bit = dc.cb >> 7;
    dc.cb <<= 1;
    dc.bits_left -= 1;
    return bit;
}
2059 
// Converts the decoded component planes into the final pixel buffer of
// dc.width * dc.height * dc.tgt_chans bytes and returns it.
// The switch key is num_comps*10 + tgt_chans, so e.g. 34 means "3 source
// components, 4 target channels".  Any other combination hits assert(0).
ubyte[] reconstruct(in ref JPEG_Decoder dc) {
    auto result = new ubyte[dc.width * dc.height * dc.tgt_chans];

    switch (dc.num_comps * 10 + dc.tgt_chans) {
        // three components -> RGB / RGBA
        case 34, 33:
            // any component sampled at less than the maximum rate needs
            // upsampling
            foreach (const ref comp; dc.comps[0..dc.num_comps]) {
                if (comp.sfx != dc.hmax || comp.sfy != dc.vmax)
                    return dc.upsample_rgb(result);
            }

            // si: start of the current row in the planes, di: output index
            size_t si, di;
            foreach (j; 0 .. dc.height) {
                foreach (i; 0 .. dc.width) {
                    result[di .. di+3] = ycbcr_to_rgb(
                        dc.comps[0].data[si+i],
                        dc.comps[1].data[si+i],
                        dc.comps[2].data[si+i],
                    );
                    if (dc.tgt_chans == 4)
                        result[di+3] = 255;
                    di += dc.tgt_chans;
                }
                // plane rows are padded to whole MCUs
                si += dc.num_mcu_x * dc.comps[0].sfx * 8;
            }
            return result;
        // one or three components -> Y / YA, only the first component is used
        case 32, 12, 31, 11:
            const comp = &dc.comps[0];
            if (comp.sfx == dc.hmax && comp.sfy == dc.vmax) {
                size_t si, di;
                if (dc.tgt_chans == 2) {
                    foreach (j; 0 .. dc.height) {
                        foreach (i; 0 .. dc.width) {
                            result[di++] = comp.data[si+i];
                            result[di++] = 255;
                        }
                        si += dc.num_mcu_x * comp.sfx * 8;
                    }
                } else {
                    // single channel: rows can be copied as they are
                    foreach (j; 0 .. dc.height) {
                        result[di .. di+dc.width] = comp.data[si .. si+dc.width];
                        si += dc.num_mcu_x * comp.sfx * 8;
                        di += dc.width;
                    }
                }
                return result;
            } else {
                // need to resample (haven't tested this...)
                return dc.upsample_gray(result);
            }
        // one component -> RGB / RGBA, the sample is replicated to r, g and b
        case 14, 13:
            const comp = &dc.comps[0];
            size_t si, di;
            foreach (j; 0 .. dc.height) {
                foreach (i; 0 .. dc.width) {
                    result[di .. di+3] = comp.data[si+i];
                    if (dc.tgt_chans == 4)
                        result[di+3] = 255;
                    di += dc.tgt_chans;
                }
                si += dc.num_mcu_x * comp.sfx * 8;
            }
            return result;
        default: assert(0);
    }
}
2125 
// Fills `result` with the first component, picking for every output pixel
// the sample at the proportionally scaled position in the component plane
// (no interpolation).  With two target channels the second one is set to
// 255.  Returns `result`.
ubyte[] upsample_gray(in ref JPEG_Decoder dc, ubyte[] result) {
    const plane = &dc.comps[0];
    const size_t stride = dc.num_mcu_x * plane.sfx * 8;
    const double yscale = cast(double) plane.y / dc.height;
    const double xscale = cast(double) plane.x / dc.width;
    const bool with_alpha = (dc.tgt_chans == 2);

    size_t out_idx;
    foreach (row; 0 .. dc.height) {
        const size_t row_base = cast(size_t) floor(row * yscale) * stride;
        foreach (col; 0 .. dc.width) {
            result[out_idx] = plane.data[row_base + cast(size_t) floor(col * xscale)];
            if (with_alpha)
                result[out_idx+1] = 255;
            out_idx += dc.tgt_chans;
        }
    }
    return result;
}
2143 
// Fills `result` with RGB(A) pixels converted from the three component
// planes.  For every output pixel each plane is sampled at the
// proportionally scaled position (no interpolation).  With four target
// channels the fourth one is set to 255.  Returns `result`.
ubyte[] upsample_rgb(in ref JPEG_Decoder dc, ubyte[] result) {
    const c0 = &dc.comps[0];
    const c1 = &dc.comps[1];
    const c2 = &dc.comps[2];

    // plane rows are padded to whole MCUs
    const size_t stride0 = dc.num_mcu_x * c0.sfx * 8;
    const size_t stride1 = dc.num_mcu_x * c1.sfx * 8;
    const size_t stride2 = dc.num_mcu_x * c2.sfx * 8;

    const double yscale0 = cast(double) c0.y / dc.height;
    const double yscale1 = cast(double) c1.y / dc.height;
    const double yscale2 = cast(double) c2.y / dc.height;
    const double xscale0 = cast(double) c0.x / dc.width;
    const double xscale1 = cast(double) c1.x / dc.width;
    const double xscale2 = cast(double) c2.x / dc.width;

    const bool with_alpha = (dc.tgt_chans == 4);

    size_t out_idx;
    foreach (row; 0 .. dc.height) {
        const size_t base0 = cast(size_t) floor(row * yscale0) * stride0;
        const size_t base1 = cast(size_t) floor(row * yscale1) * stride1;
        const size_t base2 = cast(size_t) floor(row * yscale2) * stride2;

        foreach (col; 0 .. dc.width) {
            result[out_idx .. out_idx+3] = ycbcr_to_rgb(
                c0.data[base0 + cast(size_t) floor(col * xscale0)],
                c1.data[base1 + cast(size_t) floor(col * xscale1)],
                c2.data[base2 + cast(size_t) floor(col * xscale2)],
            );
            if (with_alpha)
                result[out_idx+3] = 255;
            out_idx += dc.tgt_chans;
        }
    }
    return result;
}
2175 
// Converts one YCbCr sample triple to RGB.  The chroma values are centered
// around 128; each result is limited to 0..255 by clamp.
ubyte[3] ycbcr_to_rgb(ubyte y, ubyte cb, ubyte cr) pure {
    const int blue_diff = cb - 128;
    const int red_diff = cr - 128;

    ubyte[3] rgb = void;
    rgb[0] = clamp(y + 1.402*red_diff);
    rgb[1] = clamp(y - 0.34414*blue_diff - 0.71414*red_diff);
    rgb[2] = clamp(y + 1.772*blue_diff);
    return rgb;
}
2183 
// Limits x to the range 0..255 and truncates it to ubyte.
ubyte clamp(float x) pure {
    ubyte limited;
    if (x < 0)
        limited = 0;
    else if (255 < x)
        limited = 255;
    else
        limited = cast(ubyte) x;
    return limited;
}
2189 
2190 // ------------------------------------------------------------
2191 // The IDCT stuff here (to the next dashed line) is copied and adapted from
2192 // stb_image which is released under public domain.  Many thanks to stb_image
2193 // author, Sean Barrett.
2194 // Link: https://github.com/nothings/stb/blob/master/stb_image.h
2195 
// converts x to fixed point with 12 fractional bits (adds 0.5, then truncates)
pure int f2f(float x) {
    return cast(int) (0.5 + x * 4096);
}
// scales an integer by 1<<12, the same factor f2f uses
pure int fsh(int x) {
    return x * 4096;
}
2198 
// from stb_image, derived from jidctint -- DCT_ISLOW
//
// One 1-D pass of the inverse DCT over the eight inputs s0..s7, in fixed
// point (the constants are scaled by 1<<12 via f2f, the even inputs s0/s4
// by fsh).  The results are left in two groups for the caller to combine:
// x0..x3 are computed from the even inputs (s0, s2, s4, s6), t0..t3 from the
// odd inputs (s1, s3, s5, s7).  The caller forms the eight outputs as
// x0+t3, x1+t2, x2+t1, x3+t0, x3-t0, x2-t1, x1-t2, x0-t3 (see
// stbi__idct_block).
pure void STBI__IDCT_1D(ref int t0, ref int t1, ref int t2, ref int t3,
                        ref int x0, ref int x1, ref int x2, ref int x3,
        int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7)
{
   int p1,p2,p3,p4,p5;
   //int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3;
   // even part: inputs s0, s2, s4, s6
   p2 = s2;
   p3 = s6;
   p1 = (p2+p3) * f2f(0.5411961f);
   t2 = p1 + p3 * f2f(-1.847759065f);
   t3 = p1 + p2 * f2f( 0.765366865f);
   p2 = s0;
   p3 = s4;
   t0 = fsh(p2+p3);
   t1 = fsh(p2-p3);
   x0 = t0+t3;
   x3 = t0-t3;
   x1 = t1+t2;
   x2 = t1-t2;
   // odd part: inputs s1, s3, s5, s7 (t0..t3 are reused from here on)
   t0 = s7;
   t1 = s5;
   t2 = s3;
   t3 = s1;
   p3 = t0+t2;
   p4 = t1+t3;
   p1 = t0+t3;
   p2 = t1+t2;
   p5 = (p3+p4)*f2f( 1.175875602f);
   t0 = t0*f2f( 0.298631336f);
   t1 = t1*f2f( 2.053119869f);
   t2 = t2*f2f( 3.072711026f);
   t3 = t3*f2f( 1.501321110f);
   p1 = p5 + p1*f2f(-0.899976223f);
   p2 = p5 + p2*f2f(-2.562915447f);
   p3 = p3*f2f(-1.961570560f);
   p4 = p4*f2f(-0.390180644f);
   t3 += p1+p4;
   t2 += p2+p3;
   t1 += p2+p4;
   t0 += p1+p3;
}
2241 
// idct and level-shift
//
// Applies the 2-D inverse DCT to the 64 coefficients in `data` and writes
// the resulting 8x8 samples, clamped to 0..255, to `dst`.  Consecutive
// output rows are dst_stride bytes apart.  `dst` is a raw pointer, so the
// caller must make sure that 8 rows of 8 bytes are writable there.
pure void stbi__idct_block(ubyte* dst, long dst_stride, in ref short[64] data) {
   int i;
   int[64] val;     // intermediate result of the column pass
   int* v = val.ptr;
   const(short)* d = data.ptr;

   // columns
   for (i=0; i < 8; ++i,++d, ++v) {
      // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
           && d[40]==0 && d[48]==0 && d[56]==0) {
         //    no shortcut                 0     seconds
         //    (1|2|3|4|5|6|7)==0          0     seconds
         //    all separate               -0.047 seconds
         //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
         int dcterm = d[0] << 2;
         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
      } else {
         int t0,t1,t2,t3,x0,x1,x2,x3;
         STBI__IDCT_1D(
             t0, t1, t2, t3,
             x0, x1, x2, x3,
             d[ 0], d[ 8], d[16], d[24],
             d[32], d[40], d[48], d[56]
         );
         // constants scaled things up by 1<<12; let's bring them back
         // down, but keep 2 extra bits of precision
         x0 += 512; x1 += 512; x2 += 512; x3 += 512;
         v[ 0] = (x0+t3) >> 10;
         v[56] = (x0-t3) >> 10;
         v[ 8] = (x1+t2) >> 10;
         v[48] = (x1-t2) >> 10;
         v[16] = (x2+t1) >> 10;
         v[40] = (x2-t1) >> 10;
         v[24] = (x3+t0) >> 10;
         v[32] = (x3-t0) >> 10;
      }
   }

   // rows: one output row per iteration
   ubyte* o = dst;
   for (i=0, v=val.ptr; i < 8; ++i,v+=8,o+=dst_stride) {
      // no fast case since the first 1D IDCT spread components out
      int t0,t1,t2,t3,x0,x1,x2,x3;
      STBI__IDCT_1D(
          t0, t1, t2, t3,
          x0, x1, x2, x3,
          v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]
      );
      // constants scaled things up by 1<<12, plus we had 1<<2 from first
      // loop, plus horizontal and vertical each scale by sqrt(8) so together
      // we've got an extra 1<<3, so 1<<17 total we need to remove.
      // so we want to round that, which means adding 0.5 * 1<<17,
      // aka 65536. Also, we'll end up with -128 to 127 that we want
      // to encode as 0-255 by adding 128, so we'll add that before the shift
      x0 += 65536 + (128<<17);
      x1 += 65536 + (128<<17);
      x2 += 65536 + (128<<17);
      x3 += 65536 + (128<<17);
      // tried computing the shifts into temps, or'ing the temps to see
      // if any were out of range, but that was slower
      o[0] = stbi__clamp((x0+t3) >> 17);
      o[7] = stbi__clamp((x0-t3) >> 17);
      o[1] = stbi__clamp((x1+t2) >> 17);
      o[6] = stbi__clamp((x1-t2) >> 17);
      o[2] = stbi__clamp((x2+t1) >> 17);
      o[5] = stbi__clamp((x2-t1) >> 17);
      o[3] = stbi__clamp((x3+t0) >> 17);
      o[4] = stbi__clamp((x3-t0) >> 17);
   }
}
2313 
// clamp to 0-255
pure ubyte stbi__clamp(int x) {
   if (x < 0)
      return 0;
   if (x > 255)
      return 255;
   return cast(ubyte) x;
}
2322 
2323 // the above is adapted from stb_image
2324 // ------------------------------------------------------------
2325 
// Reads the JPEG header from `stream` and reports the image width, height
// and number of components through the out parameters.
void read_jpeg_info(Reader stream, out long w, out long h, out long chans) {
    const header = read_jpeg_header(stream);
    w     = header.width;
    h     = header.height;
    chans = header.num_comps;
}
2332 
2333 // --------------------------------------------------------------------------------
2334 // Conversions
2335 
// Color formats known to the line converters (see get_converter).
// Y..RGBA have the values 1..4, which equal their channel counts;
// BGR (5) and BGRA (6) continue the numbering.
enum _ColFmt : int {
    Unknown = 0,
    Y = 1,
    YA,         // 2
    RGB,        // 3
    RGBA,       // 4
    BGR,        // 5
    BGRA,       // 6
}
2345 
// Signature of the line converters returned by get_converter: reads pixels
// from src and writes the converted pixels to tgt.
alias LineConv = void function(in ubyte[] src, ubyte[] tgt);
2347 
// Returns the line converter for the given source and target formats; both
// arguments are _ColFmt values.  Equal formats yield copy_line.
// Throws ImageIOException("internal error") for a combination that is not
// listed below (note that BGR <-> BGRA is not listed).
LineConv get_converter(long src_chans, long tgt_chans) pure {
    // packs a (source, target) pair into a single switch key
    long combo(long a, long b) pure nothrow { return a*16 + b; }

    if (src_chans == tgt_chans)
        return &copy_line;

    switch (combo(src_chans, tgt_chans)) with (_ColFmt) {
        case combo(Y, YA)      : return &Y_to_YA;
        case combo(Y, RGB)     : return &Y_to_RGB;
        case combo(Y, RGBA)    : return &Y_to_RGBA;
        case combo(Y, BGR)     : return &Y_to_BGR;
        case combo(Y, BGRA)    : return &Y_to_BGRA;
        case combo(YA, Y)      : return &YA_to_Y;
        case combo(YA, RGB)    : return &YA_to_RGB;
        case combo(YA, RGBA)   : return &YA_to_RGBA;
        case combo(YA, BGR)    : return &YA_to_BGR;
        case combo(YA, BGRA)   : return &YA_to_BGRA;
        case combo(RGB, Y)     : return &RGB_to_Y;
        case combo(RGB, YA)    : return &RGB_to_YA;
        case combo(RGB, RGBA)  : return &RGB_to_RGBA;
        case combo(RGB, BGR)   : return &RGB_to_BGR;
        case combo(RGB, BGRA)  : return &RGB_to_BGRA;
        case combo(RGBA, Y)    : return &RGBA_to_Y;
        case combo(RGBA, YA)   : return &RGBA_to_YA;
        case combo(RGBA, RGB)  : return &RGBA_to_RGB;
        case combo(RGBA, BGR)  : return &RGBA_to_BGR;
        case combo(RGBA, BGRA) : return &RGBA_to_BGRA;
        case combo(BGR, Y)     : return &BGR_to_Y;
        case combo(BGR, YA)    : return &BGR_to_YA;
        case combo(BGR, RGB)   : return &BGR_to_RGB;
        case combo(BGR, RGBA)  : return &BGR_to_RGBA;
        case combo(BGRA, Y)    : return &BGRA_to_Y;
        case combo(BGRA, YA)   : return &BGRA_to_YA;
        case combo(BGRA, RGB)  : return &BGRA_to_RGB;
        case combo(BGRA, RGBA) : return &BGRA_to_RGBA;
        default                : throw new ImageIOException("internal error");
    }
}
2386 
// Copies src to tgt unchanged; both must have the same length.
void copy_line(in ubyte[] src, ubyte[] tgt) pure nothrow {
    tgt[] = src[];
}
2390 
// Returns the weighted sum 0.21*r + 0.64*g + 0.15*b as a gray value.
// The cast truncates the fractional part (no rounding).
ubyte luminance(ubyte r, ubyte g, ubyte b) pure nothrow {
    return cast(ubyte) (0.21*r + 0.64*g + 0.15*b); // somewhat arbitrary weights
}
2394 
// Gray -> gray + alpha; alpha is set to 255.
void Y_to_YA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t out_pos = 0;
    foreach (gray; src) {
        tgt[out_pos] = gray;
        tgt[out_pos+1] = 255;
        out_pos += 2;
    }
}
2401 
// Gray -> RGB; the gray value is replicated to all three channels, which
// makes the same function usable for BGR.
alias Y_to_BGR = Y_to_RGB;
void Y_to_RGB(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t out_pos = 0;
    foreach (gray; src) {
        tgt[out_pos .. out_pos+3] = gray;
        out_pos += 3;
    }
}
2407 
// Gray -> RGBA; the gray value is replicated to the three color channels
// (so the same function serves BGRA) and alpha is set to 255.
alias Y_to_BGRA = Y_to_RGBA;
void Y_to_RGBA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t out_pos = 0;
    foreach (gray; src) {
        tgt[out_pos .. out_pos+3] = gray;
        tgt[out_pos+3] = 255;
        out_pos += 4;
    }
}
2415 
// Gray + alpha -> gray; alpha is dropped.
void YA_to_Y(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t in_pos = 0, out_pos = 0;
    while (in_pos < src.length) {
        tgt[out_pos] = src[in_pos];
        in_pos += 2;
        out_pos += 1;
    }
}
2420 
// Gray + alpha -> RGB; the gray value is replicated (so the same function
// serves BGR) and alpha is dropped.
alias YA_to_BGR = YA_to_RGB;
void YA_to_RGB(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t in_pos = 0, out_pos = 0;
    while (in_pos < src.length) {
        tgt[out_pos .. out_pos+3] = src[in_pos];
        in_pos += 2;
        out_pos += 3;
    }
}
2426 
// Gray + alpha -> RGBA; the gray value is replicated to the color channels
// (so the same function serves BGRA) and alpha is carried over.
alias YA_to_BGRA = YA_to_RGBA;
void YA_to_RGBA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t in_pos = 0, out_pos = 0;
    while (in_pos < src.length) {
        tgt[out_pos .. out_pos+3] = src[in_pos];
        tgt[out_pos+3] = src[in_pos+1];
        in_pos += 2;
        out_pos += 4;
    }
}
2434 
// RGB -> gray, using luminance().
void RGB_to_Y(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t in_pos = 0, out_pos = 0;
    while (in_pos < src.length) {
        tgt[out_pos] = luminance(src[in_pos], src[in_pos+1], src[in_pos+2]);
        in_pos += 3;
        out_pos += 1;
    }
}
2439 
// RGB -> gray + alpha, using luminance(); alpha is set to 255.
void RGB_to_YA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t in_pos = 0, out_pos = 0;
    while (in_pos < src.length) {
        tgt[out_pos] = luminance(src[in_pos], src[in_pos+1], src[in_pos+2]);
        tgt[out_pos+1] = 255;
        in_pos += 3;
        out_pos += 2;
    }
}
2446 
// RGB -> RGBA; the color channels are copied and alpha is set to 255.
void RGB_to_RGBA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t in_pos = 0, out_pos = 0;
    while (in_pos < src.length) {
        tgt[out_pos .. out_pos+3] = src[in_pos .. in_pos+3];
        tgt[out_pos+3] = 255;
        in_pos += 3;
        out_pos += 4;
    }
}
2453 
// RGBA -> gray, using luminance(); alpha is dropped.
void RGBA_to_Y(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t in_pos = 0, out_pos = 0;
    while (in_pos < src.length) {
        tgt[out_pos] = luminance(src[in_pos], src[in_pos+1], src[in_pos+2]);
        in_pos += 4;
        out_pos += 1;
    }
}
2458 
// RGBA -> gray + alpha, using luminance(); alpha is carried over.
void RGBA_to_YA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t in_pos = 0, out_pos = 0;
    while (in_pos < src.length) {
        tgt[out_pos] = luminance(src[in_pos], src[in_pos+1], src[in_pos+2]);
        tgt[out_pos+1] = src[in_pos+3];
        in_pos += 4;
        out_pos += 2;
    }
}
2465 
// RGBA -> RGB; the color channels are copied and alpha is dropped.
void RGBA_to_RGB(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t in_pos = 0, out_pos = 0;
    while (in_pos < src.length) {
        tgt[out_pos .. out_pos+3] = src[in_pos .. in_pos+3];
        in_pos += 4;
        out_pos += 3;
    }
}
2470 
// BGR -> gray, using luminance().
// A BGR pixel is stored as blue, green, red, so red is src[k+2], green is
// src[k+1] and blue is src[k].
void BGR_to_Y(in ubyte[] src, ubyte[] tgt) pure nothrow {
    for (size_t k, t;   k < src.length;   k+=3, t+=1)
        tgt[t] = luminance(src[k+2], src[k+1], src[k]);
}
2475 
// BGR -> gray + alpha, using luminance(); alpha is set to 255.
// A BGR pixel is stored as blue, green, red, so red is src[k+2], green is
// src[k+1] and blue is src[k].
void BGR_to_YA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    for (size_t k, t;   k < src.length;   k+=3, t+=2) {
        tgt[t] = luminance(src[k+2], src[k+1], src[k]);
        tgt[t+1] = 255;
    }
}
2482 
// BGR <-> RGB; swaps the first and third channel of every pixel, so the
// same function converts in both directions.
alias RGB_to_BGR = BGR_to_RGB;
void BGR_to_RGB(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t pos = 0;
    while (pos < src.length) {
        tgt[pos  ] = src[pos+2];
        tgt[pos+1] = src[pos+1];
        tgt[pos+2] = src[pos  ];
        pos += 3;
    }
}
2491 
// BGR -> RGBA (and RGB -> BGRA); swaps the first and third channel of every
// pixel and sets alpha to 255.
alias RGB_to_BGRA = BGR_to_RGBA;
void BGR_to_RGBA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t in_pos = 0, out_pos = 0;
    while (in_pos < src.length) {
        tgt[out_pos  ] = src[in_pos+2];
        tgt[out_pos+1] = src[in_pos+1];
        tgt[out_pos+2] = src[in_pos  ];
        tgt[out_pos+3] = 255;
        in_pos += 3;
        out_pos += 4;
    }
}
2501 
// BGRA -> gray, using luminance(); alpha is dropped.
// red is the third, green the second and blue the first byte of a pixel.
void BGRA_to_Y(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t in_pos = 0, out_pos = 0;
    while (in_pos < src.length) {
        tgt[out_pos] = luminance(src[in_pos+2], src[in_pos+1], src[in_pos]);
        in_pos += 4;
        out_pos += 1;
    }
}
2506 
// BGRA -> gray + alpha, using luminance().
// red is the third, green the second and blue the first byte of a pixel.
// The source alpha (fourth byte) is carried over, as in RGBA_to_YA and
// BGRA_to_RGBA.
void BGRA_to_YA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    for (size_t k, t;   k < src.length;   k+=4, t+=2) {
        tgt[t] = luminance(src[k+2], src[k+1], src[k]);
        tgt[t+1] = src[k+3];
    }
}
2513 
// BGRA -> RGB (and RGBA -> BGR); swaps the first and third channel of every
// pixel and drops alpha.
alias RGBA_to_BGR = BGRA_to_RGB;
void BGRA_to_RGB(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t in_pos = 0, out_pos = 0;
    while (in_pos < src.length) {
        tgt[out_pos  ] = src[in_pos+2];
        tgt[out_pos+1] = src[in_pos+1];
        tgt[out_pos+2] = src[in_pos  ];
        in_pos += 4;
        out_pos += 3;
    }
}
2522 
// The channel swap is symmetric, so RGBA -> BGRA uses the same routine.
alias RGBA_to_BGRA = BGRA_to_RGBA;

// Copies 4-byte pixels with the first and third channel exchanged; the second
// and fourth (alpha) bytes are copied unchanged.
// tgt must have room for 4 bytes per source pixel.
void BGRA_to_RGBA(in ubyte[] src, ubyte[] tgt) pure nothrow {
    size_t rd = 0;      // read offset into src
    size_t wr = 0;      // write offset into tgt
    while (rd < src.length) {
        tgt[wr  ] = src[rd+2];
        tgt[wr+1] = src[rd+1];
        tgt[wr+2] = src[rd  ];
        tgt[wr+3] = src[rd+3];
        rd += 4;
        wr += 4;
    }
}
2532 
2533 // --------------------------------------------------------------------------------
2534 
// Byte source used by the decoders. A Reader is backed either by a File or by
// an in-memory buffer; the constructor binds the readExact and seek delegates
// to the matching backend so callers use one interface for both.
class Reader {
    // Fills buffer[0 .. bytes] from the source; throws if fewer than `bytes`
    // bytes are available.
    const void delegate(ubyte[], size_t) readExact;
    // Moves the read position; `origin` is one of SEEK_SET, SEEK_CUR, SEEK_END.
    const void delegate(ptrdiff_t, int) seek;

    // Opens `filename` for binary reading and reads from that file.
    this(in char[] filename) {
        this(File(filename.idup, "rb"));
    }

    // Reads from an already opened file. Throws ImageIOException if `f` is
    // not open.
    this(File f) {
        if (!f.isOpen) throw new ImageIOException("File not open");
        this.f = f;
        this.readExact = &file_readExact;
        this.seek = &file_seek;
        this.source = null;
    }

    // Reads from the given in-memory buffer, starting at offset 0.
    this(in ubyte[] source) {
        this.source = source;
        this.readExact = &mem_readExact;
        this.seek = &mem_seek;
    }

    private:

    // ---- file backend ----

    File f;
    void file_readExact(ubyte[] buffer, size_t bytes) {
        // rawRead returns the part of the buffer it managed to fill; anything
        // shorter than requested means the file ended early.
        auto slice = this.f.rawRead(buffer[0..bytes]);
        if (slice.length != bytes)
            throw new Exception("not enough data");
    }
    void file_seek(ptrdiff_t offset, int origin) { this.f.seek(offset, origin); }

    // ---- memory backend ----

    const ubyte[] source;
    ptrdiff_t cursor;       // current read offset, kept within 0 .. source.length (inclusive)
    void mem_readExact(ubyte[] buffer, size_t bytes) {
        // cursor never exceeds source.length (see mem_seek), so the
        // subtraction cannot wrap around.
        if (source.length - cursor < bytes)
            throw new Exception("not enough data");
        buffer[0..bytes] = source[cursor .. cursor+bytes];
        cursor += bytes;
    }
    // Positions the cursor anywhere in 0 .. source.length inclusive. Landing
    // exactly on the end of the data is allowed, as it is when seeking in a
    // file; a following non-empty read then fails with "not enough data".
    // Any position outside that range throws "seek error".
    void mem_seek(ptrdiff_t offset, int origin) {
        switch (origin) {
            case SEEK_SET:
                if (offset < 0 || source.length < offset)
                    throw new Exception("seek error");
                cursor = offset;
                break;
            case SEEK_CUR:
                ptrdiff_t dst = cursor + offset;
                if (dst < 0 || source.length < dst)
                    throw new Exception("seek error");
                cursor = dst;
                break;
            case SEEK_END:
                // offset counts back from the end, so it must be <= 0 and
                // must not reach before the start of the buffer.
                if (0 < offset || source.length < -offset)
                    throw new Exception("seek error");
                cursor = cast(ptrdiff_t) source.length + offset;
                break;
            default: assert(0);
        }
    }
}
2597 
// Byte sink used by the encoders. A Writer either writes to a File or appends
// to an internal buffer; the constructor binds the rawWrite and flush
// delegates to the matching backend.
class Writer {
    // Writes the given bytes to the sink.
    const void delegate(in ubyte[]) rawWrite;
    // Flushes the sink; a no-op for the in-memory backend.
    const void delegate() flush;

    // Opens `filename` for binary writing and writes to that file.
    this(in char[] filename) {
        this(File(filename.idup, "wb"));
    }

    // Writes to an already opened file. Throws ImageIOException if `f` is not
    // open.
    this(File f) {
        if (!f.isOpen)
            throw new ImageIOException("File not open");
        this.rawWrite = &file_rawWrite;
        this.flush = &file_flush;
        this.f = f;
    }

    // Collects everything written into an in-memory buffer, available through
    // `result`.
    this() {
        this.flush = &mem_flush;
        this.rawWrite = &mem_rawWrite;
    }

    // Bytes accumulated so far by the in-memory backend.
    @property ubyte[] result() {
        return this.buffer;
    }

    private:

    // ---- file backend ----

    File f;

    void file_rawWrite(in ubyte[] block) {
        this.f.rawWrite(block);
    }

    void file_flush() {
        this.f.flush();
    }

    // ---- memory backend ----

    ubyte[] buffer;

    void mem_rawWrite(in ubyte[] block) {
        this.buffer ~= block;
    }

    void mem_flush() {
        // nothing to flush for an in-memory buffer
    }
}
2630 
// Returns the text after the last '.' in `filename`, lowercased.
// Returns "" when there is no '.', when the only '.' is the first character,
// or when nothing follows the last '.'.
const(char)[] extract_extension_lowercase(in char[] filename) {
    immutable ptrdiff_t dot = filename.lastIndexOf('.');

    // dot <= 0 covers both "not found" (-1) and a leading dot (index 0).
    if (dot <= 0 || dot + 1 >= filename.length)
        return "";

    return filename[dot + 1 .. $].toLower();
}