From 3db4e346e02058bbe89ecef5e1a68929a4423d10 Mon Sep 17 00:00:00 2001
From: Ross Wightman
Date: Mon, 21 Nov 2022 17:42:19 -0800
Subject: [PATCH] Switch TFDS dataset to use INTEGER_ACCURATE jpeg decode by default

---
 timm/data/readers/reader_tfds.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/timm/data/readers/reader_tfds.py b/timm/data/readers/reader_tfds.py
index a327df7b..25aab471 100644
--- a/timm/data/readers/reader_tfds.py
+++ b/timm/data/readers/reader_tfds.py
@@ -43,6 +43,15 @@ SHUFFLE_SIZE = int(os.environ.get('TFDS_SHUFFLE_SIZE', 8192)) # samples to shuf
 PREFETCH_SIZE = int(os.environ.get('TFDS_PREFETCH_SIZE', 2048)) # samples to prefetch
 
 
+@tfds.decode.make_decoder()
+def decode_example(serialized_image, feature, dct_method='INTEGER_ACCURATE'):
+    return tf.image.decode_jpeg(
+        serialized_image,
+        channels=3,
+        dct_method=dct_method,
+    )
+
+
 def even_split_indices(split, n, num_samples):
     partitions = [round(i * num_samples / n) for i in range(n + 1)]
     return [f"{split}[{partitions[i]}:{partitions[i + 1]}]" for i in range(n)]
@@ -242,6 +251,7 @@ class ReaderTfds(Reader):
         ds = self.builder.as_dataset(
             split=self.subsplit or self.split,
             shuffle_files=self.is_training,
+            decoders=dict(image=decode_example()),
             read_config=read_config,
         )
         # avoid overloading threading w/ combo of TF ds threads + PyTorch workers