Use `torch.repeat_interleave()` to generate repeated indices faster (#1058)

* update: use numpy to generate repeated indices faster

* update: use torch.repeat_interleave() instead of np.repeat()

* refactor: remove unused import, numpy

* refactor: torch.range to torch.arange

* update: tensor to list before appending the extra samples

* update: concatenate the paddings with torch.cat
pull/659/merge
Hyeongchan Kim 3 years ago committed by GitHub
parent a79b2f3328
commit a0b2657497
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -103,15 +103,16 @@ class RepeatAugSampler(Sampler):
         g = torch.Generator()
         g.manual_seed(self.epoch)
         if self.shuffle:
-            indices = torch.randperm(len(self.dataset), generator=g).tolist()
+            indices = torch.randperm(len(self.dataset), generator=g)
         else:
-            indices = list(range(len(self.dataset)))
+            indices = torch.arange(start=0, end=len(self.dataset))
         # produce repeats e.g. [0, 0, 0, 1, 1, 1, 2, 2, 2....]
-        indices = [x for x in indices for _ in range(self.num_repeats)]
+        indices = torch.repeat_interleave(indices, repeats=self.num_repeats, dim=0)
         # add extra samples to make it evenly divisible
         padding_size = self.total_size - len(indices)
-        indices += indices[:padding_size]
+        if padding_size > 0:
+            indices = torch.cat([indices, indices[:padding_size]], dim=0)
         assert len(indices) == self.total_size
         # subsample per rank

Loading…
Cancel
Save