From c08417339cbe4279717067ef00f51cd4e102840b Mon Sep 17 00:00:00 2001 From: sahilg06 Date: Thu, 10 Mar 2022 01:39:53 +0530 Subject: [PATCH 1/6] update urls for kinetics dataset --- torchvision/datasets/kinetics.py | 1 + 1 file changed, 1 insertion(+) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 189142a5e67..33de16de218 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -177,6 +177,7 @@ def _download_videos(self) -> None: if not check_integrity(split_url_filepath): download_url(split_url, file_list_path) list_video_urls = open(split_url_filepath) + lines = [str(line).replace(" ", "%20") for line in list_video_urls.readlines()] if self.num_download_workers == 1: for line in list_video_urls.readlines(): From a8a4aec87591a8f74cc494b35971f91bb9a2d1a6 Mon Sep 17 00:00:00 2001 From: sahilg06 Date: Thu, 10 Mar 2022 01:49:35 +0530 Subject: [PATCH 2/6] update urls for kinetics dataset --- torchvision/datasets/kinetics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 33de16de218..b7326571f27 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -177,7 +177,7 @@ def _download_videos(self) -> None: if not check_integrity(split_url_filepath): download_url(split_url, file_list_path) list_video_urls = open(split_url_filepath) - lines = [str(line).replace(" ", "%20") for line in list_video_urls.readlines()] + list_video_urls = [str(line).replace(" ", "%20") for line in list_video_urls.readlines()] if self.num_download_workers == 1: for line in list_video_urls.readlines(): From 701194b68175935012a3c3353b76144f5a5e5d6a Mon Sep 17 00:00:00 2001 From: sahilg06 Date: Thu, 10 Mar 2022 02:18:50 +0530 Subject: [PATCH 3/6] remove errors --- torchvision/datasets/kinetics.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index b7326571f27..3a555d340b1 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -180,12 +180,12 @@ def _download_videos(self) -> None: list_video_urls = [str(line).replace(" ", "%20") for line in list_video_urls.readlines()] if self.num_download_workers == 1: - for line in list_video_urls.readlines(): - line = str(line).replace("\n", "") + for line in list_video_urls: + line = line.replace("\n", "") download_and_extract_archive(line, tar_path, self.split_folder) else: part = partial(_dl_wrap, tar_path, self.split_folder) - lines = [str(line).replace("\n", "") for line in list_video_urls.readlines()] + lines = [line.replace("\n", "") for line in list_video_urls] poolproc = Pool(self.num_download_workers) poolproc.map(part, lines) From 69e21cfaea76ff8470b2ed9c297effb5d7ede80d Mon Sep 17 00:00:00 2001 From: sahilg06 Date: Thu, 10 Mar 2022 14:32:26 +0530 Subject: [PATCH 4/6] update the changes and add test option to split --- torchvision/datasets/kinetics.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 3a555d340b1..70e3e7c3131 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -1,6 +1,7 @@ import csv import os import time +import urllib import warnings from functools import partial from multiprocessing import Pool @@ -53,7 +54,7 @@ class Kinetics(VisionDataset): Note: split is appended automatically using the split argument. frames_per_clip (int): number of frames in a clip num_classes (int): select between Kinetics-400 (default), Kinetics-600, and Kinetics-700 - split (str): split of the dataset to consider; supports ``"train"`` (default) ``"val"`` + split (str): split of the dataset to consider; supports ``"train"`` (default) ``"val"`` ``"test"`` frame_rate (float): If omitted, interpolate different frame rate for each clip. step_between_clips (int): number of frames between each clip transform (callable, optional): A function/transform that takes in a TxHxWxC video @@ -176,18 +177,16 @@ def _download_videos(self) -> None: split_url_filepath = path.join(file_list_path, path.basename(split_url)) if not check_integrity(split_url_filepath): download_url(split_url, file_list_path) - list_video_urls = open(split_url_filepath) - list_video_urls = [str(line).replace(" ", "%20") for line in list_video_urls.readlines()] + with open(split_url_filepath) as file: + list_video_urls = [urllib.parse.quote(line, safe="/,:") for line in file.read().splitlines()] if self.num_download_workers == 1: for line in list_video_urls: - line = line.replace("\n", "") download_and_extract_archive(line, tar_path, self.split_folder) else: part = partial(_dl_wrap, tar_path, self.split_folder) - lines = [line.replace("\n", "") for line in list_video_urls] poolproc = Pool(self.num_download_workers) - poolproc.map(part, lines) + poolproc.map(part, list_video_urls) def _make_ds_structure(self) -> None: """move videos from From a662e974e55d4a87caf3044f0461eba6a6a82a9b Mon Sep 17 00:00:00 2001 From: Sahil Goyal Date: Fri, 18 Mar 2022 12:33:19 +0530 Subject: [PATCH 5/6] added test to valid values for split arg --- torchvision/datasets/kinetics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 70e3e7c3131..643f66edbc6 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -122,7 +122,7 @@ def __init__( assert not download, "Cannot download the videos using legacy_structure." else: self.split_folder = path.join(root, split) - self.split = verify_str_arg(split, arg="split", valid_values=["train", "val"]) + self.split = verify_str_arg(split, arg="split", valid_values=["train", "val", "test"]) if download: self.download_and_process_videos() From 110b8a9fee0d2e2f643756ebc515ad2fc197c88b Mon Sep 17 00:00:00 2001 From: Sahil Goyal Date: Fri, 18 Mar 2022 15:27:36 +0530 Subject: [PATCH 6/6] change .txt to .csv for annotation url of k600 --- torchvision/datasets/kinetics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 643f66edbc6..14aec6fca3b 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -82,7 +82,7 @@ class Kinetics(VisionDataset): } _ANNOTATION_URLS = { "400": "https://s3.amazonaws.com/kinetics/400/annotations/{split}.csv", - "600": "https://s3.amazonaws.com/kinetics/600/annotations/{split}.txt", + "600": "https://s3.amazonaws.com/kinetics/600/annotations/{split}.csv", "700": "https://s3.amazonaws.com/kinetics/700_2020/annotations/{split}.csv", }