Skip to content

Commit 73093f0

Browse files
authored
Merge branch 'main' into checkpoint
2 parents 117363d + 8aa3174 commit 73093f0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+1471
-246
lines changed

.circleci/config.yml

Lines changed: 593 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.circleci/config.yml.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ jobs:
311311
descr: Install Python type check utilities
312312
- run:
313313
name: Check Python types statically
314-
command: mypy --config-file mypy.ini
314+
command: mypy --install-types --non-interactive --config-file mypy.ini
315315

316316
unittest_torchhub:
317317
docker:

.circleci/regenerate.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ def build_workflows(prefix="", filter_branch=None, upload=False, indentation=6,
3232
for os_type in ["linux", "macos", "win"]:
3333
python_versions = PYTHON_VERSIONS
3434
cu_versions_dict = {
35-
"linux": ["cpu", "cu102", "cu111", "cu113", "rocm4.2", "rocm4.3.1"],
36-
"win": ["cpu", "cu111", "cu113"],
35+
"linux": ["cpu", "cu102", "cu111", "cu113", "cu115", "rocm4.2", "rocm4.3.1"],
36+
"win": ["cpu", "cu111", "cu113", "cu115"],
3737
"macos": ["cpu"],
3838
}
3939
cu_versions = cu_versions_dict[os_type]
@@ -128,6 +128,7 @@ def upload_doc_job(filter_branch):
128128
"cu111": "pytorch/manylinux-cuda111",
129129
"cu112": "pytorch/manylinux-cuda112",
130130
"cu113": "pytorch/manylinux-cuda113",
131+
"cu115": "pytorch/manylinux-cuda115",
131132
}
132133

133134

packaging/build_conda.sh

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,11 @@ setup_conda_pytorch_constraint
1111
setup_conda_cudatoolkit_constraint
1212
setup_visual_studio_constraint
1313
setup_junit_results_folder
14-
# nvidia channel included for cudatoolkit >= 11
15-
conda build -c defaults -c nvidia $CONDA_CHANNEL_FLAGS --no-anaconda-upload --python "$PYTHON_VERSION" packaging/torchvision
14+
15+
# The nvidia channel is included for cudatoolkit >= 11; however, for 11.5 we use conda-forge instead.
16+
export CUDATOOLKIT_CHANNEL="nvidia"
17+
if [[ "$CU_VERSION" == cu115 ]]; then
18+
export CUDATOOLKIT_CHANNEL="conda-forge"
19+
fi
20+
21+
conda build -c defaults -c $CUDATOOLKIT_CHANNEL $CONDA_CHANNEL_FLAGS --no-anaconda-upload --python "$PYTHON_VERSION" packaging/torchvision

packaging/pkg_helpers.bash

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,14 @@ setup_cuda() {
4646

4747
# Now work out the CUDA settings
4848
case "$CU_VERSION" in
49+
cu115)
50+
if [[ "$OSTYPE" == "msys" ]]; then
51+
export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.5"
52+
else
53+
export CUDA_HOME=/usr/local/cuda-11.5/
54+
fi
55+
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
56+
;;
4957
cu113)
5058
if [[ "$OSTYPE" == "msys" ]]; then
5159
export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3"
@@ -289,6 +297,9 @@ setup_conda_cudatoolkit_constraint() {
289297
export CONDA_BUILD_VARIANT="cpu"
290298
else
291299
case "$CU_VERSION" in
300+
cu115)
301+
export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.5,<11.6 # [not osx]"
302+
;;
292303
cu113)
293304
export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.3,<11.4 # [not osx]"
294305
;;
@@ -333,6 +344,9 @@ setup_conda_cudatoolkit_plain_constraint() {
333344
export CMAKE_USE_CUDA=0
334345
else
335346
case "$CU_VERSION" in
347+
cu115)
348+
export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit=11.5"
349+
;;
336350
cu113)
337351
export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit=11.3"
338352
;;

packaging/torchvision/meta.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ requirements:
2424
run:
2525
- python
2626
- defaults::numpy >=1.11
27+
- requests
2728
- libpng
2829
- ffmpeg >=4.2 # [not win]
2930
- jpeg

packaging/windows/internal/cuda_install.bat

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ set CUDA_VER_MAJOR=%CUDA_VER:~0,-1%
2121
set CUDA_VER_MINOR=%CUDA_VER:~-1,1%
2222
set CUDA_VERSION_STR=%CUDA_VER_MAJOR%.%CUDA_VER_MINOR%
2323

24+
2425
if %CUDA_VER% EQU 92 goto cuda92
2526
if %CUDA_VER% EQU 100 goto cuda100
2627
if %CUDA_VER% EQU 101 goto cuda101
@@ -29,6 +30,8 @@ if %CUDA_VER% EQU 110 goto cuda110
2930
if %CUDA_VER% EQU 111 goto cuda111
3031
if %CUDA_VER% EQU 112 goto cuda112
3132
if %CUDA_VER% EQU 113 goto cuda113
33+
if %CUDA_VER% EQU 115 goto cuda115
34+
3235

3336
echo CUDA %CUDA_VERSION_STR% is not supported
3437
exit /b 1
@@ -180,6 +183,25 @@ if not exist "%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" (
180183

181184
goto cuda_common
182185

186+
:cuda115
187+
188+
set CUDA_INSTALL_EXE=cuda_11.5.0_496.13_win10.exe
189+
if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" (
190+
curl -k -L "https://ossci-windows.s3.amazonaws.com/%CUDA_INSTALL_EXE%" --output "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%"
191+
if errorlevel 1 exit /b 1
192+
set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%"
193+
set "ARGS=thrust_11.5 nvcc_11.5 cuobjdump_11.5 nvprune_11.5 nvprof_11.5 cupti_11.5 cublas_11.5 cublas_dev_11.5 cudart_11.5 cufft_11.5 cufft_dev_11.5 curand_11.5 curand_dev_11.5 cusolver_11.5 cusolver_dev_11.5 cusparse_11.5 cusparse_dev_11.5 npp_11.5 npp_dev_11.5 nvrtc_11.5 nvrtc_dev_11.5 nvml_dev_11.5"
194+
)
195+
196+
set CUDNN_INSTALL_ZIP=cudnn-11.3-windows-x64-v8.2.0.53.zip
197+
if not exist "%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" (
198+
curl -k -L "http://s3.amazonaws.com/ossci-windows/%CUDNN_INSTALL_ZIP%" --output "%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%"
199+
if errorlevel 1 exit /b 1
200+
set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%"
201+
)
202+
203+
goto cuda_common
204+
183205
:cuda_common
184206

185207
if not exist "%SRC_DIR%\temp_build\NvToolsExt.7z" (
@@ -222,7 +244,7 @@ set "NVTOOLSEXT_PATH=%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64"
222244
if not exist "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin\nvcc.exe" (
223245
echo CUDA %CUDA_VERSION_STR% installed failed.
224246
echo --------- RunDll32.exe.log
225-
type "%SRC_DIR%\temp_build\cuda\cuda_install_logs\LOG.RunDll32.exe.log"
247+
type "%SRC_DIR%\temp_build\cuda\cuda_install_logs\LOG.RunDll32.exe.log"
226248
echo --------- setup.exe.log -------
227249
type "%SRC_DIR%\temp_build\cuda\cuda_install_logs\LOG.setup.exe.log"
228250
exit /b 1

references/classification/sampler.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ class RASampler(torch.utils.data.Sampler):
1515
https://github.com/facebookresearch/deit/blob/main/samplers.py
1616
"""
1717

18-
def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
18+
def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True, seed=0):
1919
if num_replicas is None:
2020
if not dist.is_available():
2121
raise RuntimeError("Requires distributed package to be available!")
@@ -32,11 +32,12 @@ def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
3232
self.total_size = self.num_samples * self.num_replicas
3333
self.num_selected_samples = int(math.floor(len(self.dataset) // 256 * 256 / self.num_replicas))
3434
self.shuffle = shuffle
35+
self.seed = seed
3536

3637
def __iter__(self):
3738
# Deterministically shuffle based on epoch
3839
g = torch.Generator()
39-
g.manual_seed(self.epoch)
40+
g.manual_seed(self.seed + self.epoch)
4041
if self.shuffle:
4142
indices = torch.randperm(len(self.dataset), generator=g).tolist()
4243
else:

references/classification/train.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import torchvision
1010
import transforms
1111
import utils
12-
from references.classification.sampler import RASampler
12+
from sampler import RASampler
1313
from torch import nn
1414
from torch.utils.data.dataloader import default_collate
1515
from torchvision.transforms.functional import InterpolationMode

references/optical_flow/README.md

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Optical flow reference training scripts
2+
3+
This folder contains reference training scripts for optical flow.
4+
They serve as a log of how to train specific models, so as to provide baseline
5+
training and evaluation scripts to quickly bootstrap research.
6+
7+
8+
### RAFT Large
9+
10+
The RAFT large model was trained on Flying Chairs and then on Flying Things.
11+
Both used 8 A100 GPUs and a batch size of 2 (so effective batch size is 16). The
12+
rest of the hyper-parameters are exactly the same as the original RAFT training
13+
recipe from https://github.com/princeton-vl/RAFT.
14+
15+
```
16+
torchrun --nproc_per_node 8 --nnodes 1 train.py \
17+
--dataset-root $dataset_root \
18+
--name $name_chairs \
19+
--model raft_large \
20+
--train-dataset chairs \
21+
--batch-size 2 \
22+
--lr 0.0004 \
23+
--weight-decay 0.0001 \
24+
--num-steps 100000 \
25+
--output-dir $chairs_dir
26+
```
27+
28+
```
29+
torchrun --nproc_per_node 8 --nnodes 1 train.py \
30+
--dataset-root $dataset_root \
31+
--name $name_things \
32+
--model raft_large \
33+
--train-dataset things \
34+
--batch-size 2 \
35+
--lr 0.000125 \
36+
--weight-decay 0.0001 \
37+
--num-steps 100000 \
38+
--freeze-batch-norm \
39+
--output-dir $things_dir \
40+
--resume $chairs_dir/$name_chairs.pth
41+
```
42+
43+
44+
### Evaluation
45+
46+
```
47+
torchrun --nproc_per_node 1 --nnodes 1 train.py --val-dataset sintel --batch-size 1 --dataset-root $dataset_root --model raft_large --pretrained
48+
```
49+
50+
This should give an epe of about 1.3822 on the clean pass and 2.7161 on the
51+
final pass of Sintel-train. Results may vary slightly depending on the batch
52+
size and the number of GPUs. For the most accurate results use 1 GPU and
53+
`--batch-size 1`:
54+
55+
```
56+
Sintel val clean epe: 1.3822 1px: 0.9028 3px: 0.9573 5px: 0.9697 per_image_epe: 1.3822 f1: 4.0248
57+
Sintel val final epe: 2.7161 1px: 0.8528 3px: 0.9204 5px: 0.9392 per_image_epe: 2.7161 f1: 7.5964
58+
```
59+
60+
You can also evaluate on Kitti train:
61+
62+
```
63+
torchrun --nproc_per_node 1 --nnodes 1 train.py --val-dataset kitti --batch-size 1 --dataset-root $dataset_root --model raft_large --pretrained
64+
Kitti val epe: 4.7968 1px: 0.6388 3px: 0.8197 5px: 0.8661 per_image_epe: 4.5118 f1: 16.0679
65+
```

references/optical_flow/train.py

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,16 @@
33
from pathlib import Path
44

55
import torch
6+
import torchvision.models.optical_flow
67
import utils
78
from presets import OpticalFlowPresetTrain, OpticalFlowPresetEval
89
from torchvision.datasets import KittiFlow, FlyingChairs, FlyingThings3D, Sintel, HD1K
9-
from torchvision.models.optical_flow import raft_large, raft_small
10+
11+
try:
12+
from torchvision.prototype import models as PM
13+
from torchvision.prototype.models import optical_flow as PMOF
14+
except ImportError:
15+
PM = PMOF = None
1016

1117

1218
def get_train_dataset(stage, dataset_root):
@@ -125,6 +131,13 @@ def inner_loop(blob):
125131

126132
def validate(model, args):
127133
val_datasets = args.val_dataset or []
134+
135+
if args.weights:
136+
weights = PM.get_weight(args.weights)
137+
preprocessing = weights.transforms()
138+
else:
139+
preprocessing = OpticalFlowPresetEval()
140+
128141
for name in val_datasets:
129142
if name == "kitti":
130143
# Kitti has different image sizes so we need to individually pad them, we can't batch.
@@ -134,14 +147,14 @@ def validate(model, args):
134147
f"Batch-size={args.batch_size} was passed. For technical reasons, evaluating on Kitti can only be done with a batch-size of 1."
135148
)
136149

137-
val_dataset = KittiFlow(root=args.dataset_root, split="train", transforms=OpticalFlowPresetEval())
150+
val_dataset = KittiFlow(root=args.dataset_root, split="train", transforms=preprocessing)
138151
_validate(
139152
model, args, val_dataset, num_flow_updates=24, padder_mode="kitti", header="Kitti val", batch_size=1
140153
)
141154
elif name == "sintel":
142155
for pass_name in ("clean", "final"):
143156
val_dataset = Sintel(
144-
root=args.dataset_root, split="train", pass_name=pass_name, transforms=OpticalFlowPresetEval()
157+
root=args.dataset_root, split="train", pass_name=pass_name, transforms=preprocessing
145158
)
146159
_validate(
147160
model,
@@ -187,7 +200,11 @@ def train_one_epoch(model, optimizer, scheduler, train_loader, logger, current_s
187200
def main(args):
188201
utils.setup_ddp(args)
189202

190-
model = raft_small() if args.small else raft_large()
203+
if args.weights:
204+
model = PMOF.__dict__[args.model](weights=args.weights)
205+
else:
206+
model = torchvision.models.optical_flow.__dict__[args.model](pretrained=args.pretrained)
207+
191208
model = model.to(args.local_rank)
192209
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank])
193210

@@ -306,7 +323,12 @@ def get_args_parser(add_help=True):
306323
"--freeze-batch-norm", action="store_true", help="Set BatchNorm modules of the model in eval mode."
307324
)
308325

309-
parser.add_argument("--small", action="store_true", help="Use the 'small' RAFT architecture.")
326+
parser.add_argument(
327+
"--model", type=str, default="raft_large", help="The name of the model to use - either raft_large or raft_small"
328+
)
329+
# TODO: resume, pretrained, and weights should be in an exclusive arg group
330+
parser.add_argument("--pretrained", action="store_true", help="Whether to use pretrained weights")
331+
parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load.")
310332

311333
parser.add_argument(
312334
"--num_flow_updates",

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def write_version_file():
5959

6060
requirements = [
6161
"numpy",
62+
"requests",
6263
pytorch_dep,
6364
]
6465

test/builtin_dataset_mocks.py

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,19 @@
2929
DEFAULT_TEST_DECODER = object()
3030

3131

32+
class TestResource(datasets.utils.OnlineResource):
33+
def __init__(self, *, dataset_name, dataset_config, **kwargs):
34+
super().__init__(**kwargs)
35+
self.dataset_name = dataset_name
36+
self.dataset_config = dataset_config
37+
38+
def _download(self, _):
39+
raise pytest.UsageError(
40+
f"Dataset '{self.dataset_name}' requires the file '{self.file_name}' for {self.dataset_config}, "
41+
f"but this file does not exist."
42+
)
43+
44+
3245
class DatasetMocks:
3346
def __init__(self):
3447
self._mock_data_fns = {}
@@ -72,7 +85,7 @@ def _parse_mock_info(self, mock_info, *, name):
7285
)
7386
return mock_info
7487

75-
def _get(self, dataset, config):
88+
def _get(self, dataset, config, root):
7689
name = dataset.info.name
7790
resources_and_mock_info = self._cache.get((name, config))
7891
if resources_and_mock_info:
@@ -87,20 +100,12 @@ def _get(self, dataset, config):
87100
f"Did you register the mock data function with `@DatasetMocks.register_mock_data_fn`?"
88101
)
89102

90-
root = self._tmp_home / name
91-
root.mkdir(exist_ok=True)
103+
mock_resources = [
104+
TestResource(dataset_name=name, dataset_config=config, file_name=resource.file_name)
105+
for resource in dataset.resources(config)
106+
]
92107
mock_info = self._parse_mock_info(fakedata_fn(dataset.info, root, config), name=name)
93108

94-
mock_resources = []
95-
for resource in dataset.resources(config):
96-
path = root / resource.file_name
97-
if not path.exists() and path.is_file():
98-
raise pytest.UsageError(
99-
f"Dataset '{name}' requires the file {path.name} for {config}, but this file does not exist."
100-
)
101-
102-
mock_resources.append(datasets.utils.LocalResource(path))
103-
104109
self._cache[(name, config)] = mock_resources, mock_info
105110
return mock_resources, mock_info
106111

@@ -109,9 +114,13 @@ def load(
109114
) -> Tuple[IterDataPipe, Dict[str, Any]]:
110115
dataset = find(name)
111116
config = dataset.info.make_config(split=split, **options)
112-
resources, mock_info = self._get(dataset, config)
117+
118+
root = self._tmp_home / name
119+
root.mkdir(exist_ok=True)
120+
resources, mock_info = self._get(dataset, config, root)
121+
113122
datapipe = dataset._make_datapipe(
114-
[resource.to_datapipe() for resource in resources],
123+
[resource.load(root) for resource in resources],
115124
config=config,
116125
decoder=DEFAULT_DECODER_MAP.get(dataset.info.type) if decoder is DEFAULT_DECODER else decoder,
117126
)

0 commit comments

Comments
 (0)