"""Per-layer profilers.""" |
|
import copy |
|
import time |
|
from typing import Generator, List, Tuple, Union |
|
|
|
import torch |
|
from torch import Tensor |
|
import torch.nn as nn |
|
|
|
from ..microbatch import Batch |
|
|
|
__all__: List[str] = [] |
|
|
|
|
|
Device = Union[torch.device, int, str] |
|
|
|
Tensors = Tuple[Tensor, ...] |
|
TensorOrTensors = Union[Tensor, Tensors] |
|
|
def layerwise_sandbox(
    module: nn.Sequential,
    device: torch.device,
) -> Generator[nn.Module, None, None]:
    """Yields a deep copy of each layer so it can be profiled in isolation.
    The given module is not modified.
    """
    for layer in module:
        layer_copy = copy.deepcopy(layer)
        layer_copy.to(device)
        layer_copy.train()
        yield layer_copy
|
|
def detach(batch: Batch) -> None:
    """Detaches every tensor in the batch from the autograd graph while
    preserving its ``requires_grad`` flag.
    """
    for i, x in enumerate(batch):
        batch[i] = x.detach().requires_grad_(x.requires_grad)
|
|
def profile_times(
    module: nn.Sequential,
    sample: TensorOrTensors,
    timeout: float,
    device: torch.device,
) -> List[int]:
    """Profiles elapsed times per layer."""
    if any(p.grad is not None for p in module.parameters()):
        raise ValueError("some parameter already has gradient")
|
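    # Move the sample onto the profiling device, detached from any autograd
    # graph it may already belong to.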
    _batch = Batch(sample, 0)
    for i, x in enumerate(_batch):
        _batch[i] = x.detach().to(device).requires_grad_(x.requires_grad)
|
    time_bufs: List[List[float]] = [[] for _ in module]
    begun_at = time.time()
|
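    # Sweep the layers repeatedly until the timeout so each layer accumulates
    # several timing samples.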
    while time.time() - begun_at < timeout:
        batch = _batch

        for i, layer in enumerate(layerwise_sandbox(module, device)):
            detach(batch)
|
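            # Synchronize so pending CUDA kernels don't leak into the timing.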
            if device.type == "cuda":
                torch.cuda.synchronize(device)
            tick = time.time()
|
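            # Forward through this layer only.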
            batch = batch.call(layer)
|
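            # Backward, feeding the outputs back in as their own gradients;
            # the gradient values don't matter for timing, only the work done.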
            backward_tensors = tuple(y for y in batch if y.requires_grad)
            if backward_tensors:
                torch.autograd.backward(backward_tensors, backward_tensors)

            if device.type == "cuda":
                torch.cuda.synchronize(device)
            tock = time.time()
|
            time_bufs[i].append(tock - tick)
|
    us = 1_000_000
    return [sum(int(t * us) for t in buf) for buf in time_bufs]
|
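# A minimal usage sketch for the time profiler (hypothetical model, shapes,
# and timeout; timings are wall-clock and device-dependent):
#
#     model = nn.Sequential(nn.Linear(64, 64), nn.ReLU(), nn.Linear(64, 10))
#     sample = torch.rand(128, 64)
#     times = profile_times(model, sample, timeout=1.0,
#                           device=torch.device("cuda"))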
|
|
def profile_sizes(
    module: nn.Sequential,
    input: TensorOrTensors,
    chunks: int,
    param_scale: float,
    device: torch.device,
) -> List[int]:
    """Profiles CUDA memory usage per layer."""
    if device.type != "cuda":
        raise ValueError("size profiler supports only CUDA device")
|
    batch = Batch(input, 0)
    sizes: List[int] = []
|
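    # Profile with a single-sample slice; latent_scale later scales measured
    # activation memory back up to the microbatch size (batch size / chunks).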
    latent_scale = batch[0].size(0) / chunks
    for i, x in enumerate(batch):
        batch[i] = x[:1].detach().to(device).requires_grad_(x.requires_grad)
|
    for layer in layerwise_sandbox(module, device):
        detach(batch)
|
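        # Detect memory usage at forward: the allocator delta around the call
        # approximates this layer's activation (latent) memory.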
        memory_before = torch.cuda.memory_allocated(device)
        batch = batch.call(layer)
        memory_after = torch.cuda.memory_allocated(device)
        latent_size = memory_after - memory_before
|
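        # Total byte size of this layer's parameters.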
        param_size = sum(
            p.storage().size() * p.storage().element_size()
            for p in layer.parameters()
        )
|
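        # Combine activation and parameter footprints under their scales.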
        size = latent_size * latent_scale + param_size * param_scale
        sizes.append(int(size))
|
    return sizes
|
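# A minimal usage sketch for the size profiler (hypothetical model and
# shapes; param_scale is assumed here to weight parameter memory, e.g. to
# account for optimizer state):
#
#     model = nn.Sequential(nn.Linear(64, 64), nn.ReLU(), nn.Linear(64, 10))
#     sample = torch.rand(128, 64)
#     sizes = profile_sizes(model, sample, chunks=4, param_scale=2.0,
#                           device=torch.device("cuda"))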
|