unit-scaling
Contents
1. User guide
2. Limitations
3. API reference
unit-scaling
Index
Index
A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Z
A
abs() (unit_scaling.parameter.Tensor method)
abs_() (unit_scaling.parameter.Tensor method)
absolute() (unit_scaling.parameter.Tensor method)
absolute_() (unit_scaling.parameter.Tensor method)
acos() (unit_scaling.parameter.Tensor method)
acos_() (unit_scaling.parameter.Tensor method)
acosh() (unit_scaling.parameter.Tensor method)
acosh_() (unit_scaling.parameter.Tensor method)
Adam (class in unit_scaling.optim)
AdamW (class in unit_scaling.optim)
add() (in module unit_scaling.functional)
add() (unit_scaling.parameter.Tensor method)
add_() (unit_scaling.parameter.Tensor method)
add_param_group() (unit_scaling.optim.Adam method)
add_param_group() (unit_scaling.optim.AdamW method)
add_param_group() (unit_scaling.optim.SGD method)
addbmm() (unit_scaling.parameter.Tensor method)
addbmm_() (unit_scaling.parameter.Tensor method)
addcdiv() (unit_scaling.parameter.Tensor method)
addcdiv_() (unit_scaling.parameter.Tensor method)
addcmul() (unit_scaling.parameter.Tensor method)
addcmul_() (unit_scaling.parameter.Tensor method)
addmm() (unit_scaling.parameter.Tensor method)
addmm_() (unit_scaling.parameter.Tensor method)
addmv() (unit_scaling.parameter.Tensor method)
addmv_() (unit_scaling.parameter.Tensor method)
addr() (unit_scaling.parameter.Tensor method)
addr_() (unit_scaling.parameter.Tensor method)
adjoint() (unit_scaling.parameter.Tensor method)
align_as() (unit_scaling.parameter.Tensor method)
align_to() (unit_scaling.parameter.Tensor method)
all() (unit_scaling.parameter.Tensor method)
allclose() (unit_scaling.parameter.Tensor method)
amax() (unit_scaling.parameter.Tensor method)
amean() (in module unit_scaling.constraints)
amin() (unit_scaling.parameter.Tensor method)
aminmax() (unit_scaling.parameter.Tensor method)
analyse_module() (in module unit_scaling.utils)
angle() (unit_scaling.parameter.Tensor method)
any() (unit_scaling.parameter.Tensor method)
append() (unit_scaling.DepthModuleList method)
append() (unit_scaling.DepthSequential method)
append() (unit_scaling.TransformerDecoder method)
apply_() (unit_scaling.parameter.Tensor method)
apply_constraint() (in module unit_scaling.constraints)
apply_transform() (in module unit_scaling.transforms.utils)
arccos() (unit_scaling.parameter.Tensor method)
arccos_() (unit_scaling.parameter.Tensor method)
arccosh() (unit_scaling.parameter.Tensor method)
arccosh_() (unit_scaling.parameter.Tensor method)
arcsin() (unit_scaling.parameter.Tensor method)
arcsin_() (unit_scaling.parameter.Tensor method)
arcsinh() (unit_scaling.parameter.Tensor method)
arcsinh_() (unit_scaling.parameter.Tensor method)
arctan() (unit_scaling.parameter.Tensor method)
arctan2() (unit_scaling.parameter.Tensor method)
arctan2_() (unit_scaling.parameter.Tensor method)
arctan_() (unit_scaling.parameter.Tensor method)
arctanh() (unit_scaling.parameter.Tensor method)
arctanh_() (unit_scaling.parameter.Tensor method)
argmax() (unit_scaling.parameter.Tensor method)
argmin() (unit_scaling.parameter.Tensor method)
argsort() (unit_scaling.parameter.Tensor method)
argwhere() (unit_scaling.parameter.Tensor method)
as_strided() (unit_scaling.parameter.Tensor method)
as_strided_() (unit_scaling.parameter.Tensor method)
as_strided_scatter() (unit_scaling.parameter.Tensor method)
as_subclass() (unit_scaling.parameter.Tensor method)
asin() (unit_scaling.parameter.Tensor method)
asin_() (unit_scaling.parameter.Tensor method)
asinh() (unit_scaling.parameter.Tensor method)
asinh_() (unit_scaling.parameter.Tensor method)
atan() (unit_scaling.parameter.Tensor method)
atan2() (unit_scaling.parameter.Tensor method)
atan2_() (unit_scaling.parameter.Tensor method)
atan_() (unit_scaling.parameter.Tensor method)
atanh() (unit_scaling.parameter.Tensor method)
atanh_() (unit_scaling.parameter.Tensor method)
B
backward() (unit_scaling.parameter.Tensor method)
backward() (unit_scaling.utils.ScaleTracker static method)
baddbmm() (unit_scaling.parameter.Tensor method)
baddbmm_() (unit_scaling.parameter.Tensor method)
bernoulli() (unit_scaling.parameter.Tensor method)
bernoulli_() (unit_scaling.parameter.Tensor method)
bfloat16() (unit_scaling.parameter.Tensor method)
bias (unit_scaling.Conv1d attribute)
bias (unit_scaling.LayerNorm attribute)
bias (unit_scaling.Linear attribute)
bias (unit_scaling.LinearReadout attribute)
bincount() (unit_scaling.parameter.Tensor method)
bits (unit_scaling.formats.FPFormat property)
bitwise_and() (unit_scaling.parameter.Tensor method)
bitwise_and_() (unit_scaling.parameter.Tensor method)
bitwise_left_shift() (unit_scaling.parameter.Tensor method)
bitwise_left_shift_() (unit_scaling.parameter.Tensor method)
bitwise_not() (unit_scaling.parameter.Tensor method)
bitwise_not_() (unit_scaling.parameter.Tensor method)
bitwise_or() (unit_scaling.parameter.Tensor method)
bitwise_or_() (unit_scaling.parameter.Tensor method)
bitwise_right_shift() (unit_scaling.parameter.Tensor method)
bitwise_right_shift_() (unit_scaling.parameter.Tensor method)
bitwise_xor() (unit_scaling.parameter.Tensor method)
bitwise_xor_() (unit_scaling.parameter.Tensor method)
bmm() (unit_scaling.parameter.Tensor method)
bool() (unit_scaling.parameter.Tensor method)
boxed_run() (unit_scaling.utils.ScaleTrackingInterpreter method)
broadcast_to() (unit_scaling.parameter.Tensor method)
byte() (unit_scaling.parameter.Tensor method)
C
call_function() (unit_scaling.utils.ScaleTrackingInterpreter method)
call_method() (unit_scaling.utils.ScaleTrackingInterpreter method)
call_module() (unit_scaling.utils.ScaleTrackingInterpreter method)
cauchy_() (unit_scaling.parameter.Tensor method)
cdouble() (unit_scaling.parameter.Tensor method)
ceil() (unit_scaling.parameter.Tensor method)
ceil_() (unit_scaling.parameter.Tensor method)
cfloat() (unit_scaling.parameter.Tensor method)
chalf() (unit_scaling.parameter.Tensor method)
char() (unit_scaling.parameter.Tensor method)
cholesky() (unit_scaling.parameter.Tensor method)
cholesky_inverse() (unit_scaling.parameter.Tensor method)
cholesky_solve() (unit_scaling.parameter.Tensor method)
chunk() (unit_scaling.parameter.Tensor method)
clamp() (unit_scaling.parameter.Tensor method)
clamp_() (unit_scaling.parameter.Tensor method)
clear() (unit_scaling.parameter.OrderedDict method)
clip() (unit_scaling.parameter.Tensor method)
clip_() (unit_scaling.parameter.Tensor method)
clone() (unit_scaling.parameter.Tensor method)
coalesce() (unit_scaling.parameter.Tensor method)
col_indices() (unit_scaling.parameter.Tensor method)
compile() (in module unit_scaling.transforms)
conj() (unit_scaling.parameter.Tensor method)
conj_physical() (unit_scaling.parameter.Tensor method)
conj_physical_() (unit_scaling.parameter.Tensor method)
contiguous() (unit_scaling.parameter.Tensor method)
Conv1d (class in unit_scaling)
conv1d() (in module unit_scaling.functional)
copy() (unit_scaling.parameter.OrderedDict method)
copy_() (unit_scaling.parameter.Tensor method)
copysign() (unit_scaling.parameter.Tensor method)
copysign_() (unit_scaling.parameter.Tensor method)
corrcoef() (unit_scaling.parameter.Tensor method)
cos() (unit_scaling.parameter.Tensor method)
cos_() (unit_scaling.parameter.Tensor method)
cosh() (unit_scaling.parameter.Tensor method)
cosh_() (unit_scaling.parameter.Tensor method)
count_nonzero() (unit_scaling.parameter.Tensor method)
cov() (unit_scaling.parameter.Tensor method)
cpu() (unit_scaling.parameter.Tensor method)
cross() (unit_scaling.parameter.Tensor method)
cross_entropy() (in module unit_scaling.functional)
CrossEntropyLoss (class in unit_scaling)
crow_indices() (unit_scaling.parameter.Tensor method)
cuda() (unit_scaling.parameter.Tensor method)
cummax() (unit_scaling.parameter.Tensor method)
cummin() (unit_scaling.parameter.Tensor method)
cumprod() (unit_scaling.parameter.Tensor method)
cumprod_() (unit_scaling.parameter.Tensor method)
cumsum() (unit_scaling.parameter.Tensor method)
cumsum_() (unit_scaling.parameter.Tensor method)
D
data_ptr() (unit_scaling.parameter.Tensor method)
deg2rad() (unit_scaling.parameter.Tensor method)
deg2rad_() (unit_scaling.parameter.Tensor method)
dense_dim() (unit_scaling.parameter.Tensor method)
DepthModuleList (class in unit_scaling)
DepthSequential (class in unit_scaling)
dequantize() (unit_scaling.parameter.Tensor method)
det() (unit_scaling.parameter.Tensor method)
detach() (unit_scaling.parameter.Tensor method)
detach_() (unit_scaling.parameter.Tensor method)
device (unit_scaling.parameter.Tensor attribute)
diag() (unit_scaling.parameter.Tensor method)
diag_embed() (unit_scaling.parameter.Tensor method)
diagflat() (unit_scaling.parameter.Tensor method)
diagonal() (unit_scaling.parameter.Tensor method)
diagonal_scatter() (unit_scaling.parameter.Tensor method)
diff() (unit_scaling.parameter.Tensor method)
digamma() (unit_scaling.parameter.Tensor method)
digamma_() (unit_scaling.parameter.Tensor method)
dim() (unit_scaling.parameter.Tensor method)
dim_order() (unit_scaling.parameter.Tensor method)
dist() (unit_scaling.parameter.Tensor method)
div() (unit_scaling.parameter.Tensor method)
div_() (unit_scaling.parameter.Tensor method)
divide() (unit_scaling.parameter.Tensor method)
divide_() (unit_scaling.parameter.Tensor method)
dot() (unit_scaling.parameter.Tensor method)
double() (unit_scaling.parameter.Tensor method)
Dropout (class in unit_scaling)
dropout() (in module unit_scaling.functional)
dsplit() (unit_scaling.parameter.Tensor method)
E
element_size() (unit_scaling.parameter.Tensor method)
Embedding (class in unit_scaling)
embedding() (in module unit_scaling.functional)
eq() (unit_scaling.parameter.Tensor method)
eq_() (unit_scaling.parameter.Tensor method)
equal() (unit_scaling.parameter.Tensor method)
erf() (unit_scaling.parameter.Tensor method)
erf_() (unit_scaling.parameter.Tensor method)
erfc() (unit_scaling.parameter.Tensor method)
erfc_() (unit_scaling.parameter.Tensor method)
erfinv() (unit_scaling.parameter.Tensor method)
erfinv_() (unit_scaling.parameter.Tensor method)
example_batch() (in module unit_scaling.analysis)
exp() (unit_scaling.parameter.Tensor method)
exp2() (unit_scaling.parameter.Tensor method)
exp2_() (unit_scaling.parameter.Tensor method)
exp_() (unit_scaling.parameter.Tensor method)
expand() (unit_scaling.parameter.Tensor method)
expand_as() (unit_scaling.parameter.Tensor method)
expm1() (unit_scaling.parameter.Tensor method)
expm1_() (unit_scaling.parameter.Tensor method)
exponential_() (unit_scaling.parameter.Tensor method)
extend() (unit_scaling.DepthModuleList method)
F
fetch_args_kwargs_from_env() (unit_scaling.utils.ScaleTrackingInterpreter method)
fetch_attr() (unit_scaling.utils.ScaleTrackingInterpreter method)
fill_() (unit_scaling.parameter.Tensor method)
fill_diagonal_() (unit_scaling.parameter.Tensor method)
fix() (unit_scaling.parameter.Tensor method)
fix_() (unit_scaling.parameter.Tensor method)
flatten() (unit_scaling.parameter.Tensor method)
flip() (unit_scaling.parameter.Tensor method)
fliplr() (unit_scaling.parameter.Tensor method)
flipud() (unit_scaling.parameter.Tensor method)
float() (unit_scaling.parameter.Tensor method)
float_power() (unit_scaling.parameter.Tensor method)
float_power_() (unit_scaling.parameter.Tensor method)
floor() (unit_scaling.parameter.Tensor method)
floor_() (unit_scaling.parameter.Tensor method)
floor_divide() (unit_scaling.parameter.Tensor method)
floor_divide_() (unit_scaling.parameter.Tensor method)
fmax() (unit_scaling.parameter.Tensor method)
fmin() (unit_scaling.parameter.Tensor method)
fmod() (unit_scaling.parameter.Tensor method)
fmod_() (unit_scaling.parameter.Tensor method)
format_to_tuple() (in module unit_scaling.formats)
FPFormat (class in unit_scaling.formats)
frac() (unit_scaling.parameter.Tensor method)
frac_() (unit_scaling.parameter.Tensor method)
frexp() (unit_scaling.parameter.Tensor method)
from_pretrained() (unit_scaling.Embedding class method)
fromkeys() (unit_scaling.parameter.OrderedDict method)
G
gather() (unit_scaling.parameter.Tensor method)
gcd() (unit_scaling.parameter.Tensor method)
gcd_() (unit_scaling.parameter.Tensor method)
ge() (unit_scaling.parameter.Tensor method)
ge_() (unit_scaling.parameter.Tensor method)
GELU (class in unit_scaling)
gelu() (in module unit_scaling.functional)
geometric_() (unit_scaling.parameter.Tensor method)
geqrf() (unit_scaling.parameter.Tensor method)
ger() (unit_scaling.parameter.Tensor method)
get() (unit_scaling.parameter.OrderedDict method)
get_attr() (unit_scaling.utils.ScaleTrackingInterpreter method)
get_device() (unit_scaling.parameter.Tensor method)
gmean() (in module unit_scaling.constraints)
grad (unit_scaling.parameter.Tensor attribute)
graph_to_dataframe() (in module unit_scaling.analysis)
greater() (unit_scaling.parameter.Tensor method)
greater_() (unit_scaling.parameter.Tensor method)
greater_equal() (unit_scaling.parameter.Tensor method)
greater_equal_() (unit_scaling.parameter.Tensor method)
gt() (unit_scaling.parameter.Tensor method)
gt_() (unit_scaling.parameter.Tensor method)
H
H (unit_scaling.parameter.Tensor attribute)
half() (unit_scaling.parameter.Tensor method)
hardshrink() (unit_scaling.parameter.Tensor method)
has_names() (unit_scaling.parameter.Tensor method)
has_parameter_data() (in module unit_scaling.parameter)
heaviside() (unit_scaling.parameter.Tensor method)
heaviside_() (unit_scaling.parameter.Tensor method)
histc() (unit_scaling.parameter.Tensor method)
histogram() (unit_scaling.parameter.Tensor method)
hmean() (in module unit_scaling.constraints)
hsplit() (unit_scaling.parameter.Tensor method)
hypot() (unit_scaling.parameter.Tensor method)
hypot_() (unit_scaling.parameter.Tensor method)
I
i0() (unit_scaling.parameter.Tensor method)
i0_() (unit_scaling.parameter.Tensor method)
igamma() (unit_scaling.parameter.Tensor method)
igamma_() (unit_scaling.parameter.Tensor method)
igammac() (unit_scaling.parameter.Tensor method)
igammac_() (unit_scaling.parameter.Tensor method)
imag (unit_scaling.parameter.Tensor attribute)
index_add() (unit_scaling.parameter.Tensor method)
index_add_() (unit_scaling.parameter.Tensor method)
index_copy() (unit_scaling.parameter.Tensor method)
index_copy_() (unit_scaling.parameter.Tensor method)
index_fill() (unit_scaling.parameter.Tensor method)
index_fill_() (unit_scaling.parameter.Tensor method)
index_put() (unit_scaling.parameter.Tensor method)
index_put_() (unit_scaling.parameter.Tensor method)
index_reduce_() (unit_scaling.parameter.Tensor method)
index_select() (unit_scaling.parameter.Tensor method)
indices() (unit_scaling.parameter.Tensor method)
inner() (unit_scaling.parameter.Tensor method)
insert() (unit_scaling.DepthModuleList method)
int() (unit_scaling.parameter.Tensor method)
int_repr() (unit_scaling.parameter.Tensor method)
inverse() (unit_scaling.parameter.Tensor method)
ipu() (unit_scaling.parameter.Tensor method)
is_coalesced() (unit_scaling.parameter.Tensor method)
is_complex() (unit_scaling.parameter.Tensor method)
is_conj() (unit_scaling.parameter.Tensor method)
is_contiguous() (unit_scaling.parameter.Tensor method)
is_cpu (unit_scaling.parameter.Tensor attribute)
is_cuda (unit_scaling.parameter.Tensor attribute)
is_floating_point() (unit_scaling.parameter.Tensor method)
is_inference() (unit_scaling.parameter.Tensor method)
is_ipu (unit_scaling.parameter.Tensor attribute)
is_leaf (unit_scaling.parameter.Tensor attribute)
is_meta (unit_scaling.parameter.Tensor attribute)
is_mps (unit_scaling.parameter.Tensor attribute)
is_neg() (unit_scaling.parameter.Tensor method)
is_pinned() (unit_scaling.parameter.Tensor method)
is_quantized (unit_scaling.parameter.Tensor attribute)
is_set_to() (unit_scaling.parameter.Tensor method)
is_shared() (unit_scaling.parameter.Tensor method)
is_signed() (unit_scaling.parameter.Tensor method)
is_sparse (unit_scaling.parameter.Tensor attribute)
is_sparse_csr (unit_scaling.parameter.Tensor attribute)
is_xla (unit_scaling.parameter.Tensor attribute)
is_xpu (unit_scaling.parameter.Tensor attribute)
isclose() (unit_scaling.parameter.Tensor method)
isfinite() (unit_scaling.parameter.Tensor method)
isinf() (unit_scaling.parameter.Tensor method)
isnan() (unit_scaling.parameter.Tensor method)
isneginf() (unit_scaling.parameter.Tensor method)
isposinf() (unit_scaling.parameter.Tensor method)
isreal() (unit_scaling.parameter.Tensor method)
istft() (unit_scaling.parameter.Tensor method)
item() (unit_scaling.parameter.Tensor method)
items() (unit_scaling.parameter.OrderedDict method)
itemsize (unit_scaling.parameter.Tensor attribute)
J
jvp() (unit_scaling.utils.ScaleTracker static method)
K
keys() (unit_scaling.parameter.OrderedDict method)
kron() (unit_scaling.parameter.Tensor method)
kthvalue() (unit_scaling.parameter.Tensor method)
L
layer_norm() (in module unit_scaling.functional)
LayerNorm (class in unit_scaling)
lcm() (unit_scaling.parameter.Tensor method)
lcm_() (unit_scaling.parameter.Tensor method)
ldexp() (unit_scaling.parameter.Tensor method)
ldexp_() (unit_scaling.parameter.Tensor method)
le() (unit_scaling.parameter.Tensor method)
le_() (unit_scaling.parameter.Tensor method)
lerp() (unit_scaling.parameter.Tensor method)
lerp_() (unit_scaling.parameter.Tensor method)
less() (unit_scaling.parameter.Tensor method)
less_() (unit_scaling.parameter.Tensor method)
less_equal() (unit_scaling.parameter.Tensor method)
less_equal_() (unit_scaling.parameter.Tensor method)
lgamma() (unit_scaling.parameter.Tensor method)
lgamma_() (unit_scaling.parameter.Tensor method)
Linear (class in unit_scaling)
linear() (in module unit_scaling.functional)
linear_readout() (in module unit_scaling.functional)
LinearReadout (class in unit_scaling)
load_state_dict() (unit_scaling.optim.Adam method)
load_state_dict() (unit_scaling.optim.AdamW method)
load_state_dict() (unit_scaling.optim.SGD method)
log() (unit_scaling.parameter.Tensor method)
log10() (unit_scaling.parameter.Tensor method)
log10_() (unit_scaling.parameter.Tensor method)
log1p() (unit_scaling.parameter.Tensor method)
log1p_() (unit_scaling.parameter.Tensor method)
log2() (unit_scaling.parameter.Tensor method)
log2_() (unit_scaling.parameter.Tensor method)
log_() (unit_scaling.parameter.Tensor method)
log_normal_() (unit_scaling.parameter.Tensor method)
logaddexp() (unit_scaling.parameter.Tensor method)
logaddexp2() (unit_scaling.parameter.Tensor method)
logarithmic_interpolation() (in module unit_scaling.core.functional)
logcumsumexp() (unit_scaling.parameter.Tensor method)
logdet() (unit_scaling.parameter.Tensor method)
logical_and() (unit_scaling.parameter.Tensor method)
logical_and_() (unit_scaling.parameter.Tensor method)
logical_not() (unit_scaling.parameter.Tensor method)
logical_not_() (unit_scaling.parameter.Tensor method)
logical_or() (unit_scaling.parameter.Tensor method)
logical_or_() (unit_scaling.parameter.Tensor method)
logical_xor() (unit_scaling.parameter.Tensor method)
logical_xor_() (unit_scaling.parameter.Tensor method)
logit() (unit_scaling.parameter.Tensor method)
logit_() (unit_scaling.parameter.Tensor method)
logsumexp() (unit_scaling.parameter.Tensor method)
long() (unit_scaling.parameter.Tensor method)
lr_scale_for_depth() (in module unit_scaling.optim)
lr_scale_func_adam() (in module unit_scaling.optim)
lr_scale_func_sgd() (in module unit_scaling.optim)
lt() (unit_scaling.parameter.Tensor method)
lt_() (unit_scaling.parameter.Tensor method)
lu() (unit_scaling.parameter.Tensor method)
lu_solve() (unit_scaling.parameter.Tensor method)
M
map_() (unit_scaling.parameter.Tensor method)
map_nodes_to_values() (unit_scaling.utils.ScaleTrackingInterpreter method)
mark_dirty() (unit_scaling.utils.ScaleTracker method)
mark_non_differentiable() (unit_scaling.utils.ScaleTracker method)
masked_fill() (unit_scaling.parameter.Tensor method)
masked_fill_() (unit_scaling.parameter.Tensor method)
masked_scatter() (unit_scaling.parameter.Tensor method)
masked_scatter_() (unit_scaling.parameter.Tensor method)
masked_select() (unit_scaling.parameter.Tensor method)
matmul() (in module unit_scaling.functional)
matmul() (unit_scaling.parameter.Tensor method)
matrix_exp() (unit_scaling.parameter.Tensor method)
matrix_power() (unit_scaling.parameter.Tensor method)
max() (unit_scaling.parameter.Tensor method)
max_absolute_value (unit_scaling.formats.FPFormat property)
maximum() (unit_scaling.parameter.Tensor method)
mean() (unit_scaling.parameter.Tensor method)
median() (unit_scaling.parameter.Tensor method)
Metrics (class in unit_scaling.transforms)
Metrics.Data (class in unit_scaling.transforms)
mH (unit_scaling.parameter.Tensor attribute)
MHSA (class in unit_scaling)
min() (unit_scaling.parameter.Tensor method)
min_absolute_normal (unit_scaling.formats.FPFormat property)
min_absolute_subnormal (unit_scaling.formats.FPFormat property)
minimum() (unit_scaling.parameter.Tensor method)
MLP (class in unit_scaling)
mm() (unit_scaling.parameter.Tensor method)
mode() (unit_scaling.parameter.Tensor method)
module
    unit_scaling
    unit_scaling.analysis
    unit_scaling.constraints
    unit_scaling.core
    unit_scaling.core.functional
    unit_scaling.formats
    unit_scaling.functional
    unit_scaling.optim
    unit_scaling.parameter
    unit_scaling.scale
    unit_scaling.transforms
    unit_scaling.transforms.utils
    unit_scaling.utils
module_load() (unit_scaling.parameter.Tensor method)
move_to_end() (unit_scaling.parameter.OrderedDict method)
moveaxis() (unit_scaling.parameter.Tensor method)
movedim() (unit_scaling.parameter.Tensor method)
mse_loss() (in module unit_scaling.functional)
msort() (unit_scaling.parameter.Tensor method)
mT (unit_scaling.parameter.Tensor attribute)
mtia() (unit_scaling.parameter.Tensor method)
mul() (unit_scaling.parameter.Tensor method)
mul_() (unit_scaling.parameter.Tensor method)
multinomial() (unit_scaling.parameter.Tensor method)
multiply() (unit_scaling.parameter.Tensor method)
multiply_() (unit_scaling.parameter.Tensor method)
mv() (unit_scaling.parameter.Tensor method)
mvlgamma() (unit_scaling.parameter.Tensor method)
mvlgamma_() (unit_scaling.parameter.Tensor method)
N
names (unit_scaling.parameter.Tensor attribute)
nan_to_num() (unit_scaling.parameter.Tensor method)
nan_to_num_() (unit_scaling.parameter.Tensor method)
nanmean() (unit_scaling.parameter.Tensor method)
nanmedian() (unit_scaling.parameter.Tensor method)
nanquantile() (unit_scaling.parameter.Tensor method)
nansum() (unit_scaling.parameter.Tensor method)
narrow() (unit_scaling.parameter.Tensor method)
narrow_copy() (unit_scaling.parameter.Tensor method)
nbytes (unit_scaling.parameter.Tensor attribute)
ndim (unit_scaling.parameter.Tensor attribute)
ndimension() (unit_scaling.parameter.Tensor method)
ne() (unit_scaling.parameter.Tensor method)
ne_() (unit_scaling.parameter.Tensor method)
neg() (unit_scaling.parameter.Tensor method)
neg_() (unit_scaling.parameter.Tensor method)
negative() (unit_scaling.parameter.Tensor method)
negative_() (unit_scaling.parameter.Tensor method)
nelement() (unit_scaling.parameter.Tensor method)
new_empty() (unit_scaling.parameter.Tensor method)
new_empty_strided() (unit_scaling.parameter.Tensor method)
new_full() (unit_scaling.parameter.Tensor method)
new_ones() (unit_scaling.parameter.Tensor method)
new_tensor() (unit_scaling.parameter.Tensor method)
new_zeros() (unit_scaling.parameter.Tensor method)
nextafter() (unit_scaling.parameter.Tensor method)
nextafter_() (unit_scaling.parameter.Tensor method)
nonzero() (unit_scaling.parameter.Tensor method)
nonzero_static() (unit_scaling.parameter.Tensor method)
norm() (unit_scaling.parameter.Tensor method)
normal_() (unit_scaling.parameter.Tensor method)
not_equal() (unit_scaling.parameter.Tensor method)
not_equal_() (unit_scaling.parameter.Tensor method)
numel() (unit_scaling.parameter.Tensor method)
numpy() (unit_scaling.parameter.Tensor method)
O
OrderedDict (class in unit_scaling.parameter)
orgqr() (unit_scaling.parameter.Tensor method)
ormqr() (unit_scaling.parameter.Tensor method)
outer() (unit_scaling.parameter.Tensor method)
output() (unit_scaling.utils.ScaleTrackingInterpreter method)
P
Parameter() (in module unit_scaling)
Parameter() (in module unit_scaling.parameter)
ParameterData (class in unit_scaling.parameter)
patch_to_expand_modules() (in module unit_scaling.transforms.utils)
permute() (unit_scaling.parameter.Tensor method)
pin_memory() (unit_scaling.parameter.Tensor method)
pinverse() (unit_scaling.parameter.Tensor method)
placeholder() (unit_scaling.utils.ScaleTrackingInterpreter method)
plot() (in module unit_scaling.analysis)
polygamma() (unit_scaling.parameter.Tensor method)
polygamma_() (unit_scaling.parameter.Tensor method)
pop() (unit_scaling.parameter.OrderedDict method)
popitem() (unit_scaling.parameter.OrderedDict method)
positive() (unit_scaling.parameter.Tensor method)
pow() (unit_scaling.parameter.Tensor method)
pow_() (unit_scaling.parameter.Tensor method)
prod() (unit_scaling.parameter.Tensor method)
Protocol (class in unit_scaling.parameter)
prune_non_float_tensors() (in module unit_scaling.transforms)
prune_same_scale_tensors() (in module unit_scaling.transforms)
prune_selected_nodes() (in module unit_scaling.transforms)
put() (unit_scaling.parameter.Tensor method)
put_() (unit_scaling.parameter.Tensor method)
Q
q_per_channel_axis() (unit_scaling.parameter.Tensor method)
q_per_channel_scales() (unit_scaling.parameter.Tensor method)
q_per_channel_zero_points() (unit_scaling.parameter.Tensor method)
q_scale() (unit_scaling.parameter.Tensor method)
q_zero_point() (unit_scaling.parameter.Tensor method)
qr() (unit_scaling.parameter.Tensor method)
qscheme() (unit_scaling.parameter.Tensor method)
quantile() (unit_scaling.parameter.Tensor method)
quantise() (unit_scaling.formats.FPFormat method)
quantise_bwd() (unit_scaling.formats.FPFormat method)
quantise_fwd() (unit_scaling.formats.FPFormat method)
R
rad2deg() (unit_scaling.parameter.Tensor method)
rad2deg_() (unit_scaling.parameter.Tensor method)
random_() (unit_scaling.parameter.Tensor method)
ravel() (unit_scaling.parameter.Tensor method)
real (unit_scaling.parameter.Tensor attribute)
reciprocal() (unit_scaling.parameter.Tensor method)
reciprocal_() (unit_scaling.parameter.Tensor method)
record_stream() (unit_scaling.parameter.Tensor method)
refine_names() (unit_scaling.parameter.Tensor method)
register_hook() (unit_scaling.parameter.Tensor method)
register_load_state_dict_post_hook() (unit_scaling.optim.Adam method)
register_load_state_dict_post_hook() (unit_scaling.optim.AdamW method)
register_load_state_dict_post_hook() (unit_scaling.optim.SGD method)
register_load_state_dict_pre_hook() (unit_scaling.optim.Adam method)
register_load_state_dict_pre_hook() (unit_scaling.optim.AdamW method)
register_load_state_dict_pre_hook() (unit_scaling.optim.SGD method)
register_post_accumulate_grad_hook() (unit_scaling.parameter.Tensor method)
register_state_dict_post_hook() (unit_scaling.optim.Adam method)
register_state_dict_post_hook() (unit_scaling.optim.AdamW method)
register_state_dict_post_hook() (unit_scaling.optim.SGD method)
register_state_dict_pre_hook() (unit_scaling.optim.Adam method)
register_state_dict_pre_hook() (unit_scaling.optim.AdamW method)
register_state_dict_pre_hook() (unit_scaling.optim.SGD method)
register_step_post_hook() (unit_scaling.optim.Adam method)
register_step_post_hook() (unit_scaling.optim.AdamW method)
register_step_post_hook() (unit_scaling.optim.SGD method)
register_step_pre_hook() (unit_scaling.optim.Adam method)
register_step_pre_hook() (unit_scaling.optim.AdamW method)
register_step_pre_hook() (unit_scaling.optim.SGD method)
remainder() (unit_scaling.parameter.Tensor method)
remainder_() (unit_scaling.parameter.Tensor method)
rename() (unit_scaling.parameter.Tensor method)
rename_() (unit_scaling.parameter.Tensor method)
renorm() (unit_scaling.parameter.Tensor method)
renorm_() (unit_scaling.parameter.Tensor method)
repeat() (unit_scaling.parameter.Tensor method)
repeat_interleave() (unit_scaling.parameter.Tensor method)
replace_node_with_function() (in module unit_scaling.transforms.utils)
requires_grad (unit_scaling.parameter.Tensor attribute)
requires_grad_() (unit_scaling.parameter.Tensor method)
reshape() (unit_scaling.parameter.Tensor method)
reshape_as() (unit_scaling.parameter.Tensor method)
residual_add() (in module unit_scaling.functional)
residual_apply() (in module unit_scaling.functional)
residual_split() (in module unit_scaling.functional)
resize_() (unit_scaling.parameter.Tensor method)
resize_as_() (unit_scaling.parameter.Tensor method)
resolve_conj() (unit_scaling.parameter.Tensor method)
resolve_neg() (unit_scaling.parameter.Tensor method)
retain_grad() (unit_scaling.parameter.Tensor method)
retains_grad (unit_scaling.parameter.Tensor attribute)
rms() (in module unit_scaling.core.functional)
rms_norm() (in module unit_scaling.functional)
RMSNorm (class in unit_scaling)
roll() (unit_scaling.parameter.Tensor method)
rot90() (unit_scaling.parameter.Tensor method)
round() (unit_scaling.parameter.Tensor method)
round_() (unit_scaling.parameter.Tensor method)
rsqrt() (unit_scaling.parameter.Tensor method)
rsqrt_() (unit_scaling.parameter.Tensor method)
run() (unit_scaling.utils.ScaleTrackingInterpreter method)
run_node() (unit_scaling.utils.ScaleTrackingInterpreter method)
S
save_for_backward() (unit_scaling.utils.ScaleTracker method)
save_for_forward() (unit_scaling.utils.ScaleTracker method)
scale_bwd() (in module unit_scaling.scale)
scale_elementwise() (in module unit_scaling.core.functional)
scale_fwd() (in module unit_scaling.scale)
scaled_dot_product_attention() (in module unit_scaling.functional)
scaled_parameters() (in module unit_scaling.optim)
ScalePair (class in unit_scaling.utils)
ScaleTracker (class in unit_scaling.utils)
ScaleTrackingInterpreter (class in unit_scaling.utils)
scatter() (unit_scaling.parameter.Tensor method)
scatter_() (unit_scaling.parameter.Tensor method)
scatter_add() (unit_scaling.parameter.Tensor method)
scatter_add_() (unit_scaling.parameter.Tensor method)
scatter_reduce() (unit_scaling.parameter.Tensor method)
scatter_reduce_() (unit_scaling.parameter.Tensor method)
select() (unit_scaling.parameter.Tensor method)
select_scatter() (unit_scaling.parameter.Tensor method)
set_() (unit_scaling.parameter.Tensor method)
set_materialize_grads() (unit_scaling.utils.ScaleTracker method)
setdefault() (unit_scaling.parameter.OrderedDict method)
setup_context() (unit_scaling.utils.ScaleTracker static method)
SGD (class in unit_scaling.optim)
sgn() (unit_scaling.parameter.Tensor method)
sgn_() (unit_scaling.parameter.Tensor method)
shape (unit_scaling.parameter.Tensor attribute)
share_memory_() (unit_scaling.parameter.Tensor method)
short() (unit_scaling.parameter.Tensor method)
sigmoid() (unit_scaling.parameter.Tensor method)
sigmoid_() (unit_scaling.parameter.Tensor method)
sign() (unit_scaling.parameter.Tensor method)
sign_() (unit_scaling.parameter.Tensor method)
signbit() (unit_scaling.parameter.Tensor method)
SiLU (class in unit_scaling)
silu() (in module unit_scaling.functional)
silu_glu() (in module unit_scaling.functional)
simulate_format() (in module unit_scaling.transforms)
simulate_fp8() (in module unit_scaling.transforms)
sin() (unit_scaling.parameter.Tensor method)
sin_() (unit_scaling.parameter.Tensor method)
sinc() (unit_scaling.parameter.Tensor method)
sinc_() (unit_scaling.parameter.Tensor method)
sinh() (unit_scaling.parameter.Tensor method)
sinh_() (unit_scaling.parameter.Tensor method)
size() (unit_scaling.parameter.Tensor method)
slice_scatter() (unit_scaling.parameter.Tensor method)
slogdet() (unit_scaling.parameter.Tensor method)
smm() (unit_scaling.parameter.Tensor method)
Softmax (class in unit_scaling)
softmax() (in module unit_scaling.functional)
softmax() (unit_scaling.parameter.Tensor method)
sort() (unit_scaling.parameter.Tensor method)
sparse_dim() (unit_scaling.parameter.Tensor method)
sparse_mask() (unit_scaling.parameter.Tensor method)
sparse_resize_() (unit_scaling.parameter.Tensor method)
sparse_resize_and_clear_() (unit_scaling.parameter.Tensor method)
split() (unit_scaling.parameter.Tensor method)
sqrt() (unit_scaling.parameter.Tensor method)
sqrt_() (unit_scaling.parameter.Tensor method)
square() (unit_scaling.parameter.Tensor method)
square_() (unit_scaling.parameter.Tensor method)
squeeze() (unit_scaling.parameter.Tensor method)
squeeze_() (unit_scaling.parameter.Tensor method)
sspaddmm() (unit_scaling.parameter.Tensor method)
state_dict() (unit_scaling.optim.Adam method)
state_dict() (unit_scaling.optim.AdamW method)
state_dict() (unit_scaling.optim.SGD method)
std() (unit_scaling.parameter.Tensor method)
step() (unit_scaling.optim.Adam method)
(unit_scaling.optim.AdamW method)
(unit_scaling.optim.SGD method)
stft() (unit_scaling.parameter.Tensor method)
storage() (unit_scaling.parameter.Tensor method)
storage_offset() (unit_scaling.parameter.Tensor method)
storage_type() (unit_scaling.parameter.Tensor method)
stride() (unit_scaling.parameter.Tensor method)
sub() (unit_scaling.parameter.Tensor method)
sub_() (unit_scaling.parameter.Tensor method)
subtract() (unit_scaling.parameter.Tensor method)
subtract_() (unit_scaling.parameter.Tensor method)
sum() (unit_scaling.parameter.Tensor method)
sum_to_size() (unit_scaling.parameter.Tensor method)
svd() (unit_scaling.parameter.Tensor method)
swapaxes() (unit_scaling.parameter.Tensor method)
swapaxes_() (unit_scaling.parameter.Tensor method)
swapdims() (unit_scaling.parameter.Tensor method)
swapdims_() (unit_scaling.parameter.Tensor method)
T
T (unit_scaling.parameter.Tensor attribute)
t() (unit_scaling.parameter.Tensor method)
t_() (unit_scaling.parameter.Tensor method)
take() (unit_scaling.parameter.Tensor method)
take_along_dim() (unit_scaling.parameter.Tensor method)
tan() (unit_scaling.parameter.Tensor method)
tan_() (unit_scaling.parameter.Tensor method)
tanh() (unit_scaling.parameter.Tensor method)
tanh_() (unit_scaling.parameter.Tensor method)
Tensor (class in unit_scaling.parameter)
tensor_split() (unit_scaling.parameter.Tensor method)
tile() (unit_scaling.parameter.Tensor method)
to() (unit_scaling.parameter.Tensor method)
to_dense() (unit_scaling.parameter.Tensor method)
to_grad_input_scale() (in module unit_scaling.constraints)
to_left_grad_scale() (in module unit_scaling.constraints)
to_mkldnn() (unit_scaling.parameter.Tensor method)
to_output_scale() (in module unit_scaling.constraints)
to_padded_tensor() (unit_scaling.parameter.Tensor method)
to_right_grad_scale() (in module unit_scaling.constraints)
to_sparse() (unit_scaling.parameter.Tensor method)
to_sparse_bsc() (unit_scaling.parameter.Tensor method)
to_sparse_bsr() (unit_scaling.parameter.Tensor method)
to_sparse_coo() (unit_scaling.parameter.Tensor method)
to_sparse_csc() (unit_scaling.parameter.Tensor method)
to_sparse_csr() (unit_scaling.parameter.Tensor method)
tolist() (unit_scaling.parameter.Tensor method)
topk() (unit_scaling.parameter.Tensor method)
torch_nn_modules_to_user_modules() (in module unit_scaling.transforms.utils)
trace() (unit_scaling.parameter.Tensor method)
track_scales() (in module unit_scaling.transforms)
transformer_residual_scaling_rule() (in module unit_scaling)
(in module unit_scaling.core.functional)
TransformerDecoder (class in unit_scaling)
TransformerLayer (class in unit_scaling)
transpose() (unit_scaling.parameter.Tensor method)
transpose_() (unit_scaling.parameter.Tensor method)
triangular_solve() (unit_scaling.parameter.Tensor method)
tril() (unit_scaling.parameter.Tensor method)
tril_() (unit_scaling.parameter.Tensor method)
triu() (unit_scaling.parameter.Tensor method)
triu_() (unit_scaling.parameter.Tensor method)
true_divide() (unit_scaling.parameter.Tensor method)
true_divide_() (unit_scaling.parameter.Tensor method)
trunc() (unit_scaling.parameter.Tensor method)
trunc_() (unit_scaling.parameter.Tensor method)
tuple_to_format() (in module unit_scaling.formats)
type() (unit_scaling.parameter.Tensor method)
type_as() (unit_scaling.parameter.Tensor method)
U
unbind() (unit_scaling.parameter.Tensor method)
unflatten() (unit_scaling.parameter.Tensor method)
unfold() (unit_scaling.parameter.Tensor method)
uniform_() (unit_scaling.parameter.Tensor method)
unique() (unit_scaling.parameter.Tensor method)
unique_consecutive() (unit_scaling.parameter.Tensor method)
unit_scale() (in module unit_scaling.transforms)
unit_scaling
module
unit_scaling.analysis
module
unit_scaling.constraints
module
unit_scaling.core
module
unit_scaling.core.functional
module
unit_scaling.formats
module
unit_scaling.functional
module
unit_scaling.optim
module
unit_scaling.parameter
module
unit_scaling.scale
module
unit_scaling.transforms
module
unit_scaling.transforms.utils
module
unit_scaling.utils
module
unsafe_chunk() (unit_scaling.parameter.Tensor method)
unsafe_split() (unit_scaling.parameter.Tensor method)
unsqueeze() (unit_scaling.parameter.Tensor method)
unsqueeze_() (unit_scaling.parameter.Tensor method)
untyped_storage() (unit_scaling.parameter.Tensor method)
update() (unit_scaling.parameter.OrderedDict method)
V
values() (unit_scaling.parameter.OrderedDict method)
(unit_scaling.parameter.Tensor method)
var() (unit_scaling.parameter.Tensor method)
vdot() (unit_scaling.parameter.Tensor method)
view() (unit_scaling.parameter.Tensor method)
view_as() (unit_scaling.parameter.Tensor method)
visualiser() (in module unit_scaling)
(in module unit_scaling.analysis)
vjp() (unit_scaling.utils.ScaleTracker static method)
vmap() (unit_scaling.utils.ScaleTracker static method)
vsplit() (unit_scaling.parameter.Tensor method)
W
weight (unit_scaling.Conv1d attribute)
(unit_scaling.Embedding attribute)
(unit_scaling.LayerNorm attribute)
(unit_scaling.Linear attribute)
(unit_scaling.LinearReadout attribute)
(unit_scaling.RMSNorm attribute)
where() (unit_scaling.parameter.Tensor method)
X
xlogy() (unit_scaling.parameter.Tensor method)
xlogy_() (unit_scaling.parameter.Tensor method)
xpu() (unit_scaling.parameter.Tensor method)
Z
zero_() (unit_scaling.parameter.Tensor method)
zero_grad() (unit_scaling.optim.Adam method)
(unit_scaling.optim.AdamW method)
(unit_scaling.optim.SGD method)