From dd1880ca7469f1da68222303231e68edfa9101b8 Mon Sep 17 00:00:00 2001
From: muyou <952349605@qq.com>
Date: Sat, 20 Apr 2024 19:52:50 +0800
Subject: [PATCH] add comments

---
 improved_diffusion/gaussian_diffusion.py | 8 ++++----
 improved_diffusion/resample.py           | 6 +++---
 improved_diffusion/script_util.py        | 2 +-
 improved_diffusion/train_util.py         | 4 ++--
 improved_diffusion/unet.py               | 4 ++--
 scripts/image_sample.py                  | 2 +-
 6 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/improved_diffusion/gaussian_diffusion.py b/improved_diffusion/gaussian_diffusion.py
index 403d474f3b..7837a37be7 100644
--- a/improved_diffusion/gaussian_diffusion.py
+++ b/improved_diffusion/gaussian_diffusion.py
@@ -138,9 +138,9 @@ def __init__(
         self.num_timesteps = int(betas.shape[0])
 
         alphas = 1.0 - betas
-        self.alphas_cumprod = np.cumprod(alphas, axis=0)
-        self.alphas_cumprod_prev = np.append(1.0, self.alphas_cumprod[:-1])
-        self.alphas_cumprod_next = np.append(self.alphas_cumprod[1:], 0.0)
+        self.alphas_cumprod = np.cumprod(alphas, axis=0)  # alpha_bar_t
+        self.alphas_cumprod_prev = np.append(1.0, self.alphas_cumprod[:-1])  # alpha_bar_{t-1}
+        self.alphas_cumprod_next = np.append(self.alphas_cumprod[1:], 0.0)  # alpha_bar_{t+1}
         assert self.alphas_cumprod_prev.shape == (self.num_timesteps,)
 
         # calculations for diffusion q(x_t | x_{t-1}) and others
@@ -661,7 +661,7 @@ def _vb_terms_bpd(
         kl = normal_kl(
             true_mean, true_log_variance_clipped, out["mean"], out["log_variance"]
         )
-        kl = mean_flat(kl) / np.log(2.0)
+        kl = mean_flat(kl) / np.log(2.0)  # np.log and torch.log are natural (base-e) logs; dividing by np.log(2.0) converts from nats to base 2 (bits)
 
         decoder_nll = -discretized_gaussian_log_likelihood(
             x_start, means=out["mean"], log_scales=0.5 * out["log_variance"]
diff --git a/improved_diffusion/resample.py b/improved_diffusion/resample.py
index c82eccdcd4..7a1768bd72 100644
--- a/improved_diffusion/resample.py
+++ b/improved_diffusion/resample.py
@@ -134,15 +134,15 @@ def __init__(self, diffusion, history_per_term=10, uniform_prob=0.001):
     def weights(self):
         if not self._warmed_up():
             return np.ones([self.diffusion.num_timesteps], dtype=np.float64)
-        weights = np.sqrt(np.mean(self._loss_history ** 2, axis=-1))
+        weights = np.sqrt(np.mean(self._loss_history ** 2, axis=-1))  # the larger the loss, the larger the weight
         weights /= np.sum(weights)
-        weights *= 1 - self.uniform_prob
+        weights *= 1 - self.uniform_prob  # mix the loss-based weights with a uniform component
         weights += self.uniform_prob / len(weights)
         return weights
 
     def update_with_all_losses(self, ts, losses):
         for t, loss in zip(ts, losses):
-            if self._loss_counts[t] == self.history_per_term:
+            if self._loss_counts[t] == self.history_per_term:  # keep 10 historical loss values for each timestep
                 # Shift out the oldest loss term.
                 self._loss_history[t, :-1] = self._loss_history[t, 1:]
                 self._loss_history[t, -1] = loss
diff --git a/improved_diffusion/script_util.py b/improved_diffusion/script_util.py
index f14e474ff2..a6ba40ed65 100644
--- a/improved_diffusion/script_util.py
+++ b/improved_diffusion/script_util.py
@@ -112,7 +112,7 @@ def create_model(
     return UNetModel(
         in_channels=3,
         model_channels=num_channels,
-        out_channels=(3 if not learn_sigma else 6),
+        out_channels=(3 if not learn_sigma else 6),  # when the variance is also predicted, it has the same shape as x, so the output channels are set to 6
         num_res_blocks=num_res_blocks,
         attention_resolutions=tuple(attention_ds),
         dropout=dropout,
diff --git a/improved_diffusion/train_util.py b/improved_diffusion/train_util.py
index 1867604145..6bd38f3942 100644
--- a/improved_diffusion/train_util.py
+++ b/improved_diffusion/train_util.py
@@ -194,7 +194,7 @@ def forward_backward(self, batch, cond):
                 for k, v in cond.items()
             }
             last_batch = (i + self.microbatch) >= batch.shape[0]
-            t, weights = self.schedule_sampler.sample(micro.shape[0], dist_util.dev())
+            t, weights = self.schedule_sampler.sample(micro.shape[0], dist_util.dev())  # sample t according to the weights
 
             compute_losses = functools.partial(
                 self.diffusion.training_losses,
@@ -215,7 +215,7 @@ def forward_backward(self, batch, cond):
                     t, losses["loss"].detach()
                 )
 
-            loss = (losses["loss"] * weights).mean()
+            loss = (losses["loss"] * weights).mean()  # rescale the loss values by the sampling weights
             log_loss_dict(
                 self.diffusion, t, {k: v * weights for k, v in losses.items()}
             )
diff --git a/improved_diffusion/unet.py b/improved_diffusion/unet.py
index a8087a02f0..83e8e17028 100644
--- a/improved_diffusion/unet.py
+++ b/improved_diffusion/unet.py
@@ -311,7 +311,7 @@ def __init__(
         dims=2,
         num_classes=None,
         use_checkpoint=False,
-        num_heads=1,
+        num_heads=1,  # number of heads used for multi-head attention
         num_heads_upsample=-1,
         use_scale_shift_norm=False,
     ):
@@ -341,7 +341,7 @@ def __init__(
         )
 
         if self.num_classes is not None:
-            self.label_emb = nn.Embedding(num_classes, time_embed_dim)
+            self.label_emb = nn.Embedding(num_classes, time_embed_dim)  # besides the noisy input and the timestep, a class label can also be fed in, so that images of that class are generated, similar to a conditional GAN
 
         self.input_blocks = nn.ModuleList(
             [
diff --git a/scripts/image_sample.py b/scripts/image_sample.py
index 66326f6908..7e10fc1511 100644
--- a/scripts/image_sample.py
+++ b/scripts/image_sample.py
@@ -45,7 +45,7 @@ def main():
             classes = th.randint(
                 low=0, high=NUM_CLASSES, size=(args.batch_size,), device=dist_util.dev()
            )
-            model_kwargs["y"] = classes
+            model_kwargs["y"] = classes  # a class label can be supplied at sampling time, and images of that class will be generated
         sample_fn = (
             diffusion.p_sample_loop if not args.use_ddim else diffusion.ddim_sample_loop
         )
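
Note (below the patch, not part of it): a minimal standalone sketch of the importance-sampling scheme the new comments describe. It is plain NumPy with hypothetical sizes; it assumes the sampler returns 1/(T * p_t) importance weights so that the reweighted mean loss stays unbiased, matching the behaviour the comments in train_util.py and resample.py refer to.

import numpy as np

num_timesteps, history_per_term, uniform_prob = 8, 10, 0.001
# hypothetical warmed-up loss history: one row of recent losses per timestep
loss_history = np.abs(np.random.randn(num_timesteps, history_per_term))

probs = np.sqrt(np.mean(loss_history ** 2, axis=-1))  # the larger the loss, the larger the weight
probs /= np.sum(probs)
probs *= 1 - uniform_prob               # mix with a small uniform component so that
probs += uniform_prob / len(probs)      # every timestep keeps a nonzero probability

t = np.random.choice(num_timesteps, size=4, p=probs)  # sample t according to the weights
weights = 1.0 / (num_timesteps * probs[t])            # 1/(T * p_t), used to rescale the per-sample loss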