AYYasaswini committed on
Commit
822c10e
1 Parent(s): 13dcc8e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -55
app.py CHANGED
@@ -147,10 +147,10 @@ def latents_to_pil(latents):
147
 
148
  If you uncomment the cell below you'll see that in this case the `scheduler.add_noise` function literally just adds noise scaled by sigma: `noisy_samples = original_samples + noise * sigmas`
149
  """
150
- encoded = pil_to_latent(input_image)
151
- encoded.shape
152
- decoded = latents_to_pil(encoded)[0]
153
- decoded
154
  # ??scheduler.add_noise
155
 
156
  """Other diffusion models may be trained with different noising and scheduling approaches, some of which keep the variance fairly constant across noise levels ('variance preserving') with different scaling and mixing tricks instead of having noisy latents with higher and higher variance as more noise is added ('variance exploding').
@@ -170,57 +170,6 @@ To noise our image we'll use code like that shown above, using the scheduler to
170
  """
171
 
172
  # Settings (same as before except for the new prompt)
173
- prompt = ["A colorful dancer, nat geo photo"]
174
- height = 512 # default height of Stable Diffusion
175
- width = 512 # default width of Stable Diffusion
176
- num_inference_steps = 50 # Number of denoising steps
177
- guidance_scale = 8 # Scale for classifier-free guidance
178
- generator = torch.manual_seed(32) # Seed generator to create the initial latent noise
179
- batch_size = 1
180
-
181
- # Prep text (same as before)
182
- text_input = tokenizer(prompt, padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
183
- with torch.no_grad():
184
- text_embeddings = text_encoder(text_input.input_ids.to(torch_device))[0]
185
- max_length = text_input.input_ids.shape[-1]
186
- uncond_input = tokenizer(
187
- [""] * batch_size, padding="max_length", max_length=max_length, return_tensors="pt"
188
- )
189
- with torch.no_grad():
190
- uncond_embeddings = text_encoder(uncond_input.input_ids.to(torch_device))[0]
191
- text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
192
-
193
- # Prep Scheduler (setting the number of inference steps)
194
- set_timesteps(scheduler, num_inference_steps)
195
-
196
- # Prep latents (noising appropriately for start_step)
197
- start_step = 10
198
- start_sigma = scheduler.sigmas[start_step]
199
- noise = torch.randn_like(encoded)
200
- latents = scheduler.add_noise(encoded, noise, timesteps=torch.tensor([scheduler.timesteps[start_step]]))
201
- latents = latents.to(torch_device).float()
202
-
203
- # Loop
204
- for i, t in tqdm(enumerate(scheduler.timesteps), total=len(scheduler.timesteps)):
205
- if i >= start_step: # << This is the only modification to the loop we do
206
-
207
- # expand the latents if we are doing classifier-free guidance to avoid doing two forward passes.
208
- latent_model_input = torch.cat([latents] * 2)
209
- sigma = scheduler.sigmas[i]
210
- latent_model_input = scheduler.scale_model_input(latent_model_input, t)
211
-
212
- # predict the noise residual
213
- with torch.no_grad():
214
- noise_pred = unet(latent_model_input, t, encoder_hidden_states=text_embeddings)["sample"]
215
-
216
- # perform guidance
217
- noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
218
- noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
219
-
220
- # compute the previous noisy sample x_t -> x_t-1
221
- latents = scheduler.step(noise_pred, t, latents).prev_sample
222
-
223
- latents_to_pil(latents)[0]
224
 
225
  """You can see that some colours and structure from the image are kept, but we now have a new picture! The more noise you add and the more steps you do, the further away it gets from the input image.
226
 
 
147
 
148
  If you uncomment the cell below you'll see that in this case the `scheduler.add_noise` function literally just adds noise scaled by sigma: `noisy_samples = original_samples + noise * sigmas`
149
  """
150
+ #encoded = pil_to_latent(input_image)
151
+ #encoded.shape
152
+ #decoded = latents_to_pil(encoded)[0]
153
+ #decoded
154
  # ??scheduler.add_noise
155
 
156
  """Other diffusion models may be trained with different noising and scheduling approaches, some of which keep the variance fairly constant across noise levels ('variance preserving') with different scaling and mixing tricks instead of having noisy latents with higher and higher variance as more noise is added ('variance exploding').
 
170
  """
171
 
172
  # Settings (same as before except for the new prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
  """You can see that some colours and structure from the image are kept, but we now have a new picture! The more noise you add and the more steps you do, the further away it gets from the input image.
175