sdfacid

Running on Zero

App Files Files Community

sdfacid / app.py

aiqtech

Update app.py

a320607 verified 17 days ago

raw

history blame contribute delete

No virus

7.93 kB

	import spaces
	import random
	import torch
	import cv2
	import insightface
	import gradio as gr
	import numpy as np
	import os
	from huggingface_hub import snapshot_download
	from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor, pipeline
	from SAK.pipelines.pipeline_stable_diffusion_xl_chatglm_256_ipadapter_FaceID import StableDiffusionXLPipeline
	from SAK.models.modeling_chatglm import ChatGLMModel
	from SAK.models.tokenization_chatglm import ChatGLMTokenizer
	from diffusers import AutoencoderKL
	from SAK.models.unet_2d_condition import UNet2DConditionModel
	from diffusers import EulerDiscreteScheduler
	from PIL import Image
	from insightface.app import FaceAnalysis
	from insightface.data import get_image as ins_get_image

	# Import-time setup: fetch both checkpoints from the Hugging Face Hub and
	# assemble the generation pipeline from its individual components.
	device = "cuda"
	ckpt_dir = snapshot_download(repo_id="SunderAli17/SAK")
	ckpt_dir_faceid = snapshot_download(
	    repo_id="SunderAli17/SAK-IP-Adapter-FaceTransform-Plus"
	)

	# Text encoder and tokenizer are read from the same checkpoint sub-folder.
	text_encoder = ChatGLMModel.from_pretrained(
	    f"{ckpt_dir}/text_encoder",
	    torch_dtype=torch.float16,
	)
	text_encoder = text_encoder.half().to(device)
	tokenizer = ChatGLMTokenizer.from_pretrained(f"{ckpt_dir}/text_encoder")

	# VAE and UNet are converted to half precision before being moved to `device`.
	vae = AutoencoderKL.from_pretrained(f"{ckpt_dir}/vae", revision=None)
	vae = vae.half().to(device)
	scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
	unet = UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None)
	unet = unet.half().to(device)

	# CLIP image encoder (from the FaceID checkpoint) and its 336x336 preprocessor.
	clip_image_encoder = CLIPVisionModelWithProjection.from_pretrained(
	    f"{ckpt_dir_faceid}/clip-vit-large-patch14-336",
	    ignore_mismatched_sizes=True,
	).to(device)
	clip_image_processor = CLIPImageProcessor(size=336, crop_size=336)

	pipe = StableDiffusionXLPipeline(
	    vae=vae,
	    text_encoder=text_encoder,
	    tokenizer=tokenizer,
	    unet=unet,
	    scheduler=scheduler,
	    face_clip_encoder=clip_image_encoder,
	    face_clip_processor=clip_image_processor,
	    force_zeros_for_empty_prompt=False,
	)

	# Initialise the translation model used by process_prompt().
	translator = pipeline(task="translation", model="Helsinki-NLP/opus-mt-ko-en")

	# Prompt pre-processing helper.
	def process_prompt(prompt):
	    """Return ``(original_prompt, english_prompt)`` for a user prompt.

	    The prompt is sent through the module-level ``translator`` only when it
	    contains at least one Hangul character, i.e. a code point in
	    U+3131..U+3163 (compatibility jamo) or U+AC00..U+D7A3 (syllables).
	    Otherwise the prompt is returned unchanged in both positions.

	    Args:
	        prompt: Text typed by the user. ``None`` and ``""`` are accepted.

	    Returns:
	        A 2-tuple ``(original, english)``. For an empty or ``None`` prompt
	        both entries are the empty string.
	    """
	    # Iterating over None would raise TypeError; treat it like an empty prompt.
	    if not prompt:
	        return "", ""

	    contains_hangul = any(
	        '\u3131' <= char <= '\u3163' or '\uac00' <= char <= '\ud7a3'
	        for char in prompt
	    )
	    if not contains_hangul:
	        return prompt, prompt

	    translated = translator(prompt)[0]['translation_text']
	    return prompt, translated

	class FaceInfoGenerator():
	    """Wrapper around an insightface ``FaceAnalysis`` app that returns one face.

	    The analysis app is created with the ``antelopev2`` model pack and is
	    prepared with ``ctx_id=0`` and a 640x640 detection size.
	    """

	    def __init__(self, root_dir = "./.insightface/"):
	        """Create and prepare the face-analysis app.

	        Args:
	            root_dir: Directory passed to ``FaceAnalysis`` as ``root``.
	        """
	        self.app = FaceAnalysis(
	            name = 'antelopev2',
	            root = root_dir,
	            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'],
	        )
	        self.app.prepare(ctx_id = 0, det_size = (640, 640))

	    @staticmethod
	    def _largest_face(face_infos):
	        """Return the entry with the largest ``bbox`` area, or ``None`` if empty.

	        Each entry must support ``entry['bbox']`` yielding ``(l, t, r, b)``.
	        When several faces share the largest area, the first one is returned.
	        """
	        if len(face_infos) == 0:
	            return None

	        def bbox_area(face):
	            left, top, right, bottom = face['bbox'][:4]
	            return (right - left) * (bottom - top)

	        # max() is a single pass; no need to sort every detection.
	        return max(face_infos, key = bbox_area)

	    def get_faceinfo_one_img(self, face_image):
	        """Detect faces in ``face_image`` and return the largest one.

	        Args:
	            face_image: Image convertible with ``np.array``; the channels are
	                swapped RGB -> BGR before detection, so RGB input is assumed.

	        Returns:
	            The detection with the largest bounding box, or ``None`` when no
	            face was found.
	        """
	        bgr_image = cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR)
	        return self._largest_face(self.app.get(bgr_image))

	def face_bbox_to_square(bbox):
	    """Turn an ``(l, t, r, b)`` box into a square ``[l, t, r, b]`` box.

	    The square keeps the centre of the input box and its side equals the
	    longer of the input's width and height. No clamping is applied, so the
	    result can extend past the edges of the original box on the shorter axis.

	    Args:
	        bbox: Iterable of four numbers ``(left, top, right, bottom)``.

	    Returns:
	        A list ``[left, top, right, bottom]`` describing the square.
	    """
	    left, top, right, bottom = bbox
	    center_x = (left + right) / 2
	    center_y = (top + bottom) / 2
	    # Half the side of the square: half of the longer edge of the input box.
	    half_side = max(right - left, bottom - top) / 2

	    return [
	        center_x - half_side,
	        center_y - half_side,
	        center_x + half_side,
	        center_y + half_side,
	    ]

	# Upper bound for seeds: the largest signed 32-bit integer. Used as the seed
	# slider maximum and as the upper limit when a random seed is drawn.
	MAX_SEED = np.iinfo(np.int32).max
	# NOTE(review): not referenced anywhere in the visible file; infer() passes a
	# hard-coded 1024x1024 output size instead.
	MAX_IMAGE_SIZE = 1024
	# Single shared face detector, constructed once at import time.
	face_info_generator = FaceInfoGenerator()

	@spaces.GPU
	def infer(prompt,
	          image = None,
	          negative_prompt = "nsfw，Face shadows，Low resolution，JPEG artifacts、Vague、bad，Neon lights",
	          seed = 66,
	          randomize_seed = False,
	          guidance_scale = 5.0,
	          num_inference_steps = 50
	          ):
	    """Generate a 1024x1024 image conditioned on a prompt and a reference face.

	    Args:
	        prompt: User prompt; Hangul prompts are translated by process_prompt().
	        image: PIL image containing the reference face.
	        negative_prompt: Negative prompt; translated the same way as ``prompt``.
	        seed: Seed used when ``randomize_seed`` is false.
	        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` when true.
	        guidance_scale: Guidance scale forwarded to the pipeline.
	        num_inference_steps: Number of denoising steps forwarded to the pipeline.

	    Returns:
	        ``(image, seed, original_prompt, english_prompt)`` where ``image`` is
	        the first image produced by the pipeline and ``seed`` is the seed
	        that was actually used.

	    Raises:
	        gr.Error: If no image was supplied or no face was detected in it.
	    """
	    global pipe

	    # Without this guard a missing upload fails deep inside face detection.
	    if image is None:
	        raise gr.Error("이미지를 업로드해 주세요.")

	    original_prompt, english_prompt = process_prompt(prompt or "")
	    # The UI's default negative prompt is Korean; translate it like the prompt
	    # so both texts reach the text encoder in the same language.
	    if negative_prompt:
	        _, negative_prompt = process_prompt(negative_prompt)

	    # Slider values may arrive as floats; manual_seed() needs an int.
	    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
	    generator = torch.Generator().manual_seed(seed)

	    # Detect the face first so a bad input fails before the adapter is loaded.
	    face_info = face_info_generator.get_faceinfo_one_img(image)
	    if face_info is None:
	        raise gr.Error(
	            "이미지에서 얼굴을 찾지 못했습니다. 얼굴이 선명하게 보이는 이미지를 사용해 주세요."
	        )

	    face_bbox_square = face_bbox_to_square(face_info["bbox"])
	    # The face crop is resized to 336x336 and passed as a one-element list.
	    crop_image = [image.crop(face_bbox_square).resize((336, 336))]
	    face_embeds = torch.from_numpy(np.array([face_info["embedding"]]))
	    face_embeds = face_embeds.to(device, dtype = torch.float16)

	    pipe = pipe.to(device)
	    pipe.load_ip_adapter_faceid_plus(f'{ckpt_dir_faceid}/ipa-faceid-plus.bin', device = device)
	    scale = 0.8
	    pipe.set_face_fidelity_scale(scale)

	    image = pipe(
	        prompt = english_prompt,
	        negative_prompt = negative_prompt,
	        height = 1024,
	        width = 1024,
	        num_inference_steps = int(num_inference_steps),
	        guidance_scale = guidance_scale,
	        num_images_per_prompt = 1,
	        generator = generator,
	        face_crop_image = crop_image,
	        face_insightface_embeds = face_embeds
	    ).images[0]

	    return image, seed, original_prompt, english_prompt

	# Rows for gr.Examples: each row is [prompt text, path of the reference image].
	examples = [
	    [
	        "wearing a full suit sitting in a restaurant with candle lights",
	        "image/test0.png",
	    ],
	    [
	        "Wild cowboy hat with western town and horses in the background",
	        "image/test1.png",
	    ],
	]

	def load_description(fp):
	    """Read a UTF-8 text file and return its full contents.

	    Args:
	        fp: Path of the file to read.

	    Returns:
	        The file's text as a single string.
	    """
	    with open(fp, 'r', encoding='utf-8') as f:
	        content = f.read()
	    return content

	# Custom stylesheet passed to gr.Blocks: hides the page's <footer> element.
	css = """
	footer {
	visibility: hidden;
	}
	"""

	# UI layout: inputs in the left column, outputs in the right column,
	# followed by a row of clickable examples.
	with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as SAK:
	    with gr.Row():
	        # Left column: prompt, reference image, advanced settings, run button.
	        with gr.Column(elem_id="col-left"):
	            with gr.Row():
	                prompt = gr.Textbox(
	                    label="프롬프트",
	                    placeholder="프롬프트를 입력하세요 (한글 또는 영어)",
	                    lines=2
	                )
	            with gr.Row():
	                # type="pil" so infer() receives a PIL image it can crop.
	                image = gr.Image(label="이미지", type="pil")
	            with gr.Accordion("고급 설정", open=False):
	                negative_prompt = gr.Textbox(
	                    label="네거티브 프롬프트",
	                    placeholder="네거티브 프롬프트를 입력하세요",
	                    visible=True,
	                    value="nsfw, 얼굴 그림자, 저해상도, jpeg 아티팩트, 흐릿함, 열악함, 네온 조명"
	                )
	                seed = gr.Slider(
	                    label="시드",
	                    minimum=0,
	                    maximum=MAX_SEED,
	                    step=1,
	                    value=0,
	                )
	                randomize_seed = gr.Checkbox(label="시드 무작위화", value=True)
	                with gr.Row():
	                    guidance_scale = gr.Slider(
	                        label="가이던스 스케일",
	                        minimum=0.0,
	                        maximum=10.0,
	                        step=0.1,
	                        value=5.0,
	                    )
	                    num_inference_steps = gr.Slider(
	                        label="추론 단계 수",
	                        minimum=10,
	                        maximum=50,
	                        step=1,
	                        value=25,
	                    )
	            with gr.Row():
	                button = gr.Button("실행", elem_id="button")

	        # Right column: generated image plus the seed and prompts actually used.
	        with gr.Column(elem_id="col-right"):
	            result = gr.Image(label="결과", show_label=False)
	            seed_used = gr.Number(label="사용된 시드")
	            original_prompt_display = gr.Textbox(label="원본 프롬프트")
	            english_prompt_display = gr.Textbox(label="영어 프롬프트")

	    with gr.Row():
	        # Examples supply only prompt and image; infer() falls back to its own
	        # defaults for the remaining parameters.
	        gr.Examples(
	            fn = infer,
	            examples = examples,
	            inputs = [prompt, image],
	            outputs = [result, seed_used, original_prompt_display, english_prompt_display],
	        )

	    # Input order must match infer()'s positional parameter order.
	    button.click(
	        fn = infer,
	        inputs = [prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps],
	        outputs = [result, seed_used, original_prompt_display, english_prompt_display]
	    )

	SAK.queue().launch(debug=True, share=True)