Spaces:

yashvii
/

IDfy-Avatarify

Paused

App Files Files Community

IDfy-Avatarify / gradio_demo /test.py

yashvii

Upload folder using huggingface_hub

b2cbfed verified about 1 month ago

raw

history blame

No virus

15.7 kB

	import sys
	sys.path.append('./')

	from typing import Tuple

	import os
	import cv2
	import math
	import torch
	import random
	import numpy as np
	import argparse
	import pandas as pd

	import PIL
	from PIL import Image

	import diffusers
	from diffusers.utils import load_image
	from diffusers.models import ControlNetModel
	from diffusers import LCMScheduler

	from huggingface_hub import hf_hub_download

	import insightface
	from insightface.app import FaceAnalysis

	from style_template import styles
	from pipeline_stable_diffusion_xl_instantid_full import StableDiffusionXLInstantIDPipeline
	from model_util import load_models_xl, get_torch_device, torch_gc


	# Module-level configuration and one-time model loading (runs at import time).
	MAX_SEED = np.iinfo(np.int32).max  # upper bound for randomly drawn seeds
	device = get_torch_device()
	# Half precision only when the device string mentions CUDA; float32 otherwise.
	dtype = torch.float16 if "cuda" in str(device) else torch.float32
	STYLE_NAMES = list(styles.keys())
	DEFAULT_STYLE_NAME = "Watercolor"

	# Load face encoder
	app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
	app.prepare(ctx_id=0, det_size=(320, 320))

	# Path to InstantID models
	face_adapter = './checkpoints/ip-adapter.bin'
	controlnet_path = './checkpoints/ControlNetModel'

	# Load pipeline
	controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=dtype)

	# NOTE(review): `logo` is not referenced again in the visible code (the UI
	# loads the same file by path); kept because opening it is a module-level
	# side effect that fails fast when the asset is missing.
	logo = Image.open("./gradio_demo/logo.png")

	from cv2 import imencode
	import base64

	# def encode_pil_to_base64_new(pil_image):
	# print("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
	# image_arr = np.asarray(pil_image)[:,:,::-1]
	# _, byte_data = imencode('.png', image_arr)
	# base64_data = base64.b64encode(byte_data)
	# base64_string_opencv = base64_data.decode("utf-8")
	# return "data:image/png;base64," + base64_string_opencv

	import gradio as gr

	# gr.processing_utils.encode_pil_to_base64 = encode_pil_to_base64_new

	def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=False):
	    """Build the InstantID SDXL pipeline used by the demo.

	    Args:
	        pretrained_model_name_or_path: Either a single-file checkpoint path
	            ending in ``.ckpt`` / ``.safetensors`` or any other identifier
	            accepted by ``StableDiffusionXLInstantIDPipeline.from_pretrained``.
	        enable_lcm_arg: Kept for interface compatibility; nothing in the
	            visible code reads it.

	    NOTE(review): indentation was lost in this copy of the file. The helper
	    functions that follow read the local ``pipe``, so they are presumably
	    nested inside this function -- confirm against the original layout.
	    """
	    # str.endswith accepts a tuple, so one call covers both suffixes.
	    if pretrained_model_name_or_path.endswith((".ckpt", ".safetensors")):
	        # Single-file checkpoint: fetch the scheduler config from the default
	        # repo and assemble the pipeline from individually loaded components.
	        scheduler_kwargs = hf_hub_download(
	            repo_id="wangqixun/YamerMIX_v8",
	            subfolder="scheduler",
	            filename="scheduler_config.json",
	        )

	        (tokenizers, text_encoders, unet, _, vae) = load_models_xl(
	            pretrained_model_name_or_path=pretrained_model_name_or_path,
	            scheduler_name=None,
	            weight_dtype=dtype,
	        )

	        scheduler = diffusers.EulerDiscreteScheduler.from_config(scheduler_kwargs)
	        pipe = StableDiffusionXLInstantIDPipeline(
	            vae=vae,
	            text_encoder=text_encoders[0],
	            text_encoder_2=text_encoders[1],
	            tokenizer=tokenizers[0],
	            tokenizer_2=tokenizers[1],
	            unet=unet,
	            scheduler=scheduler,
	            controlnet=controlnet,
	        ).to(device)

	    else:
	        pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
	            pretrained_model_name_or_path,
	            controlnet=controlnet,
	            torch_dtype=dtype,
	            safety_checker=None,
	            feature_extractor=None,
	        ).to(device)

	        # NOTE(review): placed in the else-branch; the other branch already
	        # builds an EulerDiscreteScheduler, so either placement yields an
	        # Euler scheduler on the pipeline.
	        pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)

	    pipe.load_ip_adapter_instantid(face_adapter)
	    # load and disable LCM
	    pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
	    pipe.disable_lora()

	def remove_tips():
	    """Return a Gradio update that sets ``visible=False`` on its output component."""
	    hidden = gr.update(visible=False)
	    return hidden


	# prompts = [
	# ["superman","Vibrant Color"], ["japanese anime character with white/neon hair","Watercolor"],
	# # ["Suited professional","(No style)"],
	# ["Scooba diver","Line art"], ["eskimo","Snow"]
	# ]

	def convert_from_cv2_to_image(img: np.ndarray) -> Image.Image:
	    """Convert a BGR OpenCV array into a PIL image with RGB channel order."""
	    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
	    return Image.fromarray(rgb)

	def convert_from_image_to_cv2(img: Image.Image) -> np.ndarray:
	    """Convert a PIL image (treated as RGB) into a BGR array for OpenCV."""
	    rgb = np.array(img)
	    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

	def _run_for_prompt_index(face_file, style, prompt_index):
	    """Generate one image using the ``prompt_index``-th prompt of ``style``.

	    ``styles`` maps a style name to a ``(prompts, negative_prompt)`` pair,
	    where ``prompts`` is indexable. A style name that is not a key of
	    ``styles`` falls back to the entry for ``STYLE_NAMES[1]``.
	    """
	    prompts, negative_prompt = styles.get(style, styles.get(STYLE_NAMES[1]))
	    return generate_image(face_file, prompts[prompt_index], negative_prompt)


	# The four public handlers differ only in which prompt of the style they use.
	# `progress` is not read in the bodies; presumably it is declared so Gradio
	# attaches tqdm progress tracking to the handler -- see the Gradio docs.
	def run_for_prompts1(face_file, style, progress=gr.Progress(track_tqdm=True)):
	    """Generate the image for the first prompt (index 0) of ``style``."""
	    return _run_for_prompt_index(face_file, style, 0)


	def run_for_prompts2(face_file, style, progress=gr.Progress(track_tqdm=True)):
	    """Generate the image for the second prompt (index 1) of ``style``."""
	    return _run_for_prompt_index(face_file, style, 1)


	def run_for_prompts3(face_file, style, progress=gr.Progress(track_tqdm=True)):
	    """Generate the image for the third prompt (index 2) of ``style``."""
	    return _run_for_prompt_index(face_file, style, 2)


	def run_for_prompts4(face_file, style, progress=gr.Progress(track_tqdm=True)):
	    """Generate the image for the fourth prompt (index 3) of ``style``."""
	    return _run_for_prompt_index(face_file, style, 3)

	# def validate_and_process(face_file, style, email):

	# # Your processing logic here
	# gallery1, gallery2, gallery3, gallery4 = run_for_prompts1(face_file, style), run_for_prompts2(face_file, style), run_for_prompts3(face_file, style), run_for_prompts4(face_file, style)
	# return gallery1, gallery2, gallery3, gallery4

	def draw_kps(image_pil, kps, color_list=((255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255))):
	    """Render facial keypoints as a colored stick figure on a black canvas.

	    Args:
	        image_pil: PIL image; only its ``size`` is used, to size the canvas.
	        kps: Sequence of ``(x, y)`` keypoints. Indices 0-4 are accessed, so
	            at least five points are required.
	        color_list: One color per keypoint index. Each limb is drawn in the
	            color of its first endpoint.

	    Returns:
	        A PIL image of the same width and height as ``image_pil``.
	    """
	    stickwidth = 4
	    # Every limb joins an outer keypoint (0, 1, 3, 4) to keypoint 2.
	    limb_seq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
	    kps = np.array(kps)

	    w, h = image_pil.size
	    out_img = np.zeros([h, w, 3])

	    for index in limb_seq:
	        color = color_list[index[0]]

	        x = kps[index][:, 0]
	        y = kps[index][:, 1]
	        # Euclidean distance between the two endpoints.
	        length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
	        angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
	        # Limb drawn as a filled ellipse centered on the segment midpoint.
	        polygon = cv2.ellipse2Poly(
	            (int(np.mean(x)), int(np.mean(y))),
	            (int(length / 2), stickwidth),
	            int(angle),
	            0,
	            360,
	            1,
	        )
	        out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
	    # Dim the limbs to 60% so the full-intensity keypoint dots stand out.
	    # NOTE(review): placed after the limb loop (indentation was lost in this
	    # copy of the file) -- confirm against the original.
	    out_img = (out_img * 0.6).astype(np.uint8)

	    for idx_kp, kp in enumerate(kps):
	        color = color_list[idx_kp]
	        x, y = kp
	        out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)

	    return Image.fromarray(out_img.astype(np.uint8))

	def resize_img(input_image, max_side=640, min_side=640, size=None,
	               pad_to_max_side=True, mode=PIL.Image.BILINEAR, base_pixel_number=64):
	    """Resize a PIL image for the pipeline and optionally pad it to a square.

	    Args:
	        input_image: PIL image to resize.
	        max_side: Limit for the longer side; also the edge length of the
	            padded square canvas.
	        min_side: Target for the shorter side before ``max_side`` is applied.
	        size: Optional explicit ``(width, height)``. When given, the
	            ratio-based computation is skipped.
	        pad_to_max_side: If true, center the result on a white
	            ``max_side`` x ``max_side`` canvas.
	        mode: Resampling filter passed to ``Image.resize``.
	        base_pixel_number: Computed dimensions are rounded down to a
	            multiple of this value.

	    Returns:
	        The resized (and possibly padded) PIL image.
	    """
	    w, h = input_image.size
	    print(w)
	    print(h)
	    if size is not None:
	        w_resize_new, h_resize_new = size
	    else:
	        # Scale so the shorter side reaches min_side ...
	        ratio = min_side / min(h, w)
	        w, h = round(ratio * w), round(ratio * h)
	        # ... then rescale so the longer side equals max_side.
	        ratio = max_side / max(h, w)
	        input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
	        w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
	        h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
	    input_image = input_image.resize([w_resize_new, h_resize_new], mode)

	    if pad_to_max_side:
	        # White canvas. The assignment below assumes a 3-channel image that
	        # is no larger than max_side in either dimension.
	        res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
	        offset_x = (max_side - w_resize_new) // 2
	        offset_y = (max_side - h_resize_new) // 2
	        res[offset_y:offset_y + h_resize_new, offset_x:offset_x + w_resize_new] = np.array(input_image)
	        input_image = Image.fromarray(res)
	    return input_image

	def store_images(email, gallery1, gallery2, gallery3, gallery4):
	    """Save the four generated images and log their paths against ``email``.

	    Images are written to ``images/<email>_gallery<N>.png`` and one row
	    ``(email, img1_path..img4_path)`` is appended to ``image_data.csv``,
	    which gets a header only when it is first created.

	    Args:
	        email: Text entered by the user. It is untrusted, so characters
	            outside ``[A-Za-z0-9._@+-]`` are replaced with ``_`` in file
	            names. The CSV keeps the value as entered.
	        gallery1, gallery2, gallery3, gallery4: Generated images, either
	            NumPy arrays or objects with a PIL-style ``save`` method.

	    Raises:
	        gr.Error: If any of the four images is ``None``.
	    """
	    import re  # local import: only this handler needs it

	    galleries = []
	    for i, img in enumerate([gallery1, gallery2, gallery3, gallery4], start=1):
	        if img is None:
	            # Fail with a readable UI error rather than an AttributeError on save().
	            raise gr.Error(f"Generated image {i} is missing! Please generate all images before storing")
	        if isinstance(img, np.ndarray):
	            img = Image.fromarray(img)
	        print(f"Gallery {i} type after conversion: {type(img)}")
	        galleries.append(img)

	    # Create the images directory if it doesn't exist
	    os.makedirs('images', exist_ok=True)

	    # Strip path separators and other unsafe characters so the input cannot
	    # steer the write outside images/. Ordinary addresses are left unchanged.
	    safe_email = re.sub(r'[^A-Za-z0-9._@+-]', '_', email or '')

	    # Define image file paths
	    image_paths = []
	    for i, img in enumerate(galleries, start=1):
	        img_path = f'images/{safe_email}_gallery{i}.png'
	        img.save(img_path)
	        image_paths.append(img_path)

	    # Define the CSV file path
	    csv_file_path = 'image_data.csv'

	    # Create a DataFrame for the email and image paths
	    df = pd.DataFrame({
	        'email': [email],
	        'img1_path': [image_paths[0]],
	        'img2_path': [image_paths[1]],
	        'img3_path': [image_paths[2]],
	        'img4_path': [image_paths[3]],
	    })

	    # Append mode creates the file when missing; write the header only then.
	    write_header = not os.path.isfile(csv_file_path)
	    df.to_csv(csv_file_path, mode='a', header=write_header, index=False)


	def generate_image(face_image, prompt, negative_prompt):
	    """Generate one stylized image that preserves the identity of ``face_image``.

	    The largest detected face supplies the identity embedding and the
	    keypoint control image. All sampling settings are fixed constants below,
	    and a fresh random seed is drawn on every call.

	    Args:
	        face_image: PIL image containing a face, or ``None``.
	        prompt: Positive text prompt.
	        negative_prompt: Negative text prompt.

	    Returns:
	        The first image of the pipeline output (``.images[0]``).

	    Raises:
	        gr.Error: If ``face_image`` is ``None`` or no face is detected.
	    """
	    # Validate input before touching the pipeline state.
	    if face_image is None:
	        raise gr.Error("Cannot find any input face image! Please upload the face image")

	    pose_image_path = None
	    enable_LCM = False
	    identitynet_strength_ratio = 0.95
	    adapter_strength_ratio = 0.60
	    num_steps = 15
	    guidance_scale = 8.5
	    seed = random.randint(0, MAX_SEED)
	    enhance_face_region = True

	    if enable_LCM:
	        pipe.enable_lora()
	        pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
	    else:
	        pipe.disable_lora()
	        pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)

	    face_image = resize_img(face_image)
	    face_image_cv2 = convert_from_image_to_cv2(face_image)
	    height, width, _ = face_image_cv2.shape

	    # Extract face features
	    face_info = app.get(face_image_cv2)

	    if len(face_info) == 0:
	        raise gr.Error("Cannot find any face in the image! Please upload another person image")

	    # Only use the face with the largest bounding-box area.
	    face_info = max(
	        face_info,
	        key=lambda face: (face['bbox'][2] - face['bbox'][0]) * (face['bbox'][3] - face['bbox'][1]),
	    )
	    face_emb = face_info['embedding']
	    face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info['kps'])

	    # Never taken while pose_image_path is the constant None above; kept so
	    # a pose reference image can be wired in later.
	    if pose_image_path is not None:
	        pose_image = load_image(pose_image_path)
	        pose_image = resize_img(pose_image)
	        pose_image_cv2 = convert_from_image_to_cv2(pose_image)

	        face_info = app.get(pose_image_cv2)

	        if len(face_info) == 0:
	            raise gr.Error("Cannot find any face in the reference image! Please upload another person image")

	        face_info = face_info[-1]
	        face_kps = draw_kps(pose_image, face_info['kps'])

	        width, height = face_kps.size

	    if enhance_face_region:
	        control_mask = np.zeros([height, width, 3])
	        # Clamp to >= 0: a negative bbox coordinate would otherwise act as
	        # a wrap-around index and select the wrong region (or nothing).
	        x1, y1, x2, y2 = (max(int(v), 0) for v in face_info["bbox"])
	        control_mask[y1:y2, x1:x2] = 255
	        control_mask = Image.fromarray(control_mask.astype(np.uint8))
	    else:
	        control_mask = None

	    generator = torch.Generator(device=device).manual_seed(seed)

	    print("Start inference...")
	    print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")

	    pipe.set_ip_adapter_scale(adapter_strength_ratio)
	    images = pipe(
	        prompt=prompt,
	        negative_prompt=negative_prompt,
	        image_embeds=face_emb,
	        image=face_kps,
	        control_mask=control_mask,
	        controlnet_conditioning_scale=float(identitynet_strength_ratio),
	        num_inference_steps=num_steps,
	        guidance_scale=guidance_scale,
	        height=height,
	        width=width,
	        generator=generator,
	    ).images

	    print(images[0])

	    return images[0]


	### Description
	# Header snippets for the page. `title` and `description` are reassigned
	# inside the Blocks context below; `article` and `tips` are not referenced
	# again in the visible code.
	title = r"""
	<h1 align="center">Choose your AVATAR</h1>
	"""

	description = r"""
	<h2> Powered by IDfy </h2>"""

	article = r""""""

	tips = r""""""

	# Custom CSS passed to gr.Blocks.
	css = '''
	.gradio-container {width: 95% !important; background-color: #E6F3FF;}
	.image-gallery {height: 100vh !important; overflow: auto;}
	.gradio-row .gradio-element { margin: 0 !important; }
	'''
	# Build the UI.
	# NOTE(review): indentation was lost in this copy of the file, so the exact
	# Row/Column nesting of the components below cannot be read from the text --
	# confirm against the original layout.
	with gr.Blocks(css=css) as demo:
	title = "<h1 align='center'>Choose your AVATAR</h1>"
	description = "<h2> Powered by IDfy </h2>"

	# Description
	gr.Markdown(title)
	with gr.Row():
	gr.Image("./gradio_demo/logo.png",scale=0,min_width=50,show_label=False,show_download_button=False)
	gr.Markdown(description)
	with gr.Row():
	with gr.Column():
	# Inputs: style selector, face photo (delivered to handlers as a PIL image), submit button.
	style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
	face_file = gr.Image(label="Upload a photo of your face", type="pil")
	submit = gr.Button("Submit", variant="primary")
	with gr.Column():
	# Outputs: four result images, one per prompt of the selected style.
	with gr.Row():
	gallery1 = gr.Image(label="Generated Images")
	gallery2 = gr.Image(label="Generated Images")
	with gr.Row():
	gallery3 = gr.Image(label="Generated Images")
	gallery4 = gr.Image(label="Generated Images")
	# Email field and STORE button, wired to store_images below.
	email = gr.Textbox(label="Email",
	info="Enter your email address",
	value="")
	submit1 = gr.Button("STORE", variant="primary")
	# Created hidden with an empty value; remove_tips is routed to it in the chains below.
	usage_tips = gr.Markdown(label="Usage tips of InstantID", value="", visible=False)

	# Image upload and processing chain
	# Uploading a photo and clicking Submit run the same chain: remove_tips
	# first, then run_for_prompts1..4 in sequence, each filling one gallery.
	face_file.upload(remove_tips, outputs=usage_tips).then(run_for_prompts1, inputs=[face_file, style], outputs=[gallery1]).then(run_for_prompts2, inputs=[face_file, style], outputs=[gallery2]).then(run_for_prompts3, inputs=[face_file, style], outputs=[gallery3]).then(run_for_prompts4, inputs=[face_file, style], outputs=[gallery4])
	submit.click(remove_tips, outputs=usage_tips).then(run_for_prompts1, inputs=[face_file, style], outputs=[gallery1]).then(run_for_prompts2, inputs=[face_file, style], outputs=[gallery2]).then(run_for_prompts3, inputs=[face_file, style], outputs=[gallery3]).then(run_for_prompts4, inputs=[face_file, style], outputs=[gallery4])

	# Store data on button click
	# Passes the email text and the four gallery images to store_images; the
	# handler has no output components.
	submit1.click(
	fn=store_images,
	inputs=[email,gallery1,gallery2,gallery3,gallery4],
	outputs=None)

	gr.Markdown("")

	# Start the Gradio app; share=True is passed through to launch().
	demo.launch(share=True)

	if __name__ == "__main__":
	    # Command-line entry point: the only option is the base model to load.
	    cli = argparse.ArgumentParser()
	    cli.add_argument(
	        "--pretrained_model_name_or_path",
	        type=str,
	        default="wangqixun/YamerMIX_v8",
	    )
	    cli_args = cli.parse_args()

	    # The second positional argument (enable_lcm_arg) is always False here.
	    main(cli_args.pretrained_model_name_or_path, False)