yashvii committed on
Commit
b2cbfed
1 Parent(s): 59f9c29

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .dockerignore +40 -0
  2. .gitattributes +13 -0
  3. .gitignore +186 -0
  4. LICENSE +201 -0
  5. README.md +3 -9
  6. Untitled.ipynb +432 -0
  7. checkpoints/ControlNetModel/config.json +57 -0
  8. checkpoints/ControlNetModel/diffusion_pytorch_model.safetensors +3 -0
  9. checkpoints/ip-adapter.bin +3 -0
  10. checkpoints/pytorch_lora_weights.safetensors +3 -0
  11. cog.yaml +40 -0
  12. cog/README.md +60 -0
  13. cog/predict.py +756 -0
  14. feature-extractor/preprocessor_config.json +27 -0
  15. generated_images/20240723_053704_668578_0.png +3 -0
  16. generated_images/20240723_053801_148984_0.png +3 -0
  17. generated_images/20240723_053853_022841_0.png +3 -0
  18. generated_images/20240723_053948_468290_0.png +3 -0
  19. generated_images/20240723_054025_692605_0.png +3 -0
  20. generated_images/20240723_054124_697176_0.png +3 -0
  21. generation_log.csv +7 -0
  22. gradio_demo/aaa.py +957 -0
  23. gradio_demo/app-multicontrolnet.py +670 -0
  24. gradio_demo/app.py +656 -0
  25. gradio_demo/app1.py +434 -0
  26. gradio_demo/background.jpg +0 -0
  27. gradio_demo/controlnet_util.py +39 -0
  28. gradio_demo/demo.py +369 -0
  29. gradio_demo/download_models.py +27 -0
  30. gradio_demo/logo.png +0 -0
  31. gradio_demo/logo1.png +0 -0
  32. gradio_demo/model_util.py +472 -0
  33. gradio_demo/preprocess.py +232 -0
  34. gradio_demo/requirements.txt +19 -0
  35. gradio_demo/style_template.py +136 -0
  36. gradio_demo/test.py +400 -0
  37. gradio_demo/watermark.png +0 -0
  38. image_data.csv +15 -0
  39. images/aa.ll_gallery1.png +3 -0
  40. images/aa.ll_gallery2.png +0 -0
  41. images/aa.ll_gallery3.png +0 -0
  42. images/aa.ll_gallery4.png +0 -0
  43. images/heeral@img_gallery1.png +0 -0
  44. images/heeral@img_gallery2.png +0 -0
  45. images/heeral@img_gallery3.png +0 -0
  46. images/heeral@img_gallery4.png +0 -0
  47. images/kajal@img_gallery1.png +0 -0
  48. images/kajal@img_gallery2.png +0 -0
  49. images/kajal@img_gallery3.png +0 -0
  50. images/kajal@img_gallery4.png +0 -0
.dockerignore ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # The .dockerignore file excludes files from the container build process.
2
+ #
3
+ # https://docs.docker.com/engine/reference/builder/#dockerignore-file
4
+
5
+ # Replicate
6
+ /safety-cache/
7
+ /gradio_cached_examples/
8
+ *.mp4
9
+ *.pth
10
+ *.pt
11
+ *.bin
12
+ *.ckpt
13
+ *.onnx
14
+ *.tar
15
+ *.tar.gz
16
+ *.h5
17
+ *.pb
18
+ *.caffemodel
19
+ *.weights
20
+ *.tar
21
+ *.jpg
22
+ *.jpeg
23
+ *.png
24
+ *.webp
25
+ .vscode
26
+
27
+ # Exclude Git files
28
+ .git
29
+ .github
30
+ .gitignore
31
+
32
+ # Exclude Python cache files
33
+ __pycache__
34
+ .pytest_cache/
35
+ .mypy_cache
36
+ .pytest_cache
37
+ .ruff_cache
38
+
39
+ # Exclude Python virtual environment
40
+ /venv
.gitattributes CHANGED
@@ -33,3 +33,16 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ generated_images/20240723_053704_668578_0.png filter=lfs diff=lfs merge=lfs -text
37
+ generated_images/20240723_053801_148984_0.png filter=lfs diff=lfs merge=lfs -text
38
+ generated_images/20240723_053853_022841_0.png filter=lfs diff=lfs merge=lfs -text
39
+ generated_images/20240723_053948_468290_0.png filter=lfs diff=lfs merge=lfs -text
40
+ generated_images/20240723_054025_692605_0.png filter=lfs diff=lfs merge=lfs -text
41
+ generated_images/20240723_054124_697176_0.png filter=lfs diff=lfs merge=lfs -text
42
+ images/aa.ll_gallery1.png filter=lfs diff=lfs merge=lfs -text
43
+ images/yashvi_gallery1.png filter=lfs diff=lfs merge=lfs -text
44
+ images/yashvi_gallery4.png filter=lfs diff=lfs merge=lfs -text
45
+ images/yashviwhy@instantid.com_gallery1.png filter=lfs diff=lfs merge=lfs -text
46
+ images/yashviwhy@instantid.com_gallery2.png filter=lfs diff=lfs merge=lfs -text
47
+ images/yashviwhy@instantid.com_gallery3.png filter=lfs diff=lfs merge=lfs -text
48
+ images/yashviwhy@instantid.com_gallery4.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
161
+ huggingface/
162
+
163
+ # Cog
164
+ /.cog/
165
+ /safety-cache/
166
+ *.tar
167
+ .vscode
168
+ gradio_cached_examples
169
+ cog/test_batchsize.py
170
+ input.png
171
+ output_*.png
172
+ output.*.png
173
+ output_image_*.png
174
+ output_image.*.png
175
+ output_*.webp
176
+ output.*.webp
177
+ output_image_*.webp
178
+ output_image.*.webp
179
+ output_*.jpg
180
+ output.*.jpg
181
+ output_image_*.jpg
182
+ output_image.*.jpg
183
+ output_*.jpeg
184
+ output.*.jpeg
185
+ output_image_*.jpeg
186
+ output_image.*.jpeg
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: IDfy Avatarify
3
- emoji: 👁
4
- colorFrom: indigo
5
- colorTo: purple
6
  sdk: gradio
7
- sdk_version: 4.42.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: IDfy-Avatarify
3
+ app_file: gradio_demo/app.py
 
 
4
  sdk: gradio
5
+ sdk_version: 4.38.1
 
 
6
  ---
 
 
Untitled.ipynb ADDED
@@ -0,0 +1,432 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "82c97d2c-16bf-4b2c-b16c-d9f7a8a5b12f",
7
+ "metadata": {
8
+ "tags": []
9
+ },
10
+ "outputs": [
11
+ {
12
+ "name": "stdout",
13
+ "output_type": "stream",
14
+ "text": [
15
+ "Collecting diffusers==0.25.1 (from -r gradio_demo/requirements.txt (line 1))\n",
16
+ " Downloading diffusers-0.25.1-py3-none-any.whl.metadata (19 kB)\n",
17
+ "Requirement already satisfied: torch==2.0.0 in /opt/conda/lib/python3.10/site-packages (from -r gradio_demo/requirements.txt (line 2)) (2.0.0+cu118)\n",
18
+ "Requirement already satisfied: torchvision==0.15.1 in /opt/conda/lib/python3.10/site-packages (from -r gradio_demo/requirements.txt (line 3)) (0.15.1+cu118)\n",
19
+ "Collecting transformers==4.37.1 (from -r gradio_demo/requirements.txt (line 4))\n",
20
+ " Downloading transformers-4.37.1-py3-none-any.whl.metadata (129 kB)\n",
21
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.4/129.4 kB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
22
+ "\u001b[?25hCollecting accelerate (from -r gradio_demo/requirements.txt (line 5))\n",
23
+ " Downloading accelerate-0.32.1-py3-none-any.whl.metadata (18 kB)\n",
24
+ "Collecting safetensors (from -r gradio_demo/requirements.txt (line 6))\n",
25
+ " Downloading safetensors-0.4.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)\n",
26
+ "Collecting einops (from -r gradio_demo/requirements.txt (line 7))\n",
27
+ " Downloading einops-0.8.0-py3-none-any.whl.metadata (12 kB)\n",
28
+ "Collecting onnxruntime-gpu (from -r gradio_demo/requirements.txt (line 8))\n",
29
+ " Downloading onnxruntime_gpu-1.18.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.4 kB)\n",
30
+ "Collecting spaces==0.19.4 (from -r gradio_demo/requirements.txt (line 9))\n",
31
+ " Downloading spaces-0.19.4-py3-none-any.whl.metadata (972 bytes)\n",
32
+ "Collecting omegaconf (from -r gradio_demo/requirements.txt (line 10))\n",
33
+ " Downloading omegaconf-2.3.0-py3-none-any.whl.metadata (3.9 kB)\n",
34
+ "Collecting peft (from -r gradio_demo/requirements.txt (line 11))\n",
35
+ " Downloading peft-0.11.1-py3-none-any.whl.metadata (13 kB)\n",
36
+ "Collecting huggingface-hub==0.20.2 (from -r gradio_demo/requirements.txt (line 12))\n",
37
+ " Downloading huggingface_hub-0.20.2-py3-none-any.whl.metadata (12 kB)\n",
38
+ "Collecting opencv-python (from -r gradio_demo/requirements.txt (line 13))\n",
39
+ " Downloading opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)\n",
40
+ "Collecting insightface (from -r gradio_demo/requirements.txt (line 14))\n",
41
+ " Downloading insightface-0.7.3.tar.gz (439 kB)\n",
42
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.5/439.5 kB\u001b[0m \u001b[31m10.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
43
+ "\u001b[?25h Installing build dependencies ... \u001b[?25ldone\n",
44
+ "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n",
45
+ "\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n",
46
+ "\u001b[?25hCollecting gradio (from -r gradio_demo/requirements.txt (line 15))\n",
47
+ " Downloading gradio-4.38.1-py3-none-any.whl.metadata (15 kB)\n",
48
+ "Collecting controlnet_aux (from -r gradio_demo/requirements.txt (line 16))\n",
49
+ " Downloading controlnet_aux-0.0.9-py3-none-any.whl.metadata (6.5 kB)\n",
50
+ "Collecting gdown (from -r gradio_demo/requirements.txt (line 17))\n",
51
+ " Downloading gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)\n",
52
+ "Requirement already satisfied: importlib-metadata in /opt/conda/lib/python3.10/site-packages (from diffusers==0.25.1->-r gradio_demo/requirements.txt (line 1)) (7.0.0)\n",
53
+ "Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from diffusers==0.25.1->-r gradio_demo/requirements.txt (line 1)) (3.15.4)\n",
54
+ "Requirement already satisfied: numpy in /opt/conda/lib/python3.10/site-packages (from diffusers==0.25.1->-r gradio_demo/requirements.txt (line 1)) (1.25.2)\n",
55
+ "Collecting regex!=2019.12.17 (from diffusers==0.25.1->-r gradio_demo/requirements.txt (line 1))\n",
56
+ " Downloading regex-2024.5.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)\n",
57
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.9/40.9 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
58
+ "\u001b[?25hRequirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from diffusers==0.25.1->-r gradio_demo/requirements.txt (line 1)) (2.32.3)\n",
59
+ "Requirement already satisfied: Pillow in /opt/conda/lib/python3.10/site-packages (from diffusers==0.25.1->-r gradio_demo/requirements.txt (line 1)) (10.4.0)\n",
60
+ "Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from torch==2.0.0->-r gradio_demo/requirements.txt (line 2)) (4.12.2)\n",
61
+ "Requirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch==2.0.0->-r gradio_demo/requirements.txt (line 2)) (1.13.0)\n",
62
+ "Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch==2.0.0->-r gradio_demo/requirements.txt (line 2)) (3.3)\n",
63
+ "Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch==2.0.0->-r gradio_demo/requirements.txt (line 2)) (3.1.4)\n",
64
+ "Requirement already satisfied: triton==2.0.0 in /opt/conda/lib/python3.10/site-packages (from torch==2.0.0->-r gradio_demo/requirements.txt (line 2)) (2.0.0)\n",
65
+ "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.10/site-packages (from transformers==4.37.1->-r gradio_demo/requirements.txt (line 4)) (24.1)\n",
66
+ "Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from transformers==4.37.1->-r gradio_demo/requirements.txt (line 4)) (6.0.1)\n",
67
+ "Collecting tokenizers<0.19,>=0.14 (from transformers==4.37.1->-r gradio_demo/requirements.txt (line 4))\n",
68
+ " Downloading tokenizers-0.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n",
69
+ "Requirement already satisfied: tqdm>=4.27 in /opt/conda/lib/python3.10/site-packages (from transformers==4.37.1->-r gradio_demo/requirements.txt (line 4)) (4.66.4)\n",
70
+ "Requirement already satisfied: httpx>=0.20 in /opt/conda/lib/python3.10/site-packages (from spaces==0.19.4->-r gradio_demo/requirements.txt (line 9)) (0.27.0)\n",
71
+ "Requirement already satisfied: psutil<6,>=2 in /opt/conda/lib/python3.10/site-packages (from spaces==0.19.4->-r gradio_demo/requirements.txt (line 9)) (5.9.3)\n",
72
+ "Requirement already satisfied: pydantic<3,>=1 in /opt/conda/lib/python3.10/site-packages (from spaces==0.19.4->-r gradio_demo/requirements.txt (line 9)) (1.10.17)\n",
73
+ "Requirement already satisfied: fsspec>=2023.5.0 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub==0.20.2->-r gradio_demo/requirements.txt (line 12)) (2024.6.1)\n",
74
+ "Requirement already satisfied: cmake in /opt/conda/lib/python3.10/site-packages (from triton==2.0.0->torch==2.0.0->-r gradio_demo/requirements.txt (line 2)) (3.30.0)\n",
75
+ "Requirement already satisfied: lit in /opt/conda/lib/python3.10/site-packages (from triton==2.0.0->torch==2.0.0->-r gradio_demo/requirements.txt (line 2)) (18.1.8)\n",
76
+ "Collecting coloredlogs (from onnxruntime-gpu->-r gradio_demo/requirements.txt (line 8))\n",
77
+ " Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)\n",
78
+ "Collecting flatbuffers (from onnxruntime-gpu->-r gradio_demo/requirements.txt (line 8))\n",
79
+ " Downloading flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)\n",
80
+ "Requirement already satisfied: protobuf in /opt/conda/lib/python3.10/site-packages (from onnxruntime-gpu->-r gradio_demo/requirements.txt (line 8)) (3.20.3)\n",
81
+ "Collecting antlr4-python3-runtime==4.9.* (from omegaconf->-r gradio_demo/requirements.txt (line 10))\n",
82
+ " Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)\n",
83
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.0/117.0 kB\u001b[0m \u001b[31m14.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
84
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25ldone\n",
85
+ "\u001b[?25hCollecting onnx (from insightface->-r gradio_demo/requirements.txt (line 14))\n",
86
+ " Downloading onnx-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)\n",
87
+ "Requirement already satisfied: matplotlib in /opt/conda/lib/python3.10/site-packages (from insightface->-r gradio_demo/requirements.txt (line 14)) (3.7.3)\n",
88
+ "Requirement already satisfied: scipy in /opt/conda/lib/python3.10/site-packages (from insightface->-r gradio_demo/requirements.txt (line 14)) (1.11.4)\n",
89
+ "Requirement already satisfied: scikit-learn in /opt/conda/lib/python3.10/site-packages (from insightface->-r gradio_demo/requirements.txt (line 14)) (1.5.1)\n",
90
+ "Requirement already satisfied: scikit-image in /opt/conda/lib/python3.10/site-packages (from insightface->-r gradio_demo/requirements.txt (line 14)) (0.24.0)\n",
91
+ "Collecting easydict (from insightface->-r gradio_demo/requirements.txt (line 14))\n",
92
+ " Downloading easydict-1.13-py3-none-any.whl.metadata (4.2 kB)\n",
93
+ "Requirement already satisfied: cython in /opt/conda/lib/python3.10/site-packages (from insightface->-r gradio_demo/requirements.txt (line 14)) (3.0.10)\n",
94
+ "Collecting albumentations (from insightface->-r gradio_demo/requirements.txt (line 14))\n",
95
+ " Downloading albumentations-1.4.11-py3-none-any.whl.metadata (39 kB)\n",
96
+ "Requirement already satisfied: prettytable in /opt/conda/lib/python3.10/site-packages (from insightface->-r gradio_demo/requirements.txt (line 14)) (3.10.0)\n",
97
+ "Requirement already satisfied: aiofiles<24.0,>=22.0 in /opt/conda/lib/python3.10/site-packages (from gradio->-r gradio_demo/requirements.txt (line 15)) (22.1.0)\n",
98
+ "Collecting altair<6.0,>=5.0 (from gradio->-r gradio_demo/requirements.txt (line 15))\n",
99
+ " Downloading altair-5.3.0-py3-none-any.whl.metadata (9.2 kB)\n",
100
+ "Requirement already satisfied: fastapi in /opt/conda/lib/python3.10/site-packages (from gradio->-r gradio_demo/requirements.txt (line 15)) (0.111.0)\n",
101
+ "Collecting ffmpy (from gradio->-r gradio_demo/requirements.txt (line 15))\n",
102
+ " Downloading ffmpy-0.3.2.tar.gz (5.5 kB)\n",
103
+ " Preparing metadata (setup.py) ... \u001b[?25ldone\n",
104
+ "\u001b[?25hCollecting gradio-client==1.1.0 (from gradio->-r gradio_demo/requirements.txt (line 15))\n",
105
+ " Downloading gradio_client-1.1.0-py3-none-any.whl.metadata (7.1 kB)\n",
106
+ "Requirement already satisfied: importlib-resources<7.0,>=1.3 in /opt/conda/lib/python3.10/site-packages (from gradio->-r gradio_demo/requirements.txt (line 15)) (6.4.0)\n",
107
+ "Requirement already satisfied: markupsafe~=2.0 in /opt/conda/lib/python3.10/site-packages (from gradio->-r gradio_demo/requirements.txt (line 15)) (2.1.5)\n",
108
+ "Requirement already satisfied: orjson~=3.0 in /opt/conda/lib/python3.10/site-packages (from gradio->-r gradio_demo/requirements.txt (line 15)) (3.10.6)\n",
109
+ "Requirement already satisfied: pandas<3.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from gradio->-r gradio_demo/requirements.txt (line 15)) (2.0.3)\n",
110
+ "Collecting pydantic<3,>=1 (from spaces==0.19.4->-r gradio_demo/requirements.txt (line 9))\n",
111
+ " Downloading pydantic-2.8.2-py3-none-any.whl.metadata (125 kB)\n",
112
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m125.2/125.2 kB\u001b[0m \u001b[31m16.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
113
+ "\u001b[?25hCollecting pydub (from gradio->-r gradio_demo/requirements.txt (line 15))\n",
114
+ " Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)\n",
115
+ "Requirement already satisfied: python-multipart>=0.0.9 in /opt/conda/lib/python3.10/site-packages (from gradio->-r gradio_demo/requirements.txt (line 15)) (0.0.9)\n",
116
+ "Collecting ruff>=0.2.2 (from gradio->-r gradio_demo/requirements.txt (line 15))\n",
117
+ " Downloading ruff-0.5.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (24 kB)\n",
118
+ "Collecting semantic-version~=2.0 (from gradio->-r gradio_demo/requirements.txt (line 15))\n",
119
+ " Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)\n",
120
+ "Collecting tomlkit==0.12.0 (from gradio->-r gradio_demo/requirements.txt (line 15))\n",
121
+ " Downloading tomlkit-0.12.0-py3-none-any.whl.metadata (2.7 kB)\n",
122
+ "Requirement already satisfied: typer<1.0,>=0.12 in /opt/conda/lib/python3.10/site-packages (from gradio->-r gradio_demo/requirements.txt (line 15)) (0.12.3)\n",
123
+ "Collecting urllib3~=2.0 (from gradio->-r gradio_demo/requirements.txt (line 15))\n",
124
+ " Downloading urllib3-2.2.2-py3-none-any.whl.metadata (6.4 kB)\n",
125
+ "Requirement already satisfied: uvicorn>=0.14.0 in /opt/conda/lib/python3.10/site-packages (from gradio->-r gradio_demo/requirements.txt (line 15)) (0.30.1)\n",
126
+ "Collecting websockets<12.0,>=10.0 (from gradio-client==1.1.0->gradio->-r gradio_demo/requirements.txt (line 15))\n",
127
+ " Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n",
128
+ "Collecting opencv-python-headless (from controlnet_aux->-r gradio_demo/requirements.txt (line 16))\n",
129
+ " Downloading opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)\n",
130
+ "Collecting timm<=0.6.7 (from controlnet_aux->-r gradio_demo/requirements.txt (line 16))\n",
131
+ " Downloading timm-0.6.7-py3-none-any.whl.metadata (33 kB)\n",
132
+ "Requirement already satisfied: beautifulsoup4 in /opt/conda/lib/python3.10/site-packages (from gdown->-r gradio_demo/requirements.txt (line 17)) (4.12.3)\n",
133
+ "Requirement already satisfied: jsonschema>=3.0 in /opt/conda/lib/python3.10/site-packages (from altair<6.0,>=5.0->gradio->-r gradio_demo/requirements.txt (line 15)) (4.22.0)\n",
134
+ "Requirement already satisfied: toolz in /opt/conda/lib/python3.10/site-packages (from altair<6.0,>=5.0->gradio->-r gradio_demo/requirements.txt (line 15)) (0.12.1)\n",
135
+ "Requirement already satisfied: anyio in /opt/conda/lib/python3.10/site-packages (from httpx>=0.20->spaces==0.19.4->-r gradio_demo/requirements.txt (line 9)) (4.4.0)\n",
136
+ "Requirement already satisfied: certifi in /opt/conda/lib/python3.10/site-packages (from httpx>=0.20->spaces==0.19.4->-r gradio_demo/requirements.txt (line 9)) (2024.7.4)\n",
137
+ "Requirement already satisfied: httpcore==1.* in /opt/conda/lib/python3.10/site-packages (from httpx>=0.20->spaces==0.19.4->-r gradio_demo/requirements.txt (line 9)) (1.0.5)\n",
138
+ "Requirement already satisfied: idna in /opt/conda/lib/python3.10/site-packages (from httpx>=0.20->spaces==0.19.4->-r gradio_demo/requirements.txt (line 9)) (3.7)\n",
139
+ "Requirement already satisfied: sniffio in /opt/conda/lib/python3.10/site-packages (from httpx>=0.20->spaces==0.19.4->-r gradio_demo/requirements.txt (line 9)) (1.3.1)\n",
140
+ "Requirement already satisfied: h11<0.15,>=0.13 in /opt/conda/lib/python3.10/site-packages (from httpcore==1.*->httpx>=0.20->spaces==0.19.4->-r gradio_demo/requirements.txt (line 9)) (0.14.0)\n",
141
+ "Requirement already satisfied: contourpy>=1.0.1 in /opt/conda/lib/python3.10/site-packages (from matplotlib->insightface->-r gradio_demo/requirements.txt (line 14)) (1.2.1)\n",
142
+ "Requirement already satisfied: cycler>=0.10 in /opt/conda/lib/python3.10/site-packages (from matplotlib->insightface->-r gradio_demo/requirements.txt (line 14)) (0.12.1)\n",
143
+ "Requirement already satisfied: fonttools>=4.22.0 in /opt/conda/lib/python3.10/site-packages (from matplotlib->insightface->-r gradio_demo/requirements.txt (line 14)) (4.53.1)\n",
144
+ "Requirement already satisfied: kiwisolver>=1.0.1 in /opt/conda/lib/python3.10/site-packages (from matplotlib->insightface->-r gradio_demo/requirements.txt (line 14)) (1.4.5)\n",
145
+ "Requirement already satisfied: pyparsing>=2.3.1 in /opt/conda/lib/python3.10/site-packages (from matplotlib->insightface->-r gradio_demo/requirements.txt (line 14)) (3.1.2)\n",
146
+ "Requirement already satisfied: python-dateutil>=2.7 in /opt/conda/lib/python3.10/site-packages (from matplotlib->insightface->-r gradio_demo/requirements.txt (line 14)) (2.9.0)\n",
147
+ "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas<3.0,>=1.0->gradio->-r gradio_demo/requirements.txt (line 15)) (2024.1)\n",
148
+ "Requirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas<3.0,>=1.0->gradio->-r gradio_demo/requirements.txt (line 15)) (2024.1)\n",
149
+ "Collecting annotated-types>=0.4.0 (from pydantic<3,>=1->spaces==0.19.4->-r gradio_demo/requirements.txt (line 9))\n",
150
+ " Downloading annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)\n",
151
+ "Collecting pydantic-core==2.20.1 (from pydantic<3,>=1->spaces==0.19.4->-r gradio_demo/requirements.txt (line 9))\n",
152
+ " Downloading pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n",
153
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->diffusers==0.25.1->-r gradio_demo/requirements.txt (line 1)) (3.3.2)\n",
154
+ "Requirement already satisfied: click>=8.0.0 in /opt/conda/lib/python3.10/site-packages (from typer<1.0,>=0.12->gradio->-r gradio_demo/requirements.txt (line 15)) (8.1.7)\n",
155
+ "Requirement already satisfied: shellingham>=1.3.0 in /opt/conda/lib/python3.10/site-packages (from typer<1.0,>=0.12->gradio->-r gradio_demo/requirements.txt (line 15)) (1.5.4)\n",
156
+ "Requirement already satisfied: rich>=10.11.0 in /opt/conda/lib/python3.10/site-packages (from typer<1.0,>=0.12->gradio->-r gradio_demo/requirements.txt (line 15)) (13.7.1)\n",
157
+ "Collecting albucore>=0.0.11 (from albumentations->insightface->-r gradio_demo/requirements.txt (line 14))\n",
158
+ " Downloading albucore-0.0.12-py3-none-any.whl.metadata (3.1 kB)\n",
159
+ "Collecting eval-type-backport (from albumentations->insightface->-r gradio_demo/requirements.txt (line 14))\n",
160
+ " Downloading eval_type_backport-0.2.0-py3-none-any.whl.metadata (2.2 kB)\n",
161
+ "Requirement already satisfied: imageio>=2.33 in /opt/conda/lib/python3.10/site-packages (from scikit-image->insightface->-r gradio_demo/requirements.txt (line 14)) (2.34.2)\n",
162
+ "Requirement already satisfied: tifffile>=2022.8.12 in /opt/conda/lib/python3.10/site-packages (from scikit-image->insightface->-r gradio_demo/requirements.txt (line 14)) (2024.7.2)\n",
163
+ "Requirement already satisfied: lazy-loader>=0.4 in /opt/conda/lib/python3.10/site-packages (from scikit-image->insightface->-r gradio_demo/requirements.txt (line 14)) (0.4)\n",
164
+ "Requirement already satisfied: joblib>=1.2.0 in /opt/conda/lib/python3.10/site-packages (from scikit-learn->insightface->-r gradio_demo/requirements.txt (line 14)) (1.4.2)\n",
165
+ "Requirement already satisfied: threadpoolctl>=3.1.0 in /opt/conda/lib/python3.10/site-packages (from scikit-learn->insightface->-r gradio_demo/requirements.txt (line 14)) (3.5.0)\n",
166
+ "Requirement already satisfied: soupsieve>1.2 in /opt/conda/lib/python3.10/site-packages (from beautifulsoup4->gdown->-r gradio_demo/requirements.txt (line 17)) (2.5)\n",
167
+ "Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime-gpu->-r gradio_demo/requirements.txt (line 8))\n",
168
+ " Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)\n",
169
+ "Requirement already satisfied: starlette<0.38.0,>=0.37.2 in /opt/conda/lib/python3.10/site-packages (from fastapi->gradio->-r gradio_demo/requirements.txt (line 15)) (0.37.2)\n",
170
+ "Requirement already satisfied: fastapi-cli>=0.0.2 in /opt/conda/lib/python3.10/site-packages (from fastapi->gradio->-r gradio_demo/requirements.txt (line 15)) (0.0.4)\n",
171
+ "Requirement already satisfied: ujson!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,>=4.0.1 in /opt/conda/lib/python3.10/site-packages (from fastapi->gradio->-r gradio_demo/requirements.txt (line 15)) (5.10.0)\n",
172
+ "Requirement already satisfied: email_validator>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from fastapi->gradio->-r gradio_demo/requirements.txt (line 15)) (2.2.0)\n",
173
+ "Requirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.10/site-packages (from importlib-metadata->diffusers==0.25.1->-r gradio_demo/requirements.txt (line 1)) (3.19.2)\n",
174
+ "Requirement already satisfied: wcwidth in /opt/conda/lib/python3.10/site-packages (from prettytable->insightface->-r gradio_demo/requirements.txt (line 14)) (0.2.13)\n",
175
+ "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /opt/conda/lib/python3.10/site-packages (from requests[socks]->gdown->-r gradio_demo/requirements.txt (line 17)) (1.7.1)\n",
176
+ "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/conda/lib/python3.10/site-packages (from sympy->torch==2.0.0->-r gradio_demo/requirements.txt (line 2)) (1.3.0)\n",
177
+ "Requirement already satisfied: tomli>=2.0.1 in /opt/conda/lib/python3.10/site-packages (from albucore>=0.0.11->albumentations->insightface->-r gradio_demo/requirements.txt (line 14)) (2.0.1)\n",
178
+ "Requirement already satisfied: dnspython>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from email_validator>=2.0.0->fastapi->gradio->-r gradio_demo/requirements.txt (line 15)) (2.6.1)\n",
179
+ "Requirement already satisfied: attrs>=22.2.0 in /opt/conda/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6.0,>=5.0->gradio->-r gradio_demo/requirements.txt (line 15)) (23.2.0)\n",
180
+ "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/conda/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6.0,>=5.0->gradio->-r gradio_demo/requirements.txt (line 15)) (2023.12.1)\n",
181
+ "Requirement already satisfied: referencing>=0.28.4 in /opt/conda/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6.0,>=5.0->gradio->-r gradio_demo/requirements.txt (line 15)) (0.35.1)\n",
182
+ "Requirement already satisfied: rpds-py>=0.7.1 in /opt/conda/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6.0,>=5.0->gradio->-r gradio_demo/requirements.txt (line 15)) (0.19.0)\n",
183
+ "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.7->matplotlib->insightface->-r gradio_demo/requirements.txt (line 14)) (1.16.0)\n",
184
+ "Requirement already satisfied: markdown-it-py>=2.2.0 in /opt/conda/lib/python3.10/site-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio->-r gradio_demo/requirements.txt (line 15)) (3.0.0)\n",
185
+ "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/conda/lib/python3.10/site-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio->-r gradio_demo/requirements.txt (line 15)) (2.18.0)\n",
186
+ "Requirement already satisfied: exceptiongroup>=1.0.2 in /opt/conda/lib/python3.10/site-packages (from anyio->httpx>=0.20->spaces==0.19.4->-r gradio_demo/requirements.txt (line 9)) (1.2.0)\n",
187
+ "Requirement already satisfied: httptools>=0.5.0 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]>=0.12.0->fastapi->gradio->-r gradio_demo/requirements.txt (line 15)) (0.6.1)\n",
188
+ "Requirement already satisfied: python-dotenv>=0.13 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]>=0.12.0->fastapi->gradio->-r gradio_demo/requirements.txt (line 15)) (1.0.1)\n",
189
+ "Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]>=0.12.0->fastapi->gradio->-r gradio_demo/requirements.txt (line 15)) (0.19.0)\n",
190
+ "Requirement already satisfied: watchfiles>=0.13 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]>=0.12.0->fastapi->gradio->-r gradio_demo/requirements.txt (line 15)) (0.22.0)\n",
191
+ "Requirement already satisfied: mdurl~=0.1 in /opt/conda/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio->-r gradio_demo/requirements.txt (line 15)) (0.1.2)\n",
192
+ "Downloading diffusers-0.25.1-py3-none-any.whl (1.8 MB)\n",
193
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━���━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m33.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
194
+ "\u001b[?25hDownloading transformers-4.37.1-py3-none-any.whl (8.4 MB)\n",
195
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.4/8.4 MB\u001b[0m \u001b[31m87.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
196
+ "\u001b[?25hDownloading spaces-0.19.4-py3-none-any.whl (15 kB)\n",
197
+ "Downloading huggingface_hub-0.20.2-py3-none-any.whl (330 kB)\n",
198
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m330.3/330.3 kB\u001b[0m \u001b[31m37.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
199
+ "\u001b[?25hDownloading accelerate-0.32.1-py3-none-any.whl (314 kB)\n",
200
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m314.1/314.1 kB\u001b[0m \u001b[31m36.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
201
+ "\u001b[?25hDownloading safetensors-0.4.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
202
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m74.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
203
+ "\u001b[?25hDownloading einops-0.8.0-py3-none-any.whl (43 kB)\n",
204
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.2/43.2 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
205
+ "\u001b[?25hDownloading onnxruntime_gpu-1.18.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (200.8 MB)\n",
206
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m200.8/200.8 MB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
207
+ "\u001b[?25hDownloading omegaconf-2.3.0-py3-none-any.whl (79 kB)\n",
208
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.5/79.5 kB\u001b[0m \u001b[31m133.1 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
209
+ "\u001b[?25hDownloading peft-0.11.1-py3-none-any.whl (251 kB)\n",
210
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m251.6/251.6 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
211
+ "\u001b[?25hDownloading opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (62.5 MB)\n",
212
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.5/62.5 MB\u001b[0m \u001b[31m21.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
213
+ "\u001b[?25hDownloading gradio-4.38.1-py3-none-any.whl (12.4 MB)\n",
214
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.4/12.4 MB\u001b[0m \u001b[31m84.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m0:01\u001b[0m\n",
215
+ "\u001b[?25hDownloading gradio_client-1.1.0-py3-none-any.whl (318 kB)\n",
216
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m318.1/318.1 kB\u001b[0m \u001b[31m38.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
217
+ "\u001b[?25hDownloading tomlkit-0.12.0-py3-none-any.whl (37 kB)\n",
218
+ "Downloading controlnet_aux-0.0.9-py3-none-any.whl (282 kB)\n",
219
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m282.4/282.4 kB\u001b[0m \u001b[31m31.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
220
+ "\u001b[?25hDownloading gdown-5.2.0-py3-none-any.whl (18 kB)\n",
221
+ "Downloading altair-5.3.0-py3-none-any.whl (857 kB)\n",
222
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m857.8/857.8 kB\u001b[0m \u001b[31m64.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
223
+ "\u001b[?25hDownloading pydantic-2.8.2-py3-none-any.whl (423 kB)\n",
224
+ "\u001b[2K \u001b[90m━━━━━━━━━━━���━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m423.9/423.9 kB\u001b[0m \u001b[31m46.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
225
+ "\u001b[?25hDownloading pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)\n",
226
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m101.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
227
+ "\u001b[?25hDownloading regex-2024.5.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (775 kB)\n",
228
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m775.1/775.1 kB\u001b[0m \u001b[31m58.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
229
+ "\u001b[?25hDownloading ruff-0.5.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.1 MB)\n",
230
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.1/10.1 MB\u001b[0m \u001b[31m82.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m0:01\u001b[0m\n",
231
+ "\u001b[?25hDownloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n",
232
+ "Downloading timm-0.6.7-py3-none-any.whl (509 kB)\n",
233
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m510.0/510.0 kB\u001b[0m \u001b[31m45.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
234
+ "\u001b[?25hDownloading tokenizers-0.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n",
235
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m90.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
236
+ "\u001b[?25hDownloading urllib3-2.2.2-py3-none-any.whl (121 kB)\n",
237
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.4/121.4 kB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
238
+ "\u001b[?25hDownloading albumentations-1.4.11-py3-none-any.whl (165 kB)\n",
239
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m165.3/165.3 kB\u001b[0m \u001b[31m23.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
240
+ "\u001b[?25hDownloading opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (49.9 MB)\n",
241
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.9/49.9 MB\u001b[0m \u001b[31m27.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
242
+ "\u001b[?25hDownloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n",
243
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
244
+ "\u001b[?25hDownloading easydict-1.13-py3-none-any.whl (6.8 kB)\n",
245
+ "Downloading flatbuffers-24.3.25-py2.py3-none-any.whl (26 kB)\n",
246
+ "Downloading onnx-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)\n",
247
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.9/15.9 MB\u001b[0m \u001b[31m80.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
248
+ "\u001b[?25hDownloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n",
249
+ "Downloading albucore-0.0.12-py3-none-any.whl (8.4 kB)\n",
250
+ "Downloading annotated_types-0.7.0-py3-none-any.whl (13 kB)\n",
251
+ "Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n",
252
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m13.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
253
+ "\u001b[?25hDownloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n",
254
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.9/129.9 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
255
+ "\u001b[?25hDownloading eval_type_backport-0.2.0-py3-none-any.whl (5.9 kB)\n",
256
+ "Building wheels for collected packages: antlr4-python3-runtime, insightface, ffmpy\n",
257
+ " Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25ldone\n",
258
+ "\u001b[?25h Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144552 sha256=e9ae543340a864dee947980bab8fc7d8fc3b8a5a04b28963252f790444a5cd1f\n",
259
+ " Stored in directory: /home/jupyter/.cache/pip/wheels/12/93/dd/1f6a127edc45659556564c5730f6d4e300888f4bca2d4c5a88\n",
260
+ " Building wheel for insightface (pyproject.toml) ... \u001b[?25ldone\n",
261
+ "\u001b[?25h Created wheel for insightface: filename=insightface-0.7.3-cp310-cp310-linux_x86_64.whl size=874168 sha256=f381a87957a87ca37e1795c8ba2b854664428cf5117760cc1f7d58368918523b\n",
262
+ " Stored in directory: /home/jupyter/.cache/pip/wheels/e3/d0/80/e3773fb8b6d1cca87ea1d33d9b1f20a223a6493c896da249b5\n",
263
+ " Building wheel for ffmpy (setup.py) ... \u001b[?25ldone\n",
264
+ "\u001b[?25h Created wheel for ffmpy: filename=ffmpy-0.3.2-py3-none-any.whl size=5581 sha256=d671b217ecfc883cea0aa0408a98e3d187bd0e888ba4e85318ea4b8bfa539786\n",
265
+ " Stored in directory: /home/jupyter/.cache/pip/wheels/bd/65/9a/671fc6dcde07d4418df0c592f8df512b26d7a0029c2a23dd81\n",
266
+ "Successfully built antlr4-python3-runtime insightface ffmpy\n",
267
+ "Installing collected packages: pydub, flatbuffers, ffmpy, easydict, antlr4-python3-runtime, websockets, urllib3, tomlkit, semantic-version, safetensors, ruff, regex, pydantic-core, opencv-python-headless, opencv-python, onnx, omegaconf, humanfriendly, eval-type-backport, einops, annotated-types, pydantic, coloredlogs, albucore, onnxruntime-gpu, huggingface-hub, albumentations, tokenizers, insightface, gradio-client, gdown, diffusers, altair, transformers, gradio, spaces, timm, accelerate, peft, controlnet_aux\n",
268
+ " Attempting uninstall: websockets\n",
269
+ " Found existing installation: websockets 12.0\n",
270
+ " Uninstalling websockets-12.0:\n",
271
+ " Successfully uninstalled websockets-12.0\n",
272
+ " Attempting uninstall: urllib3\n",
273
+ " Found existing installation: urllib3 1.26.19\n",
274
+ " Uninstalling urllib3-1.26.19:\n",
275
+ " Successfully uninstalled urllib3-1.26.19\n",
276
+ " Attempting uninstall: pydantic\n",
277
+ " Found existing installation: pydantic 1.10.17\n",
278
+ " Uninstalling pydantic-1.10.17:\n",
279
+ " Successfully uninstalled pydantic-1.10.17\n",
280
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
281
+ "dataproc-jupyter-plugin 0.1.79 requires pydantic~=1.10.0, but you have pydantic 2.8.2 which is incompatible.\n",
282
+ "kfp 2.5.0 requires urllib3<2.0.0, but you have urllib3 2.2.2 which is incompatible.\n",
283
+ "ydata-profiling 4.6.0 requires pydantic<2,>=1.8.1, but you have pydantic 2.8.2 which is incompatible.\u001b[0m\u001b[31m\n",
284
+ "\u001b[0mSuccessfully installed accelerate-0.32.1 albucore-0.0.12 albumentations-1.4.11 altair-5.3.0 annotated-types-0.7.0 antlr4-python3-runtime-4.9.3 coloredlogs-15.0.1 controlnet_aux-0.0.9 diffusers-0.25.1 easydict-1.13 einops-0.8.0 eval-type-backport-0.2.0 ffmpy-0.3.2 flatbuffers-24.3.25 gdown-5.2.0 gradio-4.38.1 gradio-client-1.1.0 huggingface-hub-0.20.2 humanfriendly-10.0 insightface-0.7.3 omegaconf-2.3.0 onnx-1.16.1 onnxruntime-gpu-1.18.1 opencv-python-4.10.0.84 opencv-python-headless-4.10.0.84 peft-0.11.1 pydantic-2.8.2 pydantic-core-2.20.1 pydub-0.25.1 regex-2024.5.15 ruff-0.5.4 safetensors-0.4.3 semantic-version-2.10.0 spaces-0.19.4 timm-0.6.7 tokenizers-0.15.2 tomlkit-0.12.0 transformers-4.37.1 urllib3-2.2.2 websockets-11.0.3\n"
285
+ ]
286
+ }
287
+ ],
288
+ "source": [
289
+ "!pip install -r gradio_demo/requirements.txt"
290
+ ]
291
+ },
292
+ {
293
+ "cell_type": "code",
294
+ "execution_count": 3,
295
+ "id": "dec146ca-0832-4c71-8b31-1586af435d67",
296
+ "metadata": {
297
+ "tags": []
298
+ },
299
+ "outputs": [
300
+ {
301
+ "name": "stdout",
302
+ "output_type": "stream",
303
+ "text": [
304
+ "ControlNetModel/config.json: 100%|█████████| 1.38k/1.38k [00:00<00:00, 7.07MB/s]\n",
305
+ "diffusion_pytorch_model.safetensors: 100%|██| 2.50G/2.50G [00:05<00:00, 449MB/s]\n",
306
+ "ip-adapter.bin: 100%|███████████████████████| 1.69G/1.69G [00:09<00:00, 180MB/s]\n",
307
+ "pytorch_lora_weights.safetensors: 100%|███████| 394M/394M [00:02<00:00, 169MB/s]\n",
308
+ "Downloading...\n",
309
+ "From (original): https://drive.google.com/uc?id=18wEUfMNohBJ4K3Ly5wpTejPfDzp-8fI8\n",
310
+ "From (redirected): https://drive.google.com/uc?id=18wEUfMNohBJ4K3Ly5wpTejPfDzp-8fI8&confirm=t&uuid=abca1ed1-5c28-423b-a1c7-4fe1fa0d4dbc\n",
311
+ "To: /home/jupyter/InstantID/models/antelopev2.zip\n",
312
+ "100%|████████████████████████████████████████| 361M/361M [00:21<00:00, 16.9MB/s]\n",
313
+ "Archive: ./models/antelopev2.zip\n",
314
+ " creating: ./models/antelopev2/\n",
315
+ " inflating: ./models/antelopev2/genderage.onnx \n",
316
+ " inflating: ./models/antelopev2/2d106det.onnx \n",
317
+ " inflating: ./models/antelopev2/1k3d68.onnx \n",
318
+ " inflating: ./models/antelopev2/glintr100.onnx \n",
319
+ " inflating: ./models/antelopev2/scrfd_10g_bnkps.onnx \n"
320
+ ]
321
+ }
322
+ ],
323
+ "source": [
324
+ "!python gradio_demo/download_models.py"
325
+ ]
326
+ },
327
+ {
328
+ "cell_type": "code",
329
+ "execution_count": 4,
330
+ "id": "e4900619-4519-4ec9-bb32-a620128d1727",
331
+ "metadata": {
332
+ "tags": []
333
+ },
334
+ "outputs": [
335
+ {
336
+ "name": "stdout",
337
+ "output_type": "stream",
338
+ "text": [
339
+ "Collecting setuptools==69.5.1\n",
340
+ " Downloading setuptools-69.5.1-py3-none-any.whl.metadata (6.2 kB)\n",
341
+ "Downloading setuptools-69.5.1-py3-none-any.whl (894 kB)\n",
342
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m894.6/894.6 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
343
+ "\u001b[?25hInstalling collected packages: setuptools\n",
344
+ " Attempting uninstall: setuptools\n",
345
+ " Found existing installation: setuptools 70.1.1\n",
346
+ " Uninstalling setuptools-70.1.1:\n",
347
+ " Successfully uninstalled setuptools-70.1.1\n",
348
+ "Successfully installed setuptools-69.5.1\n"
349
+ ]
350
+ }
351
+ ],
352
+ "source": [
353
+ "!pip install setuptools==69.5.1"
354
+ ]
355
+ },
356
+ {
357
+ "cell_type": "code",
358
+ "execution_count": 6,
359
+ "id": "9584f180-48a2-46bc-968d-9c99bc56f06c",
360
+ "metadata": {
361
+ "tags": []
362
+ },
363
+ "outputs": [
364
+ {
365
+ "name": "stdout",
366
+ "output_type": "stream",
367
+ "text": [
368
+ "Collecting huggingface-hub==0.23.4\n",
369
+ " Downloading huggingface_hub-0.23.4-py3-none-any.whl.metadata (12 kB)\n",
370
+ "Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface-hub==0.23.4) (3.15.4)\n",
371
+ "Requirement already satisfied: fsspec>=2023.5.0 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub==0.23.4) (2024.6.1)\n",
372
+ "Requirement already satisfied: packaging>=20.9 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub==0.23.4) (24.1)\n",
373
+ "Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub==0.23.4) (6.0.1)\n",
374
+ "Requirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from huggingface-hub==0.23.4) (2.32.3)\n",
375
+ "Requirement already satisfied: tqdm>=4.42.1 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub==0.23.4) (4.66.4)\n",
376
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub==0.23.4) (4.12.2)\n",
377
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub==0.23.4) (3.3.2)\n",
378
+ "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub==0.23.4) (3.7)\n",
379
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub==0.23.4) (2.2.2)\n",
380
+ "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub==0.23.4) (2024.7.4)\n",
381
+ "Downloading huggingface_hub-0.23.4-py3-none-any.whl (402 kB)\n",
382
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m402.6/402.6 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
383
+ "\u001b[?25hInstalling collected packages: huggingface-hub\n",
384
+ " Attempting uninstall: huggingface-hub\n",
385
+ " Found existing installation: huggingface-hub 0.20.2\n",
386
+ " Uninstalling huggingface-hub-0.20.2:\n",
387
+ " Successfully uninstalled huggingface-hub-0.20.2\n",
388
+ "Successfully installed huggingface-hub-0.23.4\n"
389
+ ]
390
+ }
391
+ ],
392
+ "source": [
393
+ "!pip install huggingface-hub==0.23.4"
394
+ ]
395
+ },
396
+ {
397
+ "cell_type": "code",
398
+ "execution_count": null,
399
+ "id": "56683081-5e9f-4378-84df-b957c84b23ad",
400
+ "metadata": {},
401
+ "outputs": [],
402
+ "source": []
403
+ }
404
+ ],
405
+ "metadata": {
406
+ "environment": {
407
+ "kernel": "python3",
408
+ "name": ".m123",
409
+ "type": "gcloud",
410
+ "uri": "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/:m123"
411
+ },
412
+ "kernelspec": {
413
+ "display_name": "Python 3 (Local)",
414
+ "language": "python",
415
+ "name": "python3"
416
+ },
417
+ "language_info": {
418
+ "codemirror_mode": {
419
+ "name": "ipython",
420
+ "version": 3
421
+ },
422
+ "file_extension": ".py",
423
+ "mimetype": "text/x-python",
424
+ "name": "python",
425
+ "nbconvert_exporter": "python",
426
+ "pygments_lexer": "ipython3",
427
+ "version": "3.10.14"
428
+ }
429
+ },
430
+ "nbformat": 4,
431
+ "nbformat_minor": 5
432
+ }
checkpoints/ControlNetModel/config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "ControlNetModel",
3
+ "_diffusers_version": "0.21.2",
4
+ "_name_or_path": "/mnt/nj-aigc/usr/guiwan/workspace/diffusion_output/face_xl_ipc_v4_2_XiezhenAnimeForeigner/checkpoint-150000/ControlNetModel",
5
+ "act_fn": "silu",
6
+ "addition_embed_type": "text_time",
7
+ "addition_embed_type_num_heads": 64,
8
+ "addition_time_embed_dim": 256,
9
+ "attention_head_dim": [
10
+ 5,
11
+ 10,
12
+ 20
13
+ ],
14
+ "block_out_channels": [
15
+ 320,
16
+ 640,
17
+ 1280
18
+ ],
19
+ "class_embed_type": null,
20
+ "conditioning_channels": 3,
21
+ "conditioning_embedding_out_channels": [
22
+ 16,
23
+ 32,
24
+ 96,
25
+ 256
26
+ ],
27
+ "controlnet_conditioning_channel_order": "rgb",
28
+ "cross_attention_dim": 2048,
29
+ "down_block_types": [
30
+ "DownBlock2D",
31
+ "CrossAttnDownBlock2D",
32
+ "CrossAttnDownBlock2D"
33
+ ],
34
+ "downsample_padding": 1,
35
+ "encoder_hid_dim": null,
36
+ "encoder_hid_dim_type": null,
37
+ "flip_sin_to_cos": true,
38
+ "freq_shift": 0,
39
+ "global_pool_conditions": false,
40
+ "in_channels": 4,
41
+ "layers_per_block": 2,
42
+ "mid_block_scale_factor": 1,
43
+ "norm_eps": 1e-05,
44
+ "norm_num_groups": 32,
45
+ "num_attention_heads": null,
46
+ "num_class_embeds": null,
47
+ "only_cross_attention": false,
48
+ "projection_class_embeddings_input_dim": 2816,
49
+ "resnet_time_scale_shift": "default",
50
+ "transformer_layers_per_block": [
51
+ 1,
52
+ 2,
53
+ 10
54
+ ],
55
+ "upcast_attention": null,
56
+ "use_linear_projection": true
57
+ }
checkpoints/ControlNetModel/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8127be9f174101ebdafee9964d856b49b634435cf6daa396d3f593cf0bbbb05
3
+ size 2502139136
checkpoints/ip-adapter.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02b3618e36d803784166660520098089a81388e61a93ef8002aa79a5b1c546e1
3
+ size 1691134141
checkpoints/pytorch_lora_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a764e6859b6e04047cd761c08ff0cee96413a8e004c9f07707530cd776b19141
3
+ size 393855224
cog.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Configuration for Cog ⚙️
2
+ # Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md
3
+
4
+ build:
5
+ # set to true if your model requires a GPU
6
+ gpu: true
7
+ cuda: "12.1"
8
+
9
+ # a list of ubuntu apt packages to install
10
+ system_packages:
11
+ - "libgl1-mesa-glx"
12
+ - "libglib2.0-0"
13
+
14
+ # python version in the form '3.11' or '3.11.4'
15
+ python_version: "3.11"
16
+
17
+ # a list of packages in the format <package-name>==<version>
18
+ python_packages:
19
+ - "opencv-python==4.9.0.80"
20
+ - "transformers==4.37.0"
21
+ - "accelerate==0.26.1"
22
+ - "insightface==0.7.3"
23
+ - "diffusers==0.25.1"
24
+ - "onnxruntime==1.16.3"
25
+ - "omegaconf==2.3.0"
26
+ - "gradio==3.50.2"
27
+ - "peft==0.8.2"
28
+ - "transformers==4.37.0"
29
+ - "controlnet-aux==0.0.7"
30
+
31
+ # fix for pydantic issues in cog
32
+ # https://github.com/replicate/cog/issues/1623
33
+ - albumentations==1.4.3
34
+
35
+ # commands run after the environment is setup
36
+ run:
37
+ - curl -o /usr/local/bin/pget -L "https://github.com/replicate/pget/releases/download/v0.6.0/pget_linux_x86_64" && chmod +x /usr/local/bin/pget
38
+
39
+ # predict.py defines how predictions are run on your model
40
+ predict: "cog/predict.py:Predictor"
cog/README.md ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # InstantID Cog Model
2
+
3
+ [![Replicate](https://replicate.com/zsxkib/instant-id/badge)](https://replicate.com/zsxkib/instant-id)
4
+
5
+ ## Overview
6
+ This repository contains the implementation of [InstantID](https://github.com/InstantID/InstantID) as a [Cog](https://github.com/replicate/cog) model.
7
+
8
+ Using [Cog](https://github.com/replicate/cog) allows any user with a GPU to easily run the model locally, without the hassle of downloading weights, installing libraries, or managing CUDA versions. Everything just works.
9
+
10
+ ## Development
11
+ To push your own fork of InstantID to [Replicate](https://replicate.com), follow the [Model Pushing Guide](https://replicate.com/docs/guides/push-a-model).
12
+
13
+ ## Basic Usage
14
+ To make predictions using the model, execute the following command from the root of this project:
15
+
16
+ ```bash
17
+ cog predict \
18
+ -i image=@examples/sam_resize.png \
19
+ -i prompt="analog film photo of a man. faded film, desaturated, 35mm photo, grainy, vignette, vintage, Kodachrome, Lomography, stained, highly detailed, found footage, masterpiece, best quality" \
20
+ -i negative_prompt="nsfw" \
21
+ -i width=680 \
22
+ -i height=680 \
23
+ -i ip_adapter_scale=0.8 \
24
+ -i controlnet_conditioning_scale=0.8 \
25
+ -i num_inference_steps=30 \
26
+ -i guidance_scale=5
27
+ ```
28
+
29
+ <table>
30
+ <tr>
31
+ <td>
32
+ <p align="center">Input</p>
33
+ <img src="https://replicate.delivery/pbxt/KGy0R72cMwriR9EnCLu6hgVkQNd60mY01mDZAQqcUic9rVw4/musk_resize.jpeg" alt="Sample Input Image" width="90%"/>
34
+ </td>
35
+ <td>
36
+ <p align="center">Output</p>
37
+ <img src="https://replicate.delivery/pbxt/oGOxXELcLcpaMBeIeffwdxKZAkuzwOzzoxKadjhV8YgQWk8IB/result.jpg" alt="Sample Output Image" width="100%"/>
38
+ </td>
39
+ </tr>
40
+ </table>
41
+
42
+ ## Input Parameters
43
+
44
+ The following table provides details about each input parameter for the `predict` function:
45
+
46
+ | Parameter | Description | Default Value | Range |
47
+ | ------------------------------- | ---------------------------------- | -------------------------------------------------------------------------------------------------------------- | ----------- |
48
+ | `image` | Input image | A path to the input image file | Path string |
49
+ | `prompt` | Input prompt | "analog film photo of a man. faded film, desaturated, 35mm photo, grainy, vignette, vintage, Kodachrome, ... " | String |
50
+ | `negative_prompt` | Input Negative Prompt | (empty string) | String |
51
+ | `width` | Width of output image | 640 | 512 - 2048 |
52
+ | `height` | Height of output image | 640 | 512 - 2048 |
53
+ | `ip_adapter_scale` | Scale for IP adapter | 0.8 | 0.0 - 1.5 |
54
+ | `controlnet_conditioning_scale` | Scale for ControlNet conditioning | 0.8 | 0.0 - 1.5 |
55
+ | `num_inference_steps` | Number of denoising steps | 30 | 1 - 500 |
56
+ | `guidance_scale` | Scale for classifier-free guidance | 7.5 | 1 - 50 |
57
+
58
+ This table provides a quick reference to understand and modify the inputs for generating predictions using the model.
59
+
60
+
cog/predict.py ADDED
@@ -0,0 +1,756 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Prediction interface for Cog ⚙️
2
+ # https://github.com/replicate/cog/blob/main/docs/python.md
3
+
4
+ import os
5
+ import sys
6
+
7
+ sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
8
+ sys.path.append(os.path.join(os.path.dirname(__file__), "../gradio_demo"))
9
+
10
+ import cv2
11
+ import time
12
+ import torch
13
+ import mimetypes
14
+ import subprocess
15
+ import numpy as np
16
+ from typing import List
17
+ from cog import BasePredictor, Input, Path
18
+
19
+ import PIL
20
+ from PIL import Image
21
+
22
+ import diffusers
23
+ from diffusers import LCMScheduler
24
+ from diffusers.utils import load_image
25
+ from diffusers.models import ControlNetModel
26
+ from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
27
+
28
+ from model_util import get_torch_device
29
+ from insightface.app import FaceAnalysis
30
+ from transformers import CLIPImageProcessor
31
+ from controlnet_util import openpose, get_depth_map, get_canny_image
32
+
33
+ from diffusers.pipelines.stable_diffusion.safety_checker import (
34
+ StableDiffusionSafetyChecker,
35
+ )
36
+ from pipeline_stable_diffusion_xl_instantid_full import (
37
+ StableDiffusionXLInstantIDPipeline,
38
+ draw_kps,
39
+ )
40
+
41
# Make sure ".webp" maps to the "image/webp" MIME type.
mimetypes.add_type("image/webp", ".webp")

# GPU global variables: half precision on CUDA devices, full precision otherwise.
DEVICE = get_torch_device()
DTYPE = torch.float16 if "cuda" in str(DEVICE) else torch.float32

# for `ip-adapter`, `ControlNetModel`, and `stable-diffusion-xl-base-1.0`
CHECKPOINTS_CACHE = "./checkpoints"
CHECKPOINTS_URL = "https://weights.replicate.delivery/default/InstantID/checkpoints.tar"

# for `models/antelopev2`
MODELS_CACHE = "./models"
MODELS_URL = "https://weights.replicate.delivery/default/InstantID/models.tar"

# for the safety checker
SAFETY_CACHE = "./safety-cache"
FEATURE_EXTRACTOR = "./feature-extractor"
SAFETY_URL = "https://weights.replicate.delivery/default/playgroundai/safety-cache.tar"
59
+
60
# (repository owner, model name) for every selectable SDXL base model.
# These are all huggingface models that we host via gcp + pget
_SDXL_HOSTED_REPOS = (
    ("stabilityai", "stable-diffusion-xl-base-1.0"),
    ("stablediffusionapi", "afrodite-xl-v2"),
    ("stablediffusionapi", "albedobase-xl-20"),
    ("stablediffusionapi", "albedobase-xl-v13"),
    ("stablediffusionapi", "animagine-xl-30"),
    ("stablediffusionapi", "anime-art-diffusion-xl"),
    ("stablediffusionapi", "anime-illust-diffusion-xl"),
    ("stablediffusionapi", "dreamshaper-xl"),
    ("stablediffusionapi", "dynavision-xl-v0610"),
    ("stablediffusionapi", "guofeng4-xl"),
    ("stablediffusionapi", "juggernaut-xl-v8"),
    ("stablediffusionapi", "nightvision-xl-0791"),
    ("stablediffusionapi", "omnigen-xl"),
    ("stablediffusionapi", "pony-diffusion-v6-xl"),
    ("stablediffusionapi", "protovision-xl-high-fidel"),
    ("SG161222", "RealVisXL_V3.0_Turbo"),
    ("SG161222", "RealVisXL_V4.0_Lightning"),
)

# Maps each model name to its hub slug, the tarball URL it is downloaded from,
# and the local directory the tarball is extracted into.
SDXL_NAME_TO_PATHLIKE = {
    model: {
        "slug": f"{owner}/{model}",
        "url": f"https://weights.replicate.delivery/default/InstantID/models--{owner}--{model}.tar",
        "path": f"checkpoints/models--{owner}--{model}",
    }
    for owner, model in _SDXL_HOSTED_REPOS
}
148
+
149
+
150
def convert_from_cv2_to_image(img: np.ndarray) -> Image:
    """Turn an OpenCV BGR array into a PIL image with RGB channel order."""
    rgb_pixels = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return Image.fromarray(rgb_pixels)
152
+
153
+
154
def convert_from_image_to_cv2(img: Image) -> np.ndarray:
    """Turn a PIL RGB image into an OpenCV-style array with BGR channel order."""
    rgb_pixels = np.array(img)
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
156
+
157
+
158
def resize_img(
    input_image,
    max_side=1280,
    min_side=1024,
    size=None,
    pad_to_max_side=False,
    mode=PIL.Image.BILINEAR,
    base_pixel_number=64,
):
    """Resize an image to dimensions that are multiples of `base_pixel_number`.

    Args:
        input_image: Image exposing `.size` (width, height) and `.resize`.
        max_side: Target length of the longer side when `size` is not given;
            also the side of the square canvas when padding.
        min_side: Length the shorter side is scaled to before the long-side
            ratio is computed.
        size: Explicit `(width, height)`; when given, the ratio-based sizing
            is skipped and the image is resized straight to this size.
        pad_to_max_side: If true, centre the result on a white
            `max_side` x `max_side` canvas.
        mode: Resampling filter passed to `resize`.
        base_pixel_number: The computed width and height are rounded down to
            a multiple of this value.

    Returns:
        The resized (and optionally padded) image.
    """
    w, h = input_image.size
    if size is None:
        # Scale so the short side equals min_side ...
        short_ratio = min_side / min(h, w)
        w, h = round(short_ratio * w), round(short_ratio * h)
        # ... then rescale those dimensions so the long side equals max_side.
        long_ratio = max_side / max(h, w)
        fitted_w = round(long_ratio * w)
        fitted_h = round(long_ratio * h)
        input_image = input_image.resize([fitted_w, fitted_h], mode)
        # Snap both dimensions down to the nearest multiple of base_pixel_number.
        w_resize_new = (fitted_w // base_pixel_number) * base_pixel_number
        h_resize_new = (fitted_h // base_pixel_number) * base_pixel_number
    else:
        w_resize_new, h_resize_new = size
    input_image = input_image.resize([w_resize_new, h_resize_new], mode)

    if not pad_to_max_side:
        return input_image

    # Paste the resized image onto the centre of a white square canvas.
    canvas = np.full([max_side, max_side, 3], 255, dtype=np.uint8)
    left = (max_side - w_resize_new) // 2
    top = (max_side - h_resize_new) // 2
    canvas[top : top + h_resize_new, left : left + w_resize_new] = np.array(
        input_image
    )
    return Image.fromarray(canvas)
188
+
189
+
190
def download_weights(url, dest):
    """Download `url` to `dest` with the `pget` command-line tool.

    The `-x` flag is appended when the URL contains ".tar". A non-zero exit
    from `pget` is reported on stdout and the `CalledProcessError` is
    re-raised.
    """
    began = time.time()
    print("[!] Initiating download from URL: ", url)
    print("[~] Destination path: ", dest)

    pget_args = ["pget", "-vf", url, dest]
    if ".tar" in url:
        pget_args.append("-x")

    try:
        subprocess.check_call(pget_args, close_fds=False)
    except subprocess.CalledProcessError as err:
        print(
            f"[ERROR] Failed to download weights. Command '{' '.join(err.cmd)}' returned non-zero exit status {err.returncode}."
        )
        raise

    print("[+] Download completed in: ", time.time() - began, "seconds")
205
+
206
+
207
+ class Predictor(BasePredictor):
208
+ def setup(self) -> None:
209
+ """Load the model into memory to make running multiple predictions efficient"""
210
+
211
+ if not os.path.exists(CHECKPOINTS_CACHE):
212
+ download_weights(CHECKPOINTS_URL, CHECKPOINTS_CACHE)
213
+
214
+ if not os.path.exists(MODELS_CACHE):
215
+ download_weights(MODELS_URL, MODELS_CACHE)
216
+
217
+ self.face_detection_input_width, self.face_detection_input_height = 640, 640
218
+ self.app = FaceAnalysis(
219
+ name="antelopev2",
220
+ root="./",
221
+ providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
222
+ )
223
+ self.app.prepare(ctx_id=0, det_size=(self.face_detection_input_width, self.face_detection_input_height))
224
+
225
+ # Path to InstantID models
226
+ self.face_adapter = f"./checkpoints/ip-adapter.bin"
227
+ controlnet_path = f"./checkpoints/ControlNetModel"
228
+
229
+ # Load pipeline face ControlNetModel
230
+ self.controlnet_identitynet = ControlNetModel.from_pretrained(
231
+ controlnet_path,
232
+ torch_dtype=DTYPE,
233
+ cache_dir=CHECKPOINTS_CACHE,
234
+ local_files_only=True,
235
+ )
236
+ self.setup_extra_controlnets()
237
+
238
+ self.load_weights("stable-diffusion-xl-base-1.0")
239
+ self.setup_safety_checker()
240
+
241
def setup_safety_checker(self):
    """Load the safety checker and its CLIP image processor.

    The safety-checker weights are downloaded first when `SAFETY_CACHE`
    does not exist.
    """
    print("[~] Seting up safety checker")

    cache_missing = not os.path.exists(SAFETY_CACHE)
    if cache_missing:
        download_weights(SAFETY_URL, SAFETY_CACHE)

    checker_kwargs = {"torch_dtype": DTYPE, "local_files_only": True}
    self.safety_checker = StableDiffusionSafetyChecker.from_pretrained(
        SAFETY_CACHE, **checker_kwargs
    )
    self.safety_checker.to(DEVICE)
    self.feature_extractor = CLIPImageProcessor.from_pretrained(FEATURE_EXTRACTOR)
254
+
255
def run_safety_checker(self, image):
    """Run the safety checker on one image.

    Returns:
        The `(image, has_nsfw_concept)` pair produced by the safety checker.
    """
    clip_features = self.feature_extractor(image, return_tensors="pt")
    clip_features = clip_features.to(DEVICE)
    pixels = np.array(image)
    # The checker receives the raw pixels plus CLIP inputs cast to DTYPE.
    checked = self.safety_checker(
        images=[pixels],
        clip_input=clip_features.pixel_values.to(DTYPE),
    )
    image, has_nsfw_concept = checked
    return image, has_nsfw_concept
265
+
266
def load_weights(self, sdxl_weights):
    """Build `self.pipe` from the named SDXL base weights.

    Downloads the weights when their local directory is missing, builds the
    InstantID pipeline around the identity ControlNet, then loads the
    IP-adapter and the LCM LoRA and moves the pipeline to CUDA.

    Args:
        sdxl_weights: Key into `SDXL_NAME_TO_PATHLIKE`.

    Raises:
        KeyError: If `sdxl_weights` is not a key of `SDXL_NAME_TO_PATHLIKE`.
    """
    self.base_weights = sdxl_weights
    weights_info = SDXL_NAME_TO_PATHLIKE[self.base_weights]

    download_url = weights_info["url"]
    path_to_weights_dir = weights_info["path"]
    if not os.path.exists(path_to_weights_dir):
        download_weights(download_url, path_to_weights_dir)

    # Entries with a "slug" are loaded by hub id from the local cache;
    # entries without one are treated as a single weights file.
    is_hugging_face_model = "slug" in weights_info
    path_to_weights_file = os.path.join(
        path_to_weights_dir,
        weights_info.get("file", ""),
    )

    print(f"[~] Loading new SDXL weights: {path_to_weights_file}")
    if is_hugging_face_model:
        self.pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
            weights_info["slug"],
            controlnet=[self.controlnet_identitynet],
            torch_dtype=DTYPE,
            cache_dir=CHECKPOINTS_CACHE,
            local_files_only=True,
            safety_checker=None,
            feature_extractor=None,
        )
        self.pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(
            self.pipe.scheduler.config
        )
    else:  # e.g. .safetensors, NOTE: This functionality is not being used right now
        # `from_single_file` returns a new pipeline, so the result must be
        # assigned. It is called on the class because `self.pipe` may not
        # exist yet on this path.
        self.pipe = StableDiffusionXLInstantIDPipeline.from_single_file(
            path_to_weights_file,
            controlnet=self.controlnet_identitynet,
            torch_dtype=DTYPE,
            cache_dir=CHECKPOINTS_CACHE,
        )

    self.pipe.load_ip_adapter_instantid(self.face_adapter)
    self.setup_lcm_lora()
    self.pipe.cuda()
306
+
307
def setup_lcm_lora(self):
    """Load the LCM LoRA into `self.pipe` and leave it disabled.

    The LoRA archive is downloaded first when its checkpoint directory is
    missing.
    """
    print("[~] Seting up LCM (just in case)")

    cache_key = "models--latent-consistency--lcm-lora-sdxl"
    local_dir = f"checkpoints/{cache_key}"
    if not os.path.exists(local_dir):
        archive_url = (
            f"https://weights.replicate.delivery/default/InstantID/{cache_key}.tar"
        )
        download_weights(archive_url, local_dir)

    self.pipe.load_lora_weights(
        "latent-consistency/lcm-lora-sdxl",
        cache_dir=CHECKPOINTS_CACHE,
        local_files_only=True,
        weight_name="pytorch_lora_weights.safetensors",
    )
    # Loaded but switched off; `generate_image` enables it on request.
    self.pipe.disable_lora()
324
+
325
def setup_extra_controlnets(self):
    """Download (if needed) and load the pose, canny and depth ControlNets.

    Populates `self.controlnet_map` (name -> loaded ControlNet) and
    `self.controlnet_map_fn` (name -> function that builds the control
    image for that ControlNet).
    """
    print("[~] Seting up pose, canny, depth ControlNets")

    # Fetch whichever ControlNet archives are not yet in the checkpoints dir.
    for cache_key in (
        "models--diffusers--controlnet-canny-sdxl-1.0",
        "models--diffusers--controlnet-depth-sdxl-1.0-small",
        "models--thibaud--controlnet-openpose-sdxl-1.0",
    ):
        local_dir = f"checkpoints/{cache_key}"
        if os.path.exists(local_dir):
            continue
        download_weights(
            f"https://weights.replicate.delivery/default/InstantID/{cache_key}.tar",
            local_dir,
        )

    def load_controlnet(repo_id):
        # All three ControlNets are loaded from the local cache the same way.
        return ControlNetModel.from_pretrained(
            repo_id,
            torch_dtype=DTYPE,
            cache_dir=CHECKPOINTS_CACHE,
            local_files_only=True,
        ).to(DEVICE)

    self.controlnet_map = {
        "pose": load_controlnet("thibaud/controlnet-openpose-sdxl-1.0"),
        "canny": load_controlnet("diffusers/controlnet-canny-sdxl-1.0"),
        "depth": load_controlnet("diffusers/controlnet-depth-sdxl-1.0-small"),
    }
    self.controlnet_map_fn = {
        "pose": openpose,
        "canny": get_canny_image,
        "depth": get_depth_map,
    }
373
+
374
def generate_image(
    self,
    face_image_path,
    pose_image_path,
    prompt,
    negative_prompt,
    num_steps,
    identitynet_strength_ratio,
    adapter_strength_ratio,
    pose_strength,
    canny_strength,
    depth_strength,
    controlnet_selection,
    guidance_scale,
    seed,
    scheduler,
    enable_LCM,
    enhance_face_region,
    num_images_per_prompt,
):
    """Generate images conditioned on a face image and a text prompt.

    Args:
        face_image_path: Path of the face image; must not be None.
        pose_image_path: Optional path of an image whose face keypoints
            (and extra ControlNet inputs) replace those of the face image.
        prompt: Positive text prompt.
        negative_prompt: Negative text prompt.
        num_steps: Number of inference steps passed to the pipeline.
        identitynet_strength_ratio: Conditioning scale of the identity
            ControlNet.
        adapter_strength_ratio: IP-adapter scale.
        pose_strength: Conditioning scale of the "pose" ControlNet.
        canny_strength: Conditioning scale of the "canny" ControlNet.
        depth_strength: Conditioning scale of the "depth" ControlNet.
        controlnet_selection: Names ("pose", "canny", "depth") of the extra
            ControlNets to enable; may be empty.
        guidance_scale: Guidance scale passed to the pipeline.
        seed: Seed for the torch generator.
        scheduler: "<SchedulerClass>[-Karras[-SDE]]"; ignored when
            `enable_LCM` is true.
        enable_LCM: Enable the LCM LoRA and switch to `LCMScheduler`.
        enhance_face_region: Build a control mask covering the detected
            face bounding box.
        num_images_per_prompt: Number of images to generate.

    Returns:
        The `.images` attribute of the pipeline output.

    Raises:
        Exception: If `face_image_path` is None, or no face is detected in
            the face image or in the pose image.
    """
    if enable_LCM:
        self.pipe.enable_lora()
        self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
    else:
        self.pipe.disable_lora()
        # A first "-" suffix enables Karras sigmas; a second selects the
        # SDE variant of the solver.
        scheduler_class_name, *modifiers = scheduler.split("-")

        add_kwargs = {}
        if modifiers:
            add_kwargs["use_karras_sigmas"] = True
        if len(modifiers) > 1:
            add_kwargs["algorithm_type"] = "sde-dpmsolver++"
        scheduler_class = getattr(diffusers, scheduler_class_name)
        self.pipe.scheduler = scheduler_class.from_config(
            self.pipe.scheduler.config,
            **add_kwargs,
        )

    if face_image_path is None:
        raise Exception(
            "Cannot find any input face `image`! Please upload the face `image`"
        )

    face_image = load_image(face_image_path)
    face_image = resize_img(face_image)
    face_image_cv2 = convert_from_image_to_cv2(face_image)
    height, width, _ = face_image_cv2.shape

    # Extract face features
    face_info = self.app.get(face_image_cv2)

    if len(face_info) == 0:
        raise Exception(
            "Face detector could not find a face in the `image`. Please use a different `image` as input."
        )

    # Only use the largest face: rank detections by bounding-box area,
    # (x2 - x1) * (y2 - y1). Both differences must be parenthesised,
    # otherwise the key is width * y2 - y1, which is not an area.
    face_info = sorted(
        face_info,
        key=lambda x: (x["bbox"][2] - x["bbox"][0]) * (x["bbox"][3] - x["bbox"][1]),
    )[-1]
    face_emb = face_info["embedding"]
    face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info["kps"])

    img_controlnet = face_image
    if pose_image_path is not None:
        pose_image = load_image(pose_image_path)
        pose_image = resize_img(pose_image, max_side=1024)
        img_controlnet = pose_image
        pose_image_cv2 = convert_from_image_to_cv2(pose_image)

        face_info = self.app.get(pose_image_cv2)

        if len(face_info) == 0:
            raise Exception(
                "Face detector could not find a face in the `pose_image`. Please use a different `pose_image` as input."
            )

        # Keypoints (and the face mask below) now come from the pose image;
        # the identity embedding still comes from the face image.
        face_info = face_info[-1]
        face_kps = draw_kps(pose_image, face_info["kps"])

        width, height = face_kps.size

    if enhance_face_region:
        # White rectangle over the face bounding box on a black canvas.
        control_mask = np.zeros([height, width, 3])
        x1, y1, x2, y2 = face_info["bbox"]
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        control_mask[y1:y2, x1:x2] = 255
        control_mask = Image.fromarray(control_mask.astype(np.uint8))
    else:
        control_mask = None

    if len(controlnet_selection) > 0:
        controlnet_scales = {
            "pose": pose_strength,
            "canny": canny_strength,
            "depth": depth_strength,
        }
        # The identity ControlNet always comes first, followed by the
        # selected extras; scales and control images use the same order.
        self.pipe.controlnet = MultiControlNetModel(
            [self.controlnet_identitynet]
            + [self.controlnet_map[s] for s in controlnet_selection]
        )
        control_scales = [float(identitynet_strength_ratio)] + [
            controlnet_scales[s] for s in controlnet_selection
        ]
        control_images = [face_kps] + [
            self.controlnet_map_fn[s](img_controlnet).resize((width, height))
            for s in controlnet_selection
        ]
    else:
        self.pipe.controlnet = self.controlnet_identitynet
        control_scales = float(identitynet_strength_ratio)
        control_images = face_kps

    generator = torch.Generator(device=DEVICE).manual_seed(seed)

    print("Start inference...")
    print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")

    self.pipe.set_ip_adapter_scale(adapter_strength_ratio)
    images = self.pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image_embeds=face_emb,
        image=control_images,
        control_mask=control_mask,
        controlnet_conditioning_scale=control_scales,
        num_inference_steps=num_steps,
        guidance_scale=guidance_scale,
        height=height,
        width=width,
        generator=generator,
        num_images_per_prompt=num_images_per_prompt,
    ).images

    return images
511
+
512
+ def predict(
513
+ self,
514
+ image: Path = Input(
515
+ description="Input face image",
516
+ ),
517
+ pose_image: Path = Input(
518
+ description="(Optional) reference pose image",
519
+ default=None,
520
+ ),
521
+ prompt: str = Input(
522
+ description="Input prompt",
523
+ default="a person",
524
+ ),
525
+ negative_prompt: str = Input(
526
+ description="Input Negative Prompt",
527
+ default="",
528
+ ),
529
+ sdxl_weights: str = Input(
530
+ description="Pick which base weights you want to use",
531
+ default="stable-diffusion-xl-base-1.0",
532
+ choices=[
533
+ "stable-diffusion-xl-base-1.0",
534
+ "juggernaut-xl-v8",
535
+ "afrodite-xl-v2",
536
+ "albedobase-xl-20",
537
+ "albedobase-xl-v13",
538
+ "animagine-xl-30",
539
+ "anime-art-diffusion-xl",
540
+ "anime-illust-diffusion-xl",
541
+ "dreamshaper-xl",
542
+ "dynavision-xl-v0610",
543
+ "guofeng4-xl",
544
+ "nightvision-xl-0791",
545
+ "omnigen-xl",
546
+ "pony-diffusion-v6-xl",
547
+ "protovision-xl-high-fidel",
548
+ "RealVisXL_V3.0_Turbo",
549
+ "RealVisXL_V4.0_Lightning",
550
+ ],
551
+ ),
552
+ face_detection_input_width: int = Input(
553
+ description="Width of the input image for face detection",
554
+ default=640,
555
+ ge=640,
556
+ le=4096,
557
+ ),
558
+ face_detection_input_height: int = Input(
559
+ description="Height of the input image for face detection",
560
+ default=640,
561
+ ge=640,
562
+ le=4096,
563
+ ),
564
+ scheduler: str = Input(
565
+ description="Scheduler",
566
+ choices=[
567
+ "DEISMultistepScheduler",
568
+ "HeunDiscreteScheduler",
569
+ "EulerDiscreteScheduler",
570
+ "DPMSolverMultistepScheduler",
571
+ "DPMSolverMultistepScheduler-Karras",
572
+ "DPMSolverMultistepScheduler-Karras-SDE",
573
+ ],
574
+ default="EulerDiscreteScheduler",
575
+ ),
576
+ num_inference_steps: int = Input(
577
+ description="Number of denoising steps",
578
+ default=30,
579
+ ge=1,
580
+ le=500,
581
+ ),
582
+ guidance_scale: float = Input(
583
+ description="Scale for classifier-free guidance",
584
+ default=7.5,
585
+ ge=1,
586
+ le=50,
587
+ ),
588
+ ip_adapter_scale: float = Input(
589
+ description="Scale for image adapter strength (for detail)", # adapter_strength_ratio
590
+ default=0.8,
591
+ ge=0,
592
+ le=1.5,
593
+ ),
594
+ controlnet_conditioning_scale: float = Input(
595
+ description="Scale for IdentityNet strength (for fidelity)", # identitynet_strength_ratio
596
+ default=0.8,
597
+ ge=0,
598
+ le=1.5,
599
+ ),
600
+ enable_pose_controlnet: bool = Input(
601
+ description="Enable Openpose ControlNet, overrides strength if set to false",
602
+ default=True,
603
+ ),
604
+ pose_strength: float = Input(
605
+ description="Openpose ControlNet strength, effective only if `enable_pose_controlnet` is true",
606
+ default=0.4,
607
+ ge=0,
608
+ le=1,
609
+ ),
610
+ enable_canny_controlnet: bool = Input(
611
+ description="Enable Canny ControlNet, overrides strength if set to false",
612
+ default=False,
613
+ ),
614
+ canny_strength: float = Input(
615
+ description="Canny ControlNet strength, effective only if `enable_canny_controlnet` is true",
616
+ default=0.3,
617
+ ge=0,
618
+ le=1,
619
+ ),
620
+ enable_depth_controlnet: bool = Input(
621
+ description="Enable Depth ControlNet, overrides strength if set to false",
622
+ default=False,
623
+ ),
624
+ depth_strength: float = Input(
625
+ description="Depth ControlNet strength, effective only if `enable_depth_controlnet` is true",
626
+ default=0.5,
627
+ ge=0,
628
+ le=1,
629
+ ),
630
+ enable_lcm: bool = Input(
631
+ description="Enable Fast Inference with LCM (Latent Consistency Models) - speeds up inference steps, trade-off is the quality of the generated image. Performs better with close-up portrait face images",
632
+ default=False,
633
+ ),
634
+ lcm_num_inference_steps: int = Input(
635
+ description="Only used when `enable_lcm` is set to True, Number of denoising steps when using LCM",
636
+ default=5,
637
+ ge=1,
638
+ le=10,
639
+ ),
640
+ lcm_guidance_scale: float = Input(
641
+ description="Only used when `enable_lcm` is set to True, Scale for classifier-free guidance when using LCM",
642
+ default=1.5,
643
+ ge=1,
644
+ le=20,
645
+ ),
646
+ enhance_nonface_region: bool = Input(
647
+ description="Enhance non-face region", default=True
648
+ ),
649
+ output_format: str = Input(
650
+ description="Format of the output images",
651
+ choices=["webp", "jpg", "png"],
652
+ default="webp",
653
+ ),
654
+ output_quality: int = Input(
655
+ description="Quality of the output images, from 0 to 100. 100 is best quality, 0 is lowest quality.",
656
+ default=80,
657
+ ge=0,
658
+ le=100,
659
+ ),
660
+ seed: int = Input(
661
+ description="Random seed. Leave blank to randomize the seed",
662
+ default=None,
663
+ ),
664
+ num_outputs: int = Input(
665
+ description="Number of images to output",
666
+ default=1,
667
+ ge=1,
668
+ le=8,
669
+ ),
670
+ disable_safety_checker: bool = Input(
671
+ description="Disable safety checker for generated images",
672
+ default=False,
673
+ ),
674
+ ) -> List[Path]:
675
+ """Run a single prediction on the model"""
676
+
677
+ # If no seed is provided, generate a random seed
678
+ if seed is None:
679
+ seed = int.from_bytes(os.urandom(2), "big")
680
+ print(f"Using seed: {seed}")
681
+
682
+ # Load the weights if they are different from the base weights
683
+ if sdxl_weights != self.base_weights:
684
+ self.load_weights(sdxl_weights)
685
+
686
+ # Resize the output if the provided dimensions are different from the current ones
687
+ if self.face_detection_input_width != face_detection_input_width or self.face_detection_input_height != face_detection_input_height:
688
+ print(f"[!] Resizing output to {face_detection_input_width}x{face_detection_input_height}")
689
+ self.face_detection_input_width = face_detection_input_width
690
+ self.face_detection_input_height = face_detection_input_height
691
+ self.app.prepare(ctx_id=0, det_size=(self.face_detection_input_width, self.face_detection_input_height))
692
+
693
+ # Set up ControlNet selection and their respective strength values (if any)
694
+ controlnet_selection = []
695
+ if pose_strength > 0 and enable_pose_controlnet:
696
+ controlnet_selection.append("pose")
697
+ if canny_strength > 0 and enable_canny_controlnet:
698
+ controlnet_selection.append("canny")
699
+ if depth_strength > 0 and enable_depth_controlnet:
700
+ controlnet_selection.append("depth")
701
+
702
+ # Switch to LCM inference steps and guidance scale if LCM is enabled
703
+ if enable_lcm:
704
+ num_inference_steps = lcm_num_inference_steps
705
+ guidance_scale = lcm_guidance_scale
706
+
707
+ # Generate
708
+ images = self.generate_image(
709
+ face_image_path=str(image),
710
+ pose_image_path=str(pose_image) if pose_image else None,
711
+ prompt=prompt,
712
+ negative_prompt=negative_prompt,
713
+ num_steps=num_inference_steps,
714
+ identitynet_strength_ratio=controlnet_conditioning_scale,
715
+ adapter_strength_ratio=ip_adapter_scale,
716
+ pose_strength=pose_strength,
717
+ canny_strength=canny_strength,
718
+ depth_strength=depth_strength,
719
+ controlnet_selection=controlnet_selection,
720
+ scheduler=scheduler,
721
+ guidance_scale=guidance_scale,
722
+ seed=seed,
723
+ enable_LCM=enable_lcm,
724
+ enhance_face_region=enhance_nonface_region,
725
+ num_images_per_prompt=num_outputs,
726
+ )
727
+
728
+ # Save the generated images and check for NSFW content
729
+ output_paths = []
730
+ for i, output_image in enumerate(images):
731
+ if not disable_safety_checker:
732
+ _, has_nsfw_content_list = self.run_safety_checker(output_image)
733
+ has_nsfw_content = any(has_nsfw_content_list)
734
+ print(f"NSFW content detected: {has_nsfw_content}")
735
+ if has_nsfw_content:
736
+ raise Exception(
737
+ "NSFW content detected. Try running it again, or try a different prompt."
738
+ )
739
+
740
+ extension = output_format.lower()
741
+ extension = "jpeg" if extension == "jpg" else extension
742
+ output_path = f"/tmp/out_{i}.{extension}"
743
+
744
+ print(f"[~] Saving to {output_path}...")
745
+ print(f"[~] Output format: {extension.upper()}")
746
+ if output_format != "png":
747
+ print(f"[~] Output quality: {output_quality}")
748
+
749
+ save_params = {"format": extension.upper()}
750
+ if output_format != "png":
751
+ save_params["quality"] = output_quality
752
+ save_params["optimize"] = True
753
+
754
+ output_image.save(output_path, **save_params)
755
+ output_paths.append(Path(output_path))
756
+ return output_paths
feature-extractor/preprocessor_config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_processor_type": "CLIPImageProcessor",
17
+ "image_std": [
18
+ 0.26862954,
19
+ 0.26130258,
20
+ 0.27577711
21
+ ],
22
+ "resample": 3,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "shortest_edge": 224
26
+ }
27
+ }
generated_images/20240723_053704_668578_0.png ADDED

Git LFS Details

  • SHA256: 575f77734f6e2ebe3f9d4f322d9b3855e126692c94c1e7bf4e0390f5a07c510b
  • Pointer size: 132 Bytes
  • Size of remote file: 1.75 MB
generated_images/20240723_053801_148984_0.png ADDED

Git LFS Details

  • SHA256: c005cc75b52ec5cfa74932ae0c510421e3913baae36edb2962eb04354834c118
  • Pointer size: 132 Bytes
  • Size of remote file: 1.66 MB
generated_images/20240723_053853_022841_0.png ADDED

Git LFS Details

  • SHA256: 427f133f008a53a7d4e8edf2c1b0f59cb7096c429471f7dd940aad9a2bfa3c84
  • Pointer size: 132 Bytes
  • Size of remote file: 2.47 MB
generated_images/20240723_053948_468290_0.png ADDED

Git LFS Details

  • SHA256: dbb8451142141a23c751f9c4a3efaa57c8f5bbda1048395db512cd68530efb24
  • Pointer size: 132 Bytes
  • Size of remote file: 1.52 MB
generated_images/20240723_054025_692605_0.png ADDED

Git LFS Details

  • SHA256: 51e93ef112561ae84c08b3eab6dc8c2ce2d1d20be2762d3d266d5ee6a4a231cc
  • Pointer size: 132 Bytes
  • Size of remote file: 1.02 MB
generated_images/20240723_054124_697176_0.png ADDED

Git LFS Details

  • SHA256: 774cd2a486c071fa2083ca4589d353c6374c659b1e3345d6fc7458bb052257da
  • Pointer size: 132 Bytes
  • Size of remote file: 1.66 MB
generation_log.csv ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ image_name,new_file_name,identitynet_strength_ratio,adapter_strength_ratio,num_inference_steps,guidance_scale,seed,success,error_message,style_name,prompt,negative_prompt,time_taken,current_timestamp
2
+ musk_resize.jpeg,20240723_053704_668578_0.png,1.1491785966677859,0.8654292835406997,50,10.881974934041711,4170043132,True,,(No style),"human, sharp focus","(blurry, blur, text, abstract, glitch, lowres, low quality, worst quality:1.2), (text:1.2), watermark, painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured",53.160874,2024-07-23 05:37:05
3
+ sam_resize.png,20240723_053801_148984_0.png,1.0277924316289087,0.9683019180411349,53,11.111615489229361,1039000092,True,,(No style),"human, sharp focus","(blurry, blur, text, abstract, glitch, lowres, low quality, worst quality:1.2), (text:1.2), watermark, painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured",56.287061,2024-07-23 05:38:01
4
+ schmidhuber_resize.png,20240723_053853_022841_0.png,1.4917970061395218,0.7393876001187043,48,11.679426057392323,3752244045,True,,(No style),"human, sharp focus","(blurry, blur, text, abstract, glitch, lowres, low quality, worst quality:1.2), (text:1.2), watermark, painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured",51.661633,2024-07-23 05:38:53
5
+ kaifu_resize.png,20240723_053948_468290_0.png,1.4485948536834086,0.8122224472625851,52,9.984434112216853,2295950491,True,,(No style),"human, sharp focus","(blurry, blur, text, abstract, glitch, lowres, low quality, worst quality:1.2), (text:1.2), watermark, painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured",55.366897,2024-07-23 05:39:49
6
+ pp_0.jpg,20240723_054025_692605_0.png,1.1794069160183727,0.9857350785784462,51,8.76420747179281,2648835109,True,,(No style),"human, sharp focus","(blurry, blur, text, abstract, glitch, lowres, low quality, worst quality:1.2), (text:1.2), watermark, painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured",36.771416,2024-07-23 05:40:26
7
+ yann-lecun_resize.jpg,20240723_054124_697176_0.png,1.2770220875965888,0.8245108249424827,56,9.372671733967127,3933691473,True,,(No style),"human, sharp focus","(blurry, blur, text, abstract, glitch, lowres, low quality, worst quality:1.2), (text:1.2), watermark, painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured",59.069521,2024-07-23 05:41:25
gradio_demo/aaa.py ADDED
@@ -0,0 +1,957 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ sys.path.append('./')
3
+
4
+ from typing import Tuple
5
+
6
+ import os
7
+ import cv2
8
+ import math
9
+ import torch
10
+ import random
11
+ import numpy as np
12
+ import argparse
13
+
14
+ import PIL
15
+ from PIL import Image
16
+
17
+ import diffusers
18
+ from diffusers.utils import load_image
19
+ from diffusers.models import ControlNetModel
20
+ from diffusers import LCMScheduler
21
+
22
+ from huggingface_hub import hf_hub_download
23
+
24
+ import insightface
25
+ from insightface.app import FaceAnalysis
26
+
27
+ from style_template import styles
28
+ from pipeline_stable_diffusion_xl_instantid_full import StableDiffusionXLInstantIDPipeline
29
+ from model_util import load_models_xl, get_torch_device, torch_gc
30
+
31
+
32
# Global configuration shared by the whole demo.
MAX_SEED = np.iinfo(np.int32).max  # upper bound used when drawing a random seed
device = get_torch_device()
# Half precision is used only when the device string mentions "cuda".
dtype = torch.float16 if "cuda" in str(device) else torch.float32
STYLE_NAMES = list(styles.keys())
DEFAULT_STYLE_NAME = "Watercolor"

# Load face encoder (detection input size fixed to 320x320).
app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
app.prepare(ctx_id=0, det_size=(320, 320))

# Paths to the InstantID models, relative to the working directory.
face_adapter = './checkpoints/ip-adapter.bin'
controlnet_path = './checkpoints/ControlNetModel'

# Load the ControlNet once at import time; `main` passes it to the pipeline.
controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=dtype)

logo = Image.open("./gradio_demo/logo.png")
51
+
52
+ from cv2 import imencode
53
+ import base64
54
+
55
+ # def encode_pil_to_base64_new(pil_image):
56
+ # print("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
57
+ # image_arr = np.asarray(pil_image)[:,:,::-1]
58
+ # _, byte_data = imencode('.png', image_arr)
59
+ # base64_data = base64.b64encode(byte_data)
60
+ # base64_string_opencv = base64_data.decode("utf-8")
61
+ # return "data:image/png;base64," + base64_string_opencv
62
+
63
+ import gradio as gr
64
+
65
+ # gr.processing_utils.encode_pil_to_base64 = encode_pil_to_base64_new
66
+
67
def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=False):
    """Build the InstantID SDXL pipeline and launch the avatar Gradio demo.

    Args:
        pretrained_model_name_or_path: Model identifier passed to
            ``StableDiffusionXLInstantIDPipeline.from_pretrained``, or a path
            ending in ``.ckpt`` / ``.safetensors``, in which case the
            components are loaded through ``load_models_xl`` instead.
        enable_lcm_arg: Accepted for interface compatibility; it is not read
            anywhere in this function (generation runs with the LCM LoRA
            disabled unless ``generate_image`` is called with
            ``enable_LCM=True``).

    Side effects:
        Loads model weights onto ``device``, loads the IP-Adapter and the LCM
        LoRA into the pipeline, builds the Gradio UI and calls
        ``demo.launch(share=True)``.
    """
    if pretrained_model_name_or_path.endswith((".ckpt", ".safetensors")):
        # Single-file checkpoint: borrow the scheduler config from the
        # reference repository and assemble the pipeline by hand.
        scheduler_kwargs = hf_hub_download(
            repo_id="wangqixun/YamerMIX_v8",
            subfolder="scheduler",
            filename="scheduler_config.json",
        )

        (tokenizers, text_encoders, unet, _, vae) = load_models_xl(
            pretrained_model_name_or_path=pretrained_model_name_or_path,
            scheduler_name=None,
            weight_dtype=dtype,
        )

        scheduler = diffusers.EulerDiscreteScheduler.from_config(scheduler_kwargs)
        pipe = StableDiffusionXLInstantIDPipeline(
            vae=vae,
            text_encoder=text_encoders[0],
            text_encoder_2=text_encoders[1],
            tokenizer=tokenizers[0],
            tokenizer_2=tokenizers[1],
            unet=unet,
            scheduler=scheduler,
            controlnet=controlnet,
        ).to(device)
    else:
        pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
            pretrained_model_name_or_path,
            controlnet=controlnet,
            torch_dtype=dtype,
            safety_checker=None,
            feature_extractor=None,
        ).to(device)

        pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)

    pipe.load_ip_adapter_instantid(face_adapter)
    # Load the LCM LoRA up front but keep it disabled until requested.
    pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
    pipe.disable_lora()

    def remove_tips():
        """Hide the usage-tips Markdown component."""
        return gr.update(visible=False)

    def convert_from_cv2_to_image(img: np.ndarray) -> Image.Image:
        """Convert a BGR OpenCV array into an RGB PIL image."""
        return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    def convert_from_image_to_cv2(img: Image.Image) -> np.ndarray:
        """Convert an RGB PIL image into a BGR OpenCV array."""
        return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

    def _run_for_prompt(face_file, style, prompt_index):
        """Generate one image using prompt number ``prompt_index`` of ``style``.

        Unknown (or unset) styles fall back to ``STYLE_NAMES[1]``.
        NOTE(review): presumably every ``styles`` entry is a
        ``(prompts, negative_prompt)`` pair with at least four prompts —
        confirm against ``style_template``.
        """
        prompts, negative = styles.get(style, styles.get(STYLE_NAMES[1]))
        return generate_image(face_file, prompts[prompt_index], negative)

    # One Gradio callback per output slot; the `progress` default makes
    # Gradio track tqdm progress bars for each of them.
    def run_for_prompts1(face_file, style, progress=gr.Progress(track_tqdm=True)):
        """Generate the image for the first prompt of the chosen style."""
        return _run_for_prompt(face_file, style, 0)

    def run_for_prompts2(face_file, style, progress=gr.Progress(track_tqdm=True)):
        """Generate the image for the second prompt of the chosen style."""
        return _run_for_prompt(face_file, style, 1)

    def run_for_prompts3(face_file, style, progress=gr.Progress(track_tqdm=True)):
        """Generate the image for the third prompt of the chosen style."""
        return _run_for_prompt(face_file, style, 2)

    def run_for_prompts4(face_file, style, progress=gr.Progress(track_tqdm=True)):
        """Generate the image for the fourth prompt of the chosen style."""
        return _run_for_prompt(face_file, style, 3)

    def draw_kps(
        image_pil,
        kps,
        color_list=((255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255)),
    ):
        """Render five facial keypoints as a stick figure on a black canvas.

        Args:
            image_pil: PIL image; only its size is used for the canvas.
            kps: Sequence of five ``(x, y)`` keypoints.
            color_list: One colour per keypoint (immutable default).

        Returns:
            A PIL image of the same size containing limbs and keypoint dots.
        """
        stickwidth = 4
        # Every limb connects a keypoint to keypoint 2.
        limb_seq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
        kps = np.array(kps)

        w, h = image_pil.size
        out_img = np.zeros([h, w, 3])

        for index in limb_seq:
            color = color_list[index[0]]

            x = kps[index][:, 0]
            y = kps[index][:, 1]
            length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
            polygon = cv2.ellipse2Poly(
                (int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1
            )
            out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
        # Limbs are dimmed to 60% so the keypoint dots drawn next stand out.
        out_img = (out_img * 0.6).astype(np.uint8)

        for idx_kp, kp in enumerate(kps):
            color = color_list[idx_kp]
            x, y = kp
            out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)

        return Image.fromarray(out_img.astype(np.uint8))

    def resize_img(input_image, max_side=640, min_side=640, size=None,
                   pad_to_max_side=True, mode=PIL.Image.BILINEAR, base_pixel_number=64):
        """Resize a PIL image and optionally pad it to a white square.

        Args:
            input_image: PIL image to resize.
            max_side: Target length of the longer side, and the side of the
                padded square.
            min_side: Intermediate target for the shorter side.
            size: Explicit ``(width, height)``; skips the ratio computation.
            pad_to_max_side: Centre the result on a ``max_side`` x ``max_side``
                white canvas.
            mode: PIL resampling filter.
            base_pixel_number: Final dimensions are rounded down to a multiple
                of this value.

        Returns:
            The resized (and possibly padded) PIL image.
        """
        w, h = input_image.size
        if size is not None:
            w_resize_new, h_resize_new = size
        else:
            ratio = min_side / min(h, w)
            w, h = round(ratio * w), round(ratio * h)
            ratio = max_side / max(h, w)
            input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
            w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
            h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
        input_image = input_image.resize([w_resize_new, h_resize_new], mode)

        if pad_to_max_side:
            res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
            offset_x = (max_side - w_resize_new) // 2
            offset_y = (max_side - h_resize_new) // 2
            res[offset_y:offset_y + h_resize_new, offset_x:offset_x + w_resize_new] = np.array(input_image)
            input_image = Image.fromarray(res)
        return input_image

    def generate_image(
        face_image,
        prompt,
        negative_prompt,
        pose_image_path=None,
        enable_LCM=False,
        identitynet_strength_ratio=0.95,
        adapter_strength_ratio=0.60,
        num_steps=15,
        guidance_scale=8.5,
        seed=None,
        enhance_face_region=True,
    ):
        """Generate one identity-preserving image from a face photo.

        Args:
            face_image: PIL image containing the face to preserve.
            prompt: Positive text prompt.
            negative_prompt: Negative text prompt.
            pose_image_path: Optional reference image whose face keypoints
                replace those of ``face_image``.
            enable_LCM: Enable the LCM LoRA and scheduler for this call.
            identitynet_strength_ratio: ControlNet conditioning scale.
            adapter_strength_ratio: IP-Adapter scale.
            num_steps: Number of inference steps.
            guidance_scale: Guidance scale passed to the pipeline.
            seed: Generator seed; a random value in ``[0, MAX_SEED]`` is drawn
                when ``None``.
            enhance_face_region: Pass a face bounding-box mask to the pipeline
                as ``control_mask``.

        Returns:
            The first image produced by the pipeline.

        Raises:
            gr.Error: If no face image was supplied or no face is detected.
        """
        # Validate before touching the shared pipeline state.
        if face_image is None:
            raise gr.Error("Cannot find any input face image! Please upload the face image")

        if seed is None:
            seed = random.randint(0, MAX_SEED)

        if enable_LCM:
            pipe.enable_lora()
            pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
        else:
            pipe.disable_lora()
            pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)

        face_image = resize_img(face_image)
        face_image_cv2 = convert_from_image_to_cv2(face_image)
        height, width, _ = face_image_cv2.shape

        # Extract face features
        face_info = app.get(face_image_cv2)

        if len(face_info) == 0:
            raise gr.Error("Cannot find any face in the image! Please upload another person image")

        # Only use the face with the largest bounding-box area.
        face_info = sorted(
            face_info,
            key=lambda x: (x['bbox'][2] - x['bbox'][0]) * (x['bbox'][3] - x['bbox'][1]),
        )[-1]
        face_emb = face_info['embedding']
        face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info['kps'])

        if pose_image_path is not None:
            pose_image = load_image(pose_image_path)
            pose_image = resize_img(pose_image)
            pose_image_cv2 = convert_from_image_to_cv2(pose_image)

            face_info = app.get(pose_image_cv2)

            if len(face_info) == 0:
                raise gr.Error("Cannot find any face in the reference image! Please upload another person image")

            face_info = face_info[-1]
            face_kps = draw_kps(pose_image, face_info['kps'])

            width, height = face_kps.size

        if enhance_face_region:
            # White rectangle over the detected face on a black background.
            control_mask = np.zeros([height, width, 3])
            x1, y1, x2, y2 = face_info["bbox"]
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
            control_mask[y1:y2, x1:x2] = 255
            control_mask = Image.fromarray(control_mask.astype(np.uint8))
        else:
            control_mask = None

        generator = torch.Generator(device=device).manual_seed(seed)

        print("Start inference...")
        print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")

        pipe.set_ip_adapter_scale(adapter_strength_ratio)
        images = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image_embeds=face_emb,
            image=face_kps,
            control_mask=control_mask,
            controlnet_conditioning_scale=float(identitynet_strength_ratio),
            num_inference_steps=num_steps,
            guidance_scale=guidance_scale,
            height=height,
            width=width,
            generator=generator,
        ).images

        return images[0]

    ### Description
    title = r"""
    <h1 align="center">Choose your AVATAR</h1>
    """

    description = r"""
    <h2> Powered by IDfy </h2>"""

    article = r""""""

    tips = r""""""

    css = '''
    .gradio-container {width: 95% !important; background-color: #E6F3FF;}
    .image-gallery {height: 100vh !important; overflow: auto;}
    .gradio-row .gradio-element { margin: 0 !important; }
    '''

    # NOTE(review): the nesting of the widgets below was reconstructed from a
    # listing that had lost its indentation — confirm the intended layout.
    with gr.Blocks(css=css) as demo:

        # description
        gr.Markdown(title)
        with gr.Row():
            gr.Image("./gradio_demo/logo.png", scale=0, min_width=50, show_label=False, show_download_button=False)
            gr.Markdown(description)
        with gr.Row():
            with gr.Column():
                style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
                face_file = gr.Image(label="Upload a photo of your face", type="pil", sources=["webcam"])
                submit = gr.Button("Submit", variant="primary")
            with gr.Column():
                with gr.Row():
                    gallery1 = gr.Image(label="Generated Images")
                    gallery2 = gr.Image(label="Generated Images")
                with gr.Row():
                    gallery3 = gr.Image(label="Generated Images")
                    gallery4 = gr.Image(label="Generated Images")
                email = gr.Textbox(label="Email",
                                   info="Enter your email address",
                                   value="")

                usage_tips = gr.Markdown(label="Usage tips of InstantID", value=tips, visible=False)

        # Each trigger hides the tips, then fills the four output slots one
        # after another so results appear progressively.
        generation_steps = (
            (run_for_prompts1, gallery1),
            (run_for_prompts2, gallery2),
            (run_for_prompts3, gallery3),
            (run_for_prompts4, gallery4),
        )

        def _wire_generation(trigger):
            """Attach the hide-tips + four-step generation chain to ``trigger``."""
            event = trigger(
                fn=remove_tips,
                outputs=usage_tips,
                queue=True,
                api_name=False,
                show_progress="full",
            )
            for runner, gallery in generation_steps:
                event = event.then(
                    fn=runner,
                    inputs=[face_file, style],
                    outputs=[gallery],
                )

        _wire_generation(face_file.upload)
        _wire_generation(submit.click)

        gr.Markdown(article)

    demo.launch(share=True)
515
+
516
if __name__ == "__main__":
    # Command-line entry point: the only option is the SDXL model to load.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--pretrained_model_name_or_path",
        type=str,
        default="wangqixun/YamerMIX_v8",
    )
    model_name_or_path = cli.parse_args().pretrained_model_name_or_path

    # The second positional argument is `enable_lcm_arg`.
    main(model_name_or_path, False)
522
+
523
+
524
+ # import sys
525
+ # sys.path.append('./')
526
+
527
+ # from typing import Tuple
528
+
529
+ # import os
530
+ # import cv2
531
+ # import math
532
+ # import torch
533
+ # import random
534
+ # import numpy as np
535
+ # import argparse
536
+
537
+ # import PIL
538
+ # from PIL import Image
539
+
540
+ # import diffusers
541
+ # from diffusers.utils import load_image
542
+ # from diffusers.models import ControlNetModel
543
+ # from diffusers import LCMScheduler
544
+
545
+ # from huggingface_hub import hf_hub_download
546
+
547
+ # import insightface
548
+ # from insightface.app import FaceAnalysis
549
+
550
+ # from style_template import styles
551
+ # from pipeline_stable_diffusion_xl_instantid_full import StableDiffusionXLInstantIDPipeline
552
+ # from model_util import load_models_xl, get_torch_device, torch_gc
553
+
554
+
555
+ # # global variable
556
+ # MAX_SEED = np.iinfo(np.int32).max
557
+ # device = get_torch_device()
558
+ # dtype = torch.float16 if str(device).__contains__("cuda") else torch.float32
559
+ # STYLE_NAMES = list(styles.keys())
560
+ # DEFAULT_STYLE_NAME = "Watercolor"
561
+
562
+ # # Load face encoder
563
+ # app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
564
+ # app.prepare(ctx_id=0, det_size=(320, 320))
565
+
566
+ # # Path to InstantID models
567
+ # face_adapter = f'./checkpoints/ip-adapter.bin'
568
+ # controlnet_path = f'./checkpoints/ControlNetModel'
569
+
570
+ # # Load pipeline
571
+ # controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=dtype)
572
+
573
+ # logo = Image.open("./gradio_demo/logo.png")
574
+
575
+ # from cv2 import imencode
576
+ # import base64
577
+
578
+ # # def encode_pil_to_base64_new(pil_image):
579
+ # # print("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
580
+ # # image_arr = np.asarray(pil_image)[:,:,::-1]
581
+ # # _, byte_data = imencode('.png', image_arr)
582
+ # # base64_data = base64.b64encode(byte_data)
583
+ # # base64_string_opencv = base64_data.decode("utf-8")
584
+ # # return "data:image/png;base64," + base64_string_opencv
585
+
586
+ # import gradio as gr
587
+
588
+ # # gr.processing_utils.encode_pil_to_base64 = encode_pil_to_base64_new
589
+
590
+ # def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=False):
591
+
592
+ # if pretrained_model_name_or_path.endswith(
593
+ # ".ckpt"
594
+ # ) or pretrained_model_name_or_path.endswith(".safetensors"):
595
+ # scheduler_kwargs = hf_hub_download(
596
+ # repo_id="wangqixun/YamerMIX_v8",
597
+ # subfolder="scheduler",
598
+ # filename="scheduler_config.json",
599
+ # )
600
+
601
+ # (tokenizers, text_encoders, unet, _, vae) = load_models_xl(
602
+ # pretrained_model_name_or_path=pretrained_model_name_or_path,
603
+ # scheduler_name=None,
604
+ # weight_dtype=dtype,
605
+ # )
606
+
607
+ # scheduler = diffusers.EulerDiscreteScheduler.from_config(scheduler_kwargs)
608
+ # pipe = StableDiffusionXLInstantIDPipeline(
609
+ # vae=vae,
610
+ # text_encoder=text_encoders[0],
611
+ # text_encoder_2=text_encoders[1],
612
+ # tokenizer=tokenizers[0],
613
+ # tokenizer_2=tokenizers[1],
614
+ # unet=unet,
615
+ # scheduler=scheduler,
616
+ # controlnet=controlnet,
617
+ # ).to(device)
618
+
619
+ # else:
620
+ # pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
621
+ # pretrained_model_name_or_path,
622
+ # controlnet=controlnet,
623
+ # torch_dtype=dtype,
624
+ # safety_checker=None,
625
+ # feature_extractor=None,
626
+ # ).to(device)
627
+
628
+ # pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)
629
+
630
+ # pipe.load_ip_adapter_instantid(face_adapter)
631
+ # # load and disable LCM
632
+ # pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
633
+ # pipe.disable_lora()
634
+
635
+ # def remove_tips():
636
+ # return gr.update(visible=False)
637
+
638
+
639
+ # # prompts = [
640
+ # # ["superman","Vibrant Color"], ["japanese anime character with white/neon hair","Watercolor"],
641
+ # # # ["Suited professional","(No style)"],
642
+ # # ["Scooba diver","Line art"], ["eskimo","Snow"]
643
+ # # ]
644
+
645
+ # def convert_from_cv2_to_image(img: np.ndarray) -> Image:
646
+ # return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
647
+
648
+ # def convert_from_image_to_cv2(img: Image) -> np.ndarray:
649
+ # return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
650
+
651
+ # def run_for_prompts1(face_file,style,progress=gr.Progress(track_tqdm=True)):
652
+ # # if email != "":
653
+ # p,n = styles.get(style, styles.get(STYLE_NAMES[1]))
654
+ # return generate_image(face_file, p[0], n)
655
+ # # else:
656
+ # # raise gr.Error("Email ID is compulsory")
657
+ # def run_for_prompts2(face_file,style,progress=gr.Progress(track_tqdm=True)):
658
+ # # if email != "":
659
+ # p,n = styles.get(style, styles.get(STYLE_NAMES[1]))
660
+ # return generate_image(face_file, p[1], n)
661
+ # def run_for_prompts3(face_file,style,progress=gr.Progress(track_tqdm=True)):
662
+ # # if email != "":
663
+ # p,n = styles.get(style, styles.get(STYLE_NAMES[1]))
664
+ # return generate_image(face_file, p[2], n)
665
+ # def run_for_prompts4(face_file,style,progress=gr.Progress(track_tqdm=True)):
666
+ # # if email != "":
667
+ # p,n = styles.get(style, styles.get(STYLE_NAMES[1]))
668
+ # return generate_image(face_file, p[3], n)
669
+
670
+ # # def validate_and_process(face_file, style, email):
671
+
672
+ # # # Your processing logic here
673
+ # # gallery1, gallery2, gallery3, gallery4 = run_for_prompts1(face_file, style), run_for_prompts2(face_file, style), run_for_prompts3(face_file, style), run_for_prompts4(face_file, style)
674
+ # # return gallery1, gallery2, gallery3, gallery4
675
+
676
+ # def draw_kps(image_pil, kps, color_list=[(255,0,0), (0,255,0), (0,0,255), (255,255,0), (255,0,255)]):
677
+ # stickwidth = 4
678
+ # limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
679
+ # kps = np.array(kps)
680
+
681
+ # w, h = image_pil.size
682
+ # out_img = np.zeros([h, w, 3])
683
+
684
+ # for i in range(len(limbSeq)):
685
+ # index = limbSeq[i]
686
+ # color = color_list[index[0]]
687
+
688
+ # x = kps[index][:, 0]
689
+ # y = kps[index][:, 1]
690
+ # length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
691
+ # angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
692
+ # polygon = cv2.ellipse2Poly((int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
693
+ # out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
694
+ # out_img = (out_img * 0.6).astype(np.uint8)
695
+
696
+ # for idx_kp, kp in enumerate(kps):
697
+ # color = color_list[idx_kp]
698
+ # x, y = kp
699
+ # out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)
700
+
701
+ # out_img_pil = Image.fromarray(out_img.astype(np.uint8))
702
+ # return out_img_pil
703
+
704
+ # def resize_img(input_image, max_side=640, min_side=640, size=None,
705
+ # pad_to_max_side=True, mode=PIL.Image.BILINEAR, base_pixel_number=64):
706
+
707
+ # w, h = input_image.size
708
+ # print(w)
709
+ # print(h)
710
+ # if size is not None:
711
+ # w_resize_new, h_resize_new = size
712
+ # else:
713
+ # ratio = min_side / min(h, w)
714
+ # w, h = round(ratio*w), round(ratio*h)
715
+ # ratio = max_side / max(h, w)
716
+ # input_image = input_image.resize([round(ratio*w), round(ratio*h)], mode)
717
+ # w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
718
+ # h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
719
+ # input_image = input_image.resize([w_resize_new, h_resize_new], mode)
720
+
721
+ # if pad_to_max_side:
722
+ # res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
723
+ # offset_x = (max_side - w_resize_new) // 2
724
+ # offset_y = (max_side - h_resize_new) // 2
725
+ # res[offset_y:offset_y+h_resize_new, offset_x:offset_x+w_resize_new] = np.array(input_image)
726
+ # input_image = Image.fromarray(res)
727
+ # return input_image
728
+
729
+ # # def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]:
730
+ # # p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
731
+ # # return p.replace("{prompt}", positive), n + ' ' + negative
732
+
733
+ # def generate_image(face_image,prompt,negative_prompt):
734
+ # pose_image_path = None
735
+ # # prompt = "superman"
736
+ # enable_LCM = False
737
+ # identitynet_strength_ratio = 0.95
738
+ # adapter_strength_ratio = 0.60
739
+ # num_steps = 15
740
+ # guidance_scale = 8.5
741
+ # seed = random.randint(0, MAX_SEED)
742
+ # # negative_prompt = ""
743
+ # # negative_prompt += neg
744
+ # enhance_face_region = True
745
+ # if enable_LCM:
746
+ # pipe.enable_lora()
747
+ # pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
748
+ # else:
749
+ # pipe.disable_lora()
750
+ # pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)
751
+
752
+ # if face_image is None:
753
+ # raise gr.Error(f"Cannot find any input face image! Please upload the face image")
754
+
755
+ # # if prompt is None:
756
+ # # prompt = "a person"
757
+
758
+ # # apply the style template
759
+ # # prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
760
+
761
+ # # face_image = load_image(face_image_path)
762
+ # face_image = resize_img(face_image)
763
+ # face_image_cv2 = convert_from_image_to_cv2(face_image)
764
+ # height, width, _ = face_image_cv2.shape
765
+
766
+ # # Extract face features
767
+ # face_info = app.get(face_image_cv2)
768
+
769
+ # if len(face_info) == 0:
770
+ # raise gr.Error(f"Cannot find any face in the image! Please upload another person image")
771
+
772
+ # face_info = sorted(face_info, key=lambda x:(x['bbox'][2]-x['bbox'][0])*(x['bbox'][3]-x['bbox'][1]))[-1] # only use the maximum face
773
+ # face_emb = face_info['embedding']
774
+ # face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info['kps'])
775
+
776
+ # if pose_image_path is not None:
777
+ # pose_image = load_image(pose_image_path)
778
+ # pose_image = resize_img(pose_image)
779
+ # pose_image_cv2 = convert_from_image_to_cv2(pose_image)
780
+
781
+ # face_info = app.get(pose_image_cv2)
782
+
783
+ # if len(face_info) == 0:
784
+ # raise gr.Error(f"Cannot find any face in the reference image! Please upload another person image")
785
+
786
+ # face_info = face_info[-1]
787
+ # face_kps = draw_kps(pose_image, face_info['kps'])
788
+
789
+ # width, height = face_kps.size
790
+
791
+ # if enhance_face_region:
792
+ # control_mask = np.zeros([height, width, 3])
793
+ # x1, y1, x2, y2 = face_info["bbox"]
794
+ # x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
795
+ # control_mask[y1:y2, x1:x2] = 255
796
+ # control_mask = Image.fromarray(control_mask.astype(np.uint8))
797
+ # else:
798
+ # control_mask = None
799
+
800
+ # generator = torch.Generator(device=device).manual_seed(seed)
801
+
802
+ # print("Start inference...")
803
+ # print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")
804
+
805
+ # pipe.set_ip_adapter_scale(adapter_strength_ratio)
806
+ # images = pipe(
807
+ # prompt=prompt,
808
+ # negative_prompt=negative_prompt,
809
+ # image_embeds=face_emb,
810
+ # image=face_kps,
811
+ # control_mask=control_mask,
812
+ # controlnet_conditioning_scale=float(identitynet_strength_ratio),
813
+ # num_inference_steps=num_steps,
814
+ # guidance_scale=guidance_scale,
815
+ # height=height,
816
+ # width=width,
817
+ # generator=generator,
818
+ # # num_images_per_prompt = 4
819
+ # ).images
820
+
821
+ # return images[0]
822
+
823
+ # ### Description
824
+ # title = r"""
825
+ # <h1 align="center">Choose your AVATAR</h1>
826
+ # """
827
+
828
+ # description = r"""
829
+ # <h2> Powered by IDfy </h2>"""
830
+
831
+ # article = r""""""
832
+
833
+ # tips = r""""""
834
+
835
+ # css = '''
836
+ # .gradio-container {width: 95% !important; background-color: #E6F3FF;}
837
+ # .image-gallery {height: 100vh !important; overflow: auto;}
838
+ # .gradio-row .gradio-element { margin: 0 !important; }
839
+ # '''
840
+ # with gr.Blocks(css=css) as demo:
841
+
842
+ # # description
843
+ # gr.Markdown(title)
844
+ # with gr.Row():
845
+ # gr.Image("./gradio_demo/logo.png",scale=0,min_width=50,show_label=False,show_download_button=False)
846
+ # gr.Markdown(description)
847
+ # with gr.Row():
848
+ # with gr.Column():
849
+ # style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
850
+ # face_file = gr.Image(label="Upload a photo of your face", type="pil",sources="webcam")
851
+ # submit = gr.Button("Submit", variant="primary")
852
+ # with gr.Column():
853
+ # with gr.Row():
854
+ # gallery1 = gr.Image(label="Generated Images")
855
+ # gallery2 = gr.Image(label="Generated Images")
856
+ # with gr.Row():
857
+ # gallery3 = gr.Image(label="Generated Images")
858
+ # gallery4 = gr.Image(label="Generated Images")
859
+ # email = gr.Textbox(label="Email",
860
+ # info="Enter your email address",
861
+ # value="")
862
+
863
+ # usage_tips = gr.Markdown(label="Usage tips of InstantID", value=tips ,visible=False)
864
+ # # identitynet_strength_ratio = gr.Slider(
865
+ # # label="IdentityNet strength (for fidelity)",
866
+ # # minimum=0,
867
+ # # maximum=1.5,
868
+ # # step=0.05,
869
+ # # value=0.95,
870
+ # # )
871
+ # # adapter_strength_ratio = gr.Slider(
872
+ # # label="Image adapter strength (for detail)",
873
+ # # minimum=0,
874
+ # # maximum=1.5,
875
+ # # step=0.05,
876
+ # # value=0.60,
877
+ # # )
878
+ # # negative_prompt = gr.Textbox(
879
+ # # label="Negative Prompt",
880
+ # # placeholder="low quality",
881
+ # # value="(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
882
+ # # )
883
+ # # num_steps = gr.Slider(
884
+ # # label="Number of sample steps",
885
+ # # minimum=15,
886
+ # # maximum=100,
887
+ # # step=1,
888
+ # # value=5 if enable_lcm_arg else 15,
889
+ # # )
890
+ # # guidance_scale = gr.Slider(
891
+ # # label="Guidance scale",
892
+ # # minimum=0.1,
893
+ # # maximum=10.0,
894
+ # # step=0.1,
895
+ # # value=0 if enable_lcm_arg else 8.5,
896
+ # # )
897
+ # # if email is None:
898
+ # # print("STOPPPP")
899
+ # # raise gr.Error("Email ID is compulsory")
900
+ # face_file.upload(
901
+ # fn=remove_tips,
902
+ # outputs=usage_tips,
903
+ # queue=True,
904
+ # api_name=False,
905
+ # show_progress = "full"
906
+ # ).then(
907
+ # fn=run_for_prompts1,
908
+ # inputs=[face_file,style],
909
+ # outputs=[gallery1]
910
+ # ).then(
911
+ # fn=run_for_prompts2,
912
+ # inputs=[face_file,style],
913
+ # outputs=[gallery2]
914
+ # ).then(
915
+ # fn=run_for_prompts3,
916
+ # inputs=[face_file,style],
917
+ # outputs=[gallery3]
918
+ # ).then(
919
+ # fn=run_for_prompts4,
920
+ # inputs=[face_file,style],
921
+ # outputs=[gallery4]
922
+ # )
923
+ # submit.click(
924
+ # fn=remove_tips,
925
+ # outputs=usage_tips,
926
+ # queue=True,
927
+ # api_name=False,
928
+ # show_progress = "full"
929
+ # ).then(
930
+ # fn=run_for_prompts1,
931
+ # inputs=[face_file,style],
932
+ # outputs=[gallery1]
933
+ # ).then(
934
+ # fn=run_for_prompts2,
935
+ # inputs=[face_file,style],
936
+ # outputs=[gallery2]
937
+ # ).then(
938
+ # fn=run_for_prompts3,
939
+ # inputs=[face_file,style],
940
+ # outputs=[gallery3]
941
+ # ).then(
942
+ # fn=run_for_prompts4,
943
+ # inputs=[face_file,style],
944
+ # outputs=[gallery4]
945
+ # )
946
+
947
+
948
+ # gr.Markdown(article)
949
+
950
+ # demo.launch(share=True)
951
+
952
+ # if __name__ == "__main__":
953
+ # parser = argparse.ArgumentParser()
954
+ # parser.add_argument("--pretrained_model_name_or_path", type=str, default="wangqixun/YamerMIX_v8")
955
+ # args = parser.parse_args()
956
+
957
+ # main(args.pretrained_model_name_or_path, False)
gradio_demo/app-multicontrolnet.py ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ sys.path.append("./")
3
+
4
+ from typing import Tuple
5
+
6
+ import os
7
+ import cv2
8
+ import math
9
+ import torch
10
+ import random
11
+ import numpy as np
12
+ import argparse
13
+
14
+ import PIL
15
+ from PIL import Image
16
+
17
+ import diffusers
18
+ from diffusers.utils import load_image
19
+ from diffusers.models import ControlNetModel
20
+ from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
21
+
22
+ from huggingface_hub import hf_hub_download
23
+
24
+ from insightface.app import FaceAnalysis
25
+
26
+ from style_template import styles
27
+ from pipeline_stable_diffusion_xl_instantid_full import StableDiffusionXLInstantIDPipeline
28
+ from model_util import load_models_xl, get_torch_device, torch_gc
29
+ from controlnet_util import openpose, get_depth_map, get_canny_image
30
+
31
+ import gradio as gr
32
+
33
+
34
# Global configuration shared by the rest of the demo.
MAX_SEED = np.iinfo(np.int32).max  # upper bound used when drawing a random seed
device = get_torch_device()
# Half precision only when the selected device is a CUDA device.
dtype = torch.float16 if "cuda" in str(device) else torch.float32
STYLE_NAMES = list(styles.keys())
DEFAULT_STYLE_NAME = "Watercolor"

# Load face encoder
app = FaceAnalysis(
    name="antelopev2",
    root="./",
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)
app.prepare(ctx_id=0, det_size=(640, 640))

# Path to InstantID models
face_adapter = "./checkpoints/ip-adapter.bin"
controlnet_path = "./checkpoints/ControlNetModel"

# Load pipeline face ControlNetModel
# NOTE(review): unlike the three ControlNets below, this one is not moved to
# `device` here — presumably the pipeline's own `.to(device)` covers it; confirm.
controlnet_identitynet = ControlNetModel.from_pretrained(
    controlnet_path, torch_dtype=dtype
)

# Optional extra ControlNets (pose / canny / depth), loaded eagerly at import.
controlnet_pose_model = "thibaud/controlnet-openpose-sdxl-1.0"
controlnet_canny_model = "diffusers/controlnet-canny-sdxl-1.0"
controlnet_depth_model = "diffusers/controlnet-depth-sdxl-1.0-small"

controlnet_pose = ControlNetModel.from_pretrained(
    controlnet_pose_model, torch_dtype=dtype
).to(device)
controlnet_canny = ControlNetModel.from_pretrained(
    controlnet_canny_model, torch_dtype=dtype
).to(device)
controlnet_depth = ControlNetModel.from_pretrained(
    controlnet_depth_model, torch_dtype=dtype
).to(device)

# Selection key -> loaded ControlNet model.
controlnet_map = {
    "pose": controlnet_pose,
    "canny": controlnet_canny,
    "depth": controlnet_depth,
}
# Selection key -> function that turns an input image into that ControlNet's
# conditioning image.
controlnet_map_fn = {
    "pose": openpose,
    "canny": get_canny_image,
    "depth": get_depth_map,
}
83
+
84
+
85
+ def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=False):
86
+ if pretrained_model_name_or_path.endswith(
87
+ ".ckpt"
88
+ ) or pretrained_model_name_or_path.endswith(".safetensors"):
89
+ scheduler_kwargs = hf_hub_download(
90
+ repo_id="wangqixun/YamerMIX_v8",
91
+ subfolder="scheduler",
92
+ filename="scheduler_config.json",
93
+ )
94
+
95
+ (tokenizers, text_encoders, unet, _, vae) = load_models_xl(
96
+ pretrained_model_name_or_path=pretrained_model_name_or_path,
97
+ scheduler_name=None,
98
+ weight_dtype=dtype,
99
+ )
100
+
101
+ scheduler = diffusers.EulerDiscreteScheduler.from_config(scheduler_kwargs)
102
+ pipe = StableDiffusionXLInstantIDPipeline(
103
+ vae=vae,
104
+ text_encoder=text_encoders[0],
105
+ text_encoder_2=text_encoders[1],
106
+ tokenizer=tokenizers[0],
107
+ tokenizer_2=tokenizers[1],
108
+ unet=unet,
109
+ scheduler=scheduler,
110
+ controlnet=[controlnet_identitynet],
111
+ ).to(device)
112
+
113
+ else:
114
+ pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
115
+ pretrained_model_name_or_path,
116
+ controlnet=[controlnet_identitynet],
117
+ torch_dtype=dtype,
118
+ safety_checker=None,
119
+ feature_extractor=None,
120
+ ).to(device)
121
+
122
+ pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(
123
+ pipe.scheduler.config
124
+ )
125
+
126
+ pipe.load_ip_adapter_instantid(face_adapter)
127
+ # load and disable LCM
128
+ pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
129
+ pipe.disable_lora()
130
+
131
def toggle_lcm_ui(value):
    """Return a pair of Gradio updates chosen by the LCM toggle state.

    The first update carries an integer range (step 1, max 100) and the
    second a 0.1-20.0 range — presumably the step-count and guidance-scale
    sliders; confirm against the event wiring.
    """
    if value:
        first_min, first_default, second_default = 0, 5, 1.5
    else:
        first_min, first_default, second_default = 5, 30, 5

    first_update = gr.update(
        minimum=first_min, maximum=100, step=1, value=first_default
    )
    second_update = gr.update(
        minimum=0.1, maximum=20.0, step=0.1, value=second_default
    )
    return (first_update, second_update)
142
+
143
def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    """Return ``seed`` as given, or a random value in ``[0, MAX_SEED]``.

    A new seed is drawn only when ``randomize_seed`` is truthy.
    """
    if not randomize_seed:
        return seed
    return random.randint(0, MAX_SEED)
147
+
148
def remove_tips():
    """Return a Gradio update that sets the target component to invisible."""
    hide_update = gr.update(visible=False)
    return hide_update
150
+
151
def get_example():
    """Return the built-in example rows for the demo.

    Each row is a list in the parameter order of ``run_for_examples``:
    ``[face image path, pose image path or None, prompt, style name,
    negative prompt]``.
    """
    # Every example uses the same negative prompt. Defining it once keeps the
    # rows in sync; previously one row carried a truncated copy ("... gree").
    negative_prompt = "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green"

    return [
        [
            "./examples/yann-lecun_resize.jpg",
            None,
            "a man",
            "Snow",
            negative_prompt,
        ],
        [
            "./examples/musk_resize.jpeg",
            "./examples/poses/pose2.jpg",
            "a man flying in the sky in Mars",
            "Mars",
            negative_prompt,
        ],
        [
            "./examples/sam_resize.png",
            "./examples/poses/pose4.jpg",
            "a man doing a silly pose wearing a suite",
            "Jungle",
            negative_prompt,
        ],
        [
            "./examples/schmidhuber_resize.png",
            "./examples/poses/pose3.jpg",
            "a man sit on a chair",
            "Neon",
            negative_prompt,
        ],
        [
            "./examples/kaifu_resize.png",
            "./examples/poses/pose.jpg",
            "a man",
            "Vibrant Color",
            negative_prompt,
        ],
    ]
190
+
191
def run_for_examples(face_file, pose_file, prompt, style, negative_prompt):
    """Run ``generate_image`` for one example row with fixed generation settings.

    Only the five row values vary; every other argument of
    ``generate_image`` is pinned to the constants below.
    """
    fixed_settings = dict(
        num_steps=20,
        identitynet_strength_ratio=0.8,
        adapter_strength_ratio=0.8,
        pose_strength=0.4,
        canny_strength=0.3,
        depth_strength=0.5,
        controlnet_selection=["pose", "canny"],
        guidance_scale=5.0,
        seed=42,
        scheduler="EulerDiscreteScheduler",
        enable_LCM=False,
        enhance_face_region=True,
    )
    return generate_image(
        face_image_path=face_file,
        pose_image_path=pose_file,
        prompt=prompt,
        negative_prompt=negative_prompt,
        style_name=style,
        **fixed_settings,
    )
211
+
212
def convert_from_cv2_to_image(img: np.ndarray) -> Image.Image:
    """Convert a BGR OpenCV array into a PIL image with RGB channel order."""
    # `Image` is the PIL module; the image class itself is `Image.Image`.
    return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
214
+
215
def convert_from_image_to_cv2(img: Image.Image) -> np.ndarray:
    """Convert an RGB PIL image into a NumPy array with BGR channel order."""
    # `Image` is the PIL module; the image class itself is `Image.Image`.
    return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
217
+
218
def draw_kps(
    image_pil,
    kps,
    color_list=(
        (255, 0, 0),
        (0, 255, 0),
        (0, 0, 255),
        (255, 255, 0),
        (255, 0, 255),
    ),
):
    """Draw facial keypoints as a colored stick figure on a black canvas.

    Args:
        image_pil: Image whose ``size`` sets the canvas dimensions; its
            pixels are not read.
        kps: Sequence of ``(x, y)`` keypoints. The limb table references
            indices 0-4.
        color_list: One color per keypoint index. An immutable tuple is used
            as the default so the default can never be mutated between calls.

    Returns:
        A PIL image of the same width and height as ``image_pil`` holding the
        rendered limbs and keypoint dots.
    """
    stickwidth = 4
    # Each limb joins one keypoint to keypoint 2.
    limb_seq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
    kps = np.array(kps)

    w, h = image_pil.size
    out_img = np.zeros([h, w, 3])

    for index in limb_seq:
        color = color_list[index[0]]

        x = kps[index][:, 0]
        y = kps[index][:, 1]
        length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
        angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
        # A thin filled ellipse centered between the two endpoints.
        polygon = cv2.ellipse2Poly(
            (int(np.mean(x)), int(np.mean(y))),
            (int(length / 2), stickwidth),
            int(angle),
            0,
            360,
            1,
        )
        out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
    # Dim the limbs once, before the keypoint dots are drawn at full intensity.
    out_img = (out_img * 0.6).astype(np.uint8)

    for idx_kp, kp in enumerate(kps):
        color = color_list[idx_kp]
        x, y = kp
        out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)

    out_img_pil = Image.fromarray(out_img.astype(np.uint8))
    return out_img_pil
262
+
263
def resize_img(
    input_image,
    max_side=1280,
    min_side=1024,
    size=None,
    pad_to_max_side=False,
    mode=PIL.Image.BILINEAR,
    base_pixel_number=64,
):
    """Resize an image, optionally centering it on a white square canvas.

    When ``size`` is given the image is resized to exactly that
    ``(width, height)``. Otherwise the target is derived from ``min_side``
    and ``max_side`` and snapped down to a multiple of ``base_pixel_number``.
    With ``pad_to_max_side`` the result is pasted in the middle of a
    ``max_side`` x ``max_side`` white RGB canvas.
    """
    src_w, src_h = input_image.size
    if size is None:
        # Scale so the shorter side is min_side ...
        first_ratio = min_side / min(src_h, src_w)
        scaled_w = round(first_ratio * src_w)
        scaled_h = round(first_ratio * src_h)
        # ... then rescale so the longer side is max_side.
        second_ratio = max_side / max(scaled_h, scaled_w)
        fitted_w = round(second_ratio * scaled_w)
        fitted_h = round(second_ratio * scaled_h)
        input_image = input_image.resize([fitted_w, fitted_h], mode)
        # Snap both dimensions down to a multiple of base_pixel_number.
        w_resize_new = (fitted_w // base_pixel_number) * base_pixel_number
        h_resize_new = (fitted_h // base_pixel_number) * base_pixel_number
    else:
        w_resize_new, h_resize_new = size
    input_image = input_image.resize([w_resize_new, h_resize_new], mode)

    if not pad_to_max_side:
        return input_image

    # White canvas; the assignment below requires a 3-channel image.
    canvas = np.full([max_side, max_side, 3], 255, dtype=np.uint8)
    left = (max_side - w_resize_new) // 2
    top = (max_side - h_resize_new) // 2
    canvas[top : top + h_resize_new, left : left + w_resize_new] = np.array(
        input_image
    )
    return Image.fromarray(canvas)
293
+
294
def apply_style(
    style_name: str, positive: str, negative: str = ""
) -> Tuple[str, str]:
    """Expand a style template into a ``(prompt, negative prompt)`` pair.

    Unknown style names fall back to ``DEFAULT_STYLE_NAME``. The template's
    ``{prompt}`` placeholder is replaced by ``positive``; ``negative`` is
    appended to the style's own negative prompt after a single space.
    """
    fallback_style = styles[DEFAULT_STYLE_NAME]
    prompt_template, style_negative = styles.get(style_name, fallback_style)
    styled_prompt = prompt_template.replace("{prompt}", positive)
    combined_negative = style_negative + " " + negative
    return styled_prompt, combined_negative
299
+
300
def generate_image(
    face_image_path,
    pose_image_path,
    prompt,
    negative_prompt,
    style_name,
    num_steps,
    identitynet_strength_ratio,
    adapter_strength_ratio,
    pose_strength,
    canny_strength,
    depth_strength,
    controlnet_selection,
    guidance_scale,
    seed,
    scheduler,
    enable_LCM,
    enhance_face_region,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image from a face photo with the shared ``pipe``.

    Args:
        face_image_path: Face photo passed to ``load_image``; required.
        pose_image_path: Optional reference image. When given, its detected
            face supplies the keypoints and bounding box, and it is the image
            fed to the extra ControlNet preprocessors.
        prompt: Text prompt; ``None`` or an empty string becomes "a person".
        negative_prompt: Appended to the style's negative prompt.
        style_name: Key into the style templates (see ``apply_style``).
        num_steps: Number of inference steps.
        identitynet_strength_ratio: Conditioning scale of the identity
            ControlNet.
        adapter_strength_ratio: Scale passed to ``pipe.set_ip_adapter_scale``.
        pose_strength, canny_strength, depth_strength: Conditioning scales of
            the optional ControlNets.
        controlnet_selection: Iterable of "pose" / "canny" / "depth" keys;
            empty means only the identity ControlNet is used.
        guidance_scale: Guidance scale forwarded to the pipeline.
        seed: Seed for the torch generator.
        scheduler: ``"<diffusers class name>[-flag[-flag]]"``; ignored when
            ``enable_LCM`` is true.
        enable_LCM: Enable the LoRA and switch to ``LCMScheduler``.
        enhance_face_region: Build a control mask from the face bounding box.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        ``(image, gr.update(visible=True))`` where ``image`` is the first
        generated image.

    Raises:
        gr.Error: If no face image was supplied, or no face is detected in the
            face image or in the pose reference image.
    """
    if enable_LCM:
        pipe.scheduler = diffusers.LCMScheduler.from_config(pipe.scheduler.config)
        pipe.enable_lora()
    else:
        pipe.disable_lora()
        # Split once: the first part is the class name, each further part
        # switches on one extra scheduler option.
        scheduler_class_name, *scheduler_flags = scheduler.split("-")

        add_kwargs = {}
        if scheduler_flags:
            add_kwargs["use_karras_sigmas"] = True
        if len(scheduler_flags) > 1:
            add_kwargs["algorithm_type"] = "sde-dpmsolver++"
        scheduler_cls = getattr(diffusers, scheduler_class_name)
        pipe.scheduler = scheduler_cls.from_config(
            pipe.scheduler.config, **add_kwargs
        )

    if face_image_path is None:
        raise gr.Error(
            "Cannot find any input face image! Please upload the face image"
        )

    # The prompt textbox defaults to "", so an empty string must fall back to
    # the default prompt just like None does.
    if not prompt:
        prompt = "a person"

    # apply the style template
    prompt, negative_prompt = apply_style(
        style_name, prompt, negative_prompt or ""
    )

    face_image = load_image(face_image_path)
    face_image = resize_img(face_image, max_side=1024)
    face_image_cv2 = convert_from_image_to_cv2(face_image)

    # Extract face features
    face_info = app.get(face_image_cv2)

    if not face_info:
        raise gr.Error(
            "Unable to detect a face in the image. Please upload a different photo with a clear face."
        )

    # only use the maximum face (largest bounding-box area)
    face_info = max(
        face_info,
        key=lambda x: (x["bbox"][2] - x["bbox"][0]) * (x["bbox"][3] - x["bbox"][1]),
    )
    face_emb = face_info["embedding"]
    face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info["kps"])
    img_controlnet = face_image
    if pose_image_path is not None:
        pose_image = load_image(pose_image_path)
        pose_image = resize_img(pose_image, max_side=1024)
        img_controlnet = pose_image
        pose_image_cv2 = convert_from_image_to_cv2(pose_image)

        face_info = app.get(pose_image_cv2)

        if not face_info:
            raise gr.Error(
                "Cannot find any face in the reference image! Please upload another person image"
            )

        # NOTE(review): takes the last detection rather than the largest face,
        # unlike the face-image path above — confirm this is intended.
        face_info = face_info[-1]
        face_kps = draw_kps(pose_image, face_info["kps"])

    # The keypoint image has the size of whichever image it was drawn for, so
    # it is the single source of the output dimensions.
    width, height = face_kps.size

    if enhance_face_region:
        # White rectangle over the face bounding box on a black mask.
        control_mask = np.zeros([height, width, 3])
        x1, y1, x2, y2 = face_info["bbox"]
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        control_mask[y1:y2, x1:x2] = 255
        control_mask = Image.fromarray(control_mask.astype(np.uint8))
    else:
        control_mask = None

    if controlnet_selection:
        controlnet_scales = {
            "pose": pose_strength,
            "canny": canny_strength,
            "depth": depth_strength,
        }
        # The identity ControlNet always comes first; scales and images below
        # follow the same order.
        pipe.controlnet = MultiControlNetModel(
            [controlnet_identitynet]
            + [controlnet_map[s] for s in controlnet_selection]
        )
        control_scales = [float(identitynet_strength_ratio)] + [
            controlnet_scales[s] for s in controlnet_selection
        ]
        control_images = [face_kps] + [
            controlnet_map_fn[s](img_controlnet).resize((width, height))
            for s in controlnet_selection
        ]
    else:
        pipe.controlnet = controlnet_identitynet
        control_scales = float(identitynet_strength_ratio)
        control_images = face_kps

    generator = torch.Generator(device=device).manual_seed(seed)

    print("Start inference...")
    print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")

    pipe.set_ip_adapter_scale(adapter_strength_ratio)
    images = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image_embeds=face_emb,
        image=control_images,
        control_mask=control_mask,
        controlnet_conditioning_scale=control_scales,
        num_inference_steps=num_steps,
        guidance_scale=guidance_scale,
        height=height,
        width=width,
        generator=generator,
    ).images

    return images[0], gr.update(visible=True)
434
+
435
+ # Description
436
+ title = r"""
437
+ <h1 align="center">InstantID: Zero-shot Identity-Preserving Generation in Seconds</h1>
438
+ """
439
+
440
+ description = r"""
441
+ <b>Official 🤗 Gradio demo</b> for <a href='https://github.com/InstantID/InstantID' target='_blank'><b>InstantID: Zero-shot Identity-Preserving Generation in Seconds</b></a>.<br>
442
+
443
+ How to use:<br>
444
+ 1. Upload an image with a face. For images with multiple faces, we will only detect the largest face. Ensure the face is not too small and is clearly visible without significant obstructions or blurring.
445
+ 2. (Optional) You can upload another image as a reference for the face pose. If you don't, we will use the first detected face image to extract facial landmarks. If you use a cropped face at step 1, it is recommended to upload it to define a new face pose.
446
+ 3. (Optional) You can select multiple ControlNet models to control the generation process. The default is to use the IdentityNet only. The ControlNet models include pose skeleton, canny, and depth. You can adjust the strength of each ControlNet model to control the generation process.
447
+ 4. Enter a text prompt, as done in normal text-to-image models.
448
+ 5. Click the <b>Submit</b> button to begin customization.
449
+ 6. Share your customized photo with your friends and enjoy! 😊"""
450
+
451
+ article = r"""
452
+ ---
453
+ 📝 **Citation**
454
+ <br>
455
+ If our work is helpful for your research or applications, please cite us via:
456
+ ```bibtex
457
+ @article{wang2024instantid,
458
+ title={InstantID: Zero-shot Identity-Preserving Generation in Seconds},
459
+ author={Wang, Qixun and Bai, Xu and Wang, Haofan and Qin, Zekui and Chen, Anthony},
460
+ journal={arXiv preprint arXiv:2401.07519},
461
+ year={2024}
462
+ }
463
+ ```
464
+ 📧 **Contact**
465
+ <br>
466
+ If you have any questions, please feel free to open an issue or directly reach us out at <b>haofanwang.ai@gmail.com</b>.
467
+ """
468
+
469
+ tips = r"""
470
+ ### Usage tips of InstantID
471
+ 1. If you're not satisfied with the similarity, try increasing the weight of "IdentityNet Strength" and "Adapter Strength."
472
+ 2. If you feel that the saturation is too high, first decrease the Adapter strength. If it remains too high, then decrease the IdentityNet strength.
473
+ 3. If you find that text control is not as expected, decrease Adapter strength.
474
+ 4. If you find that realistic style is not good enough, go for our Github repo and use a more realistic base model.
475
+ """
476
+
477
+ css = """
478
+ .gradio-container {width: 85% !important}
479
+ """
480
+ with gr.Blocks(css=css) as demo:
481
+ # description
482
+ gr.Markdown(title)
483
+ gr.Markdown(description)
484
+
485
+ with gr.Row():
486
+ with gr.Column():
487
+ with gr.Row(equal_height=True):
488
+ # upload face image
489
+ face_file = gr.Image(
490
+ label="Upload a photo of your face", type="filepath"
491
+ )
492
+ # optional: upload a reference pose image
493
+ pose_file = gr.Image(
494
+ label="Upload a reference pose image (Optional)",
495
+ type="filepath",
496
+ )
497
+
498
+ # prompt
499
+ prompt = gr.Textbox(
500
+ label="Prompt",
501
+ info="Give simple prompt is enough to achieve good face fidelity",
502
+ placeholder="A photo of a person",
503
+ value="",
504
+ )
505
+
506
+ submit = gr.Button("Submit", variant="primary")
507
+ enable_LCM = gr.Checkbox(
508
+ label="Enable Fast Inference with LCM", value=enable_lcm_arg,
509
+ info="LCM speeds up the inference step, the trade-off is the quality of the generated image. It performs better with portrait face images rather than distant faces",
510
+ )
511
+ style = gr.Dropdown(
512
+ label="Style template",
513
+ choices=STYLE_NAMES,
514
+ value=DEFAULT_STYLE_NAME,
515
+ )
516
+
517
+ # strength
518
+ identitynet_strength_ratio = gr.Slider(
519
+ label="IdentityNet strength (for fidelity)",
520
+ minimum=0,
521
+ maximum=1.5,
522
+ step=0.05,
523
+ value=0.80,
524
+ )
525
+ adapter_strength_ratio = gr.Slider(
526
+ label="Image adapter strength (for detail)",
527
+ minimum=0,
528
+ maximum=1.5,
529
+ step=0.05,
530
+ value=0.80,
531
+ )
532
+ with gr.Accordion("Controlnet"):
533
+ controlnet_selection = gr.CheckboxGroup(
534
+ ["pose", "canny", "depth"], label="Controlnet", value=["pose"],
535
+ info="Use pose for skeleton inference, canny for edge detection, and depth for depth map estimation. You can try all three to control the generation process"
536
+ )
537
+ pose_strength = gr.Slider(
538
+ label="Pose strength",
539
+ minimum=0,
540
+ maximum=1.5,
541
+ step=0.05,
542
+ value=0.40,
543
+ )
544
+ canny_strength = gr.Slider(
545
+ label="Canny strength",
546
+ minimum=0,
547
+ maximum=1.5,
548
+ step=0.05,
549
+ value=0.40,
550
+ )
551
+ depth_strength = gr.Slider(
552
+ label="Depth strength",
553
+ minimum=0,
554
+ maximum=1.5,
555
+ step=0.05,
556
+ value=0.40,
557
+ )
558
+ with gr.Accordion(open=False, label="Advanced Options"):
559
+ negative_prompt = gr.Textbox(
560
+ label="Negative Prompt",
561
+ placeholder="low quality",
562
+ value="(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
563
+ )
564
+ num_steps = gr.Slider(
565
+ label="Number of sample steps",
566
+ minimum=1,
567
+ maximum=100,
568
+ step=1,
569
+ value=5 if enable_lcm_arg else 30,
570
+ )
571
+ guidance_scale = gr.Slider(
572
+ label="Guidance scale",
573
+ minimum=0.1,
574
+ maximum=20.0,
575
+ step=0.1,
576
+ value=0.0 if enable_lcm_arg else 5.0,
577
+ )
578
+ seed = gr.Slider(
579
+ label="Seed",
580
+ minimum=0,
581
+ maximum=MAX_SEED,
582
+ step=1,
583
+ value=42,
584
+ )
585
+ schedulers = [
586
+ "DEISMultistepScheduler",
587
+ "HeunDiscreteScheduler",
588
+ "EulerDiscreteScheduler",
589
+ "DPMSolverMultistepScheduler",
590
+ "DPMSolverMultistepScheduler-Karras",
591
+ "DPMSolverMultistepScheduler-Karras-SDE",
592
+ ]
593
+ scheduler = gr.Dropdown(
594
+ label="Schedulers",
595
+ choices=schedulers,
596
+ value="EulerDiscreteScheduler",
597
+ )
598
+ randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
599
+ enhance_face_region = gr.Checkbox(label="Enhance non-face region", value=True)
600
+
601
+ with gr.Column(scale=1):
602
+ gallery = gr.Image(label="Generated Images")
603
+ usage_tips = gr.Markdown(
604
+ label="InstantID Usage Tips", value=tips, visible=False
605
+ )
606
+
607
+ submit.click(
608
+ fn=remove_tips,
609
+ outputs=usage_tips,
610
+ ).then(
611
+ fn=randomize_seed_fn,
612
+ inputs=[seed, randomize_seed],
613
+ outputs=seed,
614
+ queue=False,
615
+ api_name=False,
616
+ ).then(
617
+ fn=generate_image,
618
+ inputs=[
619
+ face_file,
620
+ pose_file,
621
+ prompt,
622
+ negative_prompt,
623
+ style,
624
+ num_steps,
625
+ identitynet_strength_ratio,
626
+ adapter_strength_ratio,
627
+ pose_strength,
628
+ canny_strength,
629
+ depth_strength,
630
+ controlnet_selection,
631
+ guidance_scale,
632
+ seed,
633
+ scheduler,
634
+ enable_LCM,
635
+ enhance_face_region,
636
+ ],
637
+ outputs=[gallery, usage_tips],
638
+ )
639
+
640
+ enable_LCM.input(
641
+ fn=toggle_lcm_ui,
642
+ inputs=[enable_LCM],
643
+ outputs=[num_steps, guidance_scale],
644
+ queue=False,
645
+ )
646
+
647
+ gr.Examples(
648
+ examples=get_example(),
649
+ inputs=[face_file, pose_file, prompt, style, negative_prompt],
650
+ fn=run_for_examples,
651
+ outputs=[gallery, usage_tips],
652
+ cache_examples=True,
653
+ )
654
+
655
+ gr.Markdown(article)
656
+
657
+ demo.launch()
658
+
659
+
660
def str_to_bool(value):
    """Parse a boolean command-line or environment value.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so any
    non-empty text -- including ``"False"`` and ``"0"`` -- evaluates to True.
    This parser maps the usual spellings explicitly instead.

    Args:
        value: A bool (returned unchanged) or a string such as ``"true"``,
            ``"0"``, ``"yes"``; matching is case-insensitive and ignores
            surrounding whitespace.

    Returns:
        The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    normalized = str(value).strip().lower()
    if normalized in ("1", "true", "t", "yes", "y", "on"):
        return True
    if normalized in ("", "0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--pretrained_model_name_or_path", type=str, default="wangqixun/YamerMIX_v8"
    )
    parser.add_argument(
        "--enable_LCM",
        type=str_to_bool,
        # argparse also runs string defaults through `type`, so an ENABLE_LCM
        # environment value such as "false" is parsed correctly as well.
        default=os.environ.get("ENABLE_LCM", False),
    )
    args = parser.parse_args()

    main(args.pretrained_model_name_or_path, args.enable_LCM)
gradio_demo/app.py ADDED
@@ -0,0 +1,656 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ sys.path.append('./')
3
+
4
+ from typing import Tuple
5
+
6
+ import os
7
+ import cv2
8
+ import math
9
+ import torch
10
+ import random
11
+ import numpy as np
12
+ import argparse
13
+ import pandas as pd
14
+
15
+ import PIL
16
+ from PIL import Image
17
+
18
+ import diffusers
19
+ from diffusers.utils import load_image
20
+ from diffusers.models import ControlNetModel
21
+ from diffusers import LCMScheduler
22
+
23
+ from huggingface_hub import hf_hub_download
24
+
25
+ import insightface
26
+ from insightface.app import FaceAnalysis
27
+
28
+ from style_template import styles
29
+ from pipeline_stable_diffusion_xl_instantid_full import StableDiffusionXLInstantIDPipeline
30
+ from model_util import load_models_xl, get_torch_device, torch_gc
31
+
32
+
33
# Module-level configuration and model handles. Everything here runs once at
# import time, before main() builds the pipeline and the UI.
MAX_SEED = np.iinfo(np.int32).max
device = get_torch_device()
# Half precision only when the device string mentions CUDA; float32 otherwise.
dtype = torch.float16 if "cuda" in str(device) else torch.float32
STYLE_NAMES = list(styles.keys())
DEFAULT_STYLE_NAME = "Watercolor"

# Load face encoder
app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
app.prepare(ctx_id=0, det_size=(640, 640))

# Path to InstantID models
face_adapter = './checkpoints/ip-adapter.bin'
controlnet_path = './checkpoints/ControlNetModel'

# Load pipeline
controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=dtype)

# Watermark image, pre-scaled to 100x70; used as the default overlay by add_watermark().
logo = Image.open("./gradio_demo/watermark.png")
logo = logo.resize((100, 70))
53
+
54
+ from cv2 import imencode
55
+ import base64
56
+
57
+ # def encode_pil_to_base64_new(pil_image):
58
+ # print("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
59
+ # image_arr = np.asarray(pil_image)[:,:,::-1]
60
+ # _, byte_data = imencode('.png', image_arr)
61
+ # base64_data = base64.b64encode(byte_data)
62
+ # base64_string_opencv = base64_data.decode("utf-8")
63
+ # return "data:image/png;base64," + base64_string_opencv
64
+
65
+ import gradio as gr
66
+
67
+ # gr.processing_utils.encode_pil_to_base64 = encode_pil_to_base64_new
68
+
69
+ def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=False):
70
+
71
+ if pretrained_model_name_or_path.endswith(
72
+ ".ckpt"
73
+ ) or pretrained_model_name_or_path.endswith(".safetensors"):
74
+ scheduler_kwargs = hf_hub_download(
75
+ repo_id="wangqixun/YamerMIX_v8",
76
+ subfolder="scheduler",
77
+ filename="scheduler_config.json",
78
+ )
79
+
80
+ (tokenizers, text_encoders, unet, _, vae) = load_models_xl(
81
+ pretrained_model_name_or_path=pretrained_model_name_or_path,
82
+ scheduler_name=None,
83
+ weight_dtype=dtype,
84
+ )
85
+
86
+ scheduler = diffusers.EulerDiscreteScheduler.from_config(scheduler_kwargs)
87
+ pipe = StableDiffusionXLInstantIDPipeline(
88
+ vae=vae,
89
+ text_encoder=text_encoders[0],
90
+ text_encoder_2=text_encoders[1],
91
+ tokenizer=tokenizers[0],
92
+ tokenizer_2=tokenizers[1],
93
+ unet=unet,
94
+ scheduler=scheduler,
95
+ controlnet=controlnet,
96
+ ).to(device)
97
+
98
+ else:
99
+ pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
100
+ pretrained_model_name_or_path,
101
+ controlnet=controlnet,
102
+ torch_dtype=dtype,
103
+ safety_checker=None,
104
+ feature_extractor=None,
105
+ ).to(device)
106
+
107
+ pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)
108
+
109
+ pipe.load_ip_adapter_instantid(face_adapter)
110
+ # load and disable LCM
111
+ pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
112
+ pipe.disable_lora()
113
+
114
def remove_tips():
    """Return a Gradio update that hides the component bound as this handler's output."""
    return gr.update(visible=False)
117
+
118
+
119
+ # prompts = [
120
+ # ["superman","Vibrant Color"], ["japanese anime character with white/neon hair","Watercolor"],
121
+ # # ["Suited professional","(No style)"],
122
+ # ["Scooba diver","Line art"], ["eskimo","Snow"]
123
+ # ]
124
+
125
def convert_from_cv2_to_image(img: np.ndarray) -> Image.Image:
    """Convert an OpenCV BGR array into an RGB PIL image."""
    # `Image` is the PIL.Image module here; the image class is `Image.Image`.
    return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
127
+
128
def convert_from_image_to_cv2(img: Image.Image) -> np.ndarray:
    """Convert an RGB PIL image into an OpenCV BGR array."""
    # `Image` is the PIL.Image module here; the image class is `Image.Image`.
    return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
130
+
131
def _run_for_prompt(face_file, style, prompt_index):
    """Generate one avatar from the `prompt_index`-th prompt of the chosen style.

    Each entry of `styles` is unpacked as (prompts, negative_prompt), where
    `prompts` is indexed by position. A style name that is not registered
    (including an unset dropdown) falls back to the second registered style.
    """
    prompts, negative_prompt = styles.get(style, styles.get(STYLE_NAMES[1]))
    return generate_image(face_file, prompts[prompt_index], negative_prompt)


# The four handlers below are bound to the four gallery slots. `progress` is
# not read directly; presumably its presence lets Gradio mirror tqdm progress
# from the pipeline -- confirm against the Gradio version in use.
def run_for_prompts1(face_file, style, progress=gr.Progress(track_tqdm=True)):
    """Generate the avatar for the first prompt of `style`."""
    return _run_for_prompt(face_file, style, 0)


def run_for_prompts2(face_file, style, progress=gr.Progress(track_tqdm=True)):
    """Generate the avatar for the second prompt of `style`."""
    return _run_for_prompt(face_file, style, 1)


def run_for_prompts3(face_file, style, progress=gr.Progress(track_tqdm=True)):
    """Generate the avatar for the third prompt of `style`."""
    return _run_for_prompt(face_file, style, 2)


def run_for_prompts4(face_file, style, progress=gr.Progress(track_tqdm=True)):
    """Generate the avatar for the fourth prompt of `style`."""
    return _run_for_prompt(face_file, style, 3)
149
+
150
+ # def validate_and_process(face_file, style, email):
151
+
152
+ # # Your processing logic here
153
+ # gallery1, gallery2, gallery3, gallery4 = run_for_prompts1(face_file, style), run_for_prompts2(face_file, style), run_for_prompts3(face_file, style), run_for_prompts4(face_file, style)
154
+ # return gallery1, gallery2, gallery3, gallery4
155
+
156
def draw_kps(image_pil, kps, color_list=((255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255))):
    """Render facial keypoints as a coloured stick figure on a black canvas.

    Args:
        image_pil: PIL image; only its size is used, to size the canvas.
        kps: Sequence of (x, y) keypoints. Indices 0-4 are referenced by the
            limb table, so at least five points are required.
        color_list: One RGB colour per keypoint index. The default is an
            immutable tuple so it cannot be altered between calls.

    Returns:
        A PIL image of the same size as `image_pil` containing the limbs
        (dimmed to 60% intensity) with filled circles at each keypoint.
    """
    stickwidth = 4
    # Every limb connects an outer keypoint to keypoint 2.
    limb_seq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
    kps = np.array(kps)

    w, h = image_pil.size
    out_img = np.zeros([h, w, 3])

    for index in limb_seq:
        color = color_list[index[0]]

        x = kps[index][:, 0]
        y = kps[index][:, 1]
        length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
        angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
        # Each limb is a thin filled ellipse centred between its two endpoints.
        polygon = cv2.ellipse2Poly(
            (int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1
        )
        out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
    # Dim the limbs so the full-intensity keypoint circles drawn next stand out.
    out_img = (out_img * 0.6).astype(np.uint8)

    for idx_kp, kp in enumerate(kps):
        color = color_list[idx_kp]
        x, y = kp
        out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)

    out_img_pil = Image.fromarray(out_img.astype(np.uint8))
    return out_img_pil
183
+
184
def resize_img(input_image, max_side=1280, min_side=1280, size=None,
               pad_to_max_side=True, mode=PIL.Image.BILINEAR, base_pixel_number=64):
    """Resize an image for the pipeline and optionally pad it to a square.

    Args:
        input_image: PIL image to resize.
        max_side: Upper bound for the longer side; also the edge length of
            the padded square canvas.
        min_side: Target for the shorter side before `max_side` is enforced.
        size: Optional explicit (width, height); when given, the ratio-based
            sizing is skipped.
        pad_to_max_side: If true, centre the result on a white
            `max_side` x `max_side` canvas.
        mode: PIL resampling filter.
        base_pixel_number: Final width and height are rounded down to a
            multiple of this value.

    Returns:
        The resized (and, if requested, padded RGB) PIL image.

    Raises:
        ValueError: If padding is requested but the resized image does not
            fit inside the `max_side` canvas.
    """
    w, h = input_image.size
    print(f"Original Size --> {input_image.size}")
    if size is not None:
        w_resize_new, h_resize_new = size
    else:
        ratio = min_side / min(h, w)
        w, h = round(ratio*w), round(ratio*h)
        ratio = max_side / max(h, w)
        input_image = input_image.resize([round(ratio*w), round(ratio*h)], mode)
        w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
        h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
    input_image = input_image.resize([w_resize_new, h_resize_new], mode)

    if pad_to_max_side:
        if w_resize_new > max_side or h_resize_new > max_side:
            # Negative offsets would otherwise index the canvas from its far edge.
            raise ValueError(
                f"Resized image {w_resize_new}x{h_resize_new} does not fit in a "
                f"{max_side}x{max_side} canvas"
            )
        res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
        offset_x = (max_side - w_resize_new) // 2
        offset_y = (max_side - h_resize_new) // 2
        # The canvas has three channels, so normalise RGBA / greyscale /
        # palette inputs to RGB instead of failing on a shape mismatch.
        res[offset_y:offset_y+h_resize_new, offset_x:offset_x+w_resize_new] = np.array(
            input_image.convert("RGB")
        )
        input_image = Image.fromarray(res)

    print(f"Final modified image size --> {input_image.size}")
    return input_image
209
+
210
+ # def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]:
211
+ # p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
212
+ # return p.replace("{prompt}", positive), n + ' ' + negative
213
+
214
def store_images(email, gallery1, gallery2, gallery3, gallery4, consent):
    """Save the four generated avatars and log them against an email address.

    The images are written to ``images/<email>_gallery<N>.png`` and one row
    (email plus the four paths) is appended to ``image_data.csv``.

    Args:
        email: Address typed by the user; also used to build the file names.
        gallery1, gallery2, gallery3, gallery4: Generated images, either
            NumPy arrays or PIL images.
        consent: Whether the user ticked the consent checkbox.

    Raises:
        gr.Error: If consent is missing, the email is empty, or any of the
            four images has not been generated yet.
    """
    if not consent:
        raise gr.Error("Consent not provided")

    email = (email or "").strip()
    # The email is untrusted input that becomes part of a file path: replace
    # anything that is not a plain address character (path separators
    # included) so the files cannot be written outside the images directory.
    safe_name = "".join(
        ch if ch.isalnum() or ch in "@._+-" else "_" for ch in email
    ).lstrip(".")
    if not safe_name:
        raise gr.Error("Email ID is compulsory")

    galleries = []
    for img in (gallery1, gallery2, gallery3, gallery4):
        if img is None:
            raise gr.Error("Please wait until all four images have been generated")
        if isinstance(img, np.ndarray):
            img = Image.fromarray(img)
        galleries.append(img)

    # Create the images directory if it doesn't exist (no check-then-create race).
    os.makedirs('images', exist_ok=True)

    image_paths = []
    for i, img in enumerate(galleries, start=1):
        img_path = f'images/{safe_name}_gallery{i}.png'
        img.save(img_path)
        image_paths.append(img_path)

    csv_file_path = 'image_data.csv'

    df = pd.DataFrame({
        'email': [email],
        'img1_path': [image_paths[0]],
        'img2_path': [image_paths[1]],
        'img3_path': [image_paths[2]],
        'img4_path': [image_paths[3]],
    })

    # Append mode creates the file when missing; the header is only written then.
    df.to_csv(
        csv_file_path,
        mode='a',
        header=not os.path.isfile(csv_file_path),
        index=False,
    )

    gr.Info("Thankyou!! Your avatar is on the way to your inbox")
253
+
254
def add_watermark(image, watermark=logo, opacity=128, position="bottom_right", padding=10):
    """Overlay a semi-transparent watermark in one corner of an image.

    Args:
        image: PIL image or NumPy array to stamp.
        watermark: PIL image or NumPy array used as the overlay; defaults to
            the module-level logo. The object passed in is never modified.
        opacity: Overlay opacity from 0 (invisible) to 255 (unchanged).
        position: One of 'bottom_right', 'bottom_left', 'top_right',
            'top_left'.
        padding: Gap in pixels between the watermark and the image edges.

    Returns:
        The stamped PIL image: RGBA if the input was RGBA, otherwise RGB.

    Raises:
        ValueError: If `position` is not one of the supported corners.
    """
    if position not in ("bottom_right", "bottom_left", "top_right", "top_left"):
        raise ValueError("Unsupported position. Choose from 'bottom_right', 'bottom_left', 'top_right', 'top_left'.")

    # Convert NumPy arrays to PIL images if needed
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    if isinstance(watermark, np.ndarray):
        watermark = Image.fromarray(watermark)

    # Remember the caller's mode before compositing: checking it after the
    # RGBA conversion would always see "RGBA" and never convert back.
    original_mode = image.mode
    image = image.convert("RGBA")
    watermark = watermark.convert("RGBA").copy()

    # Scale the watermark's existing alpha rather than overwriting it, so
    # fully transparent regions of the logo stay transparent. For an opaque
    # watermark this yields a uniform alpha equal to `opacity`.
    alpha = watermark.getchannel("A").point(lambda a: a * opacity // 255)
    watermark.putalpha(alpha)

    # Calculate the position for the watermark
    if position.endswith("left"):
        x = padding
    else:
        x = image.width - watermark.width - padding
    if position.startswith("top"):
        y = padding
    else:
        y = image.height - watermark.height - padding

    # Paste using the watermark itself as the mask so its alpha is honoured
    image.paste(watermark, (x, y), watermark)

    # Convert back to 'RGB' if the original image was not 'RGBA'
    if original_mode != "RGBA":
        image = image.convert("RGB")

    return image
295
+
296
def generate_image(face_image, prompt, negative_prompt, *, pose_image_path=None,
                   enable_LCM=False, identitynet_strength_ratio=0.90,
                   adapter_strength_ratio=0.60, num_steps=15, guidance_scale=5,
                   seed=None, enhance_face_region=True):
    """Generate one watermarked avatar from a face photo and a text prompt.

    Args:
        face_image: PIL image containing the face to preserve.
        prompt: Positive text prompt.
        negative_prompt: Negative text prompt.
        pose_image_path: Optional reference image whose face keypoints
            replace those of `face_image`.
        enable_LCM: Enable the LCM LoRA and scheduler instead of Euler.
        identitynet_strength_ratio: ControlNet conditioning scale.
        adapter_strength_ratio: IP-adapter scale.
        num_steps: Number of inference steps.
        guidance_scale: Guidance scale passed to the pipeline.
        seed: Generator seed; a random one is drawn when None.
        enhance_face_region: Restrict the control signal to the detected
            face bounding box via a mask.

    The keyword-only defaults reproduce the values that were previously
    hard-coded, so existing three-argument calls behave as before.

    Returns:
        The first generated image with the watermark applied.

    Raises:
        gr.Error: If no face image is supplied or no face is detected.
    """
    # Validate before touching the shared pipeline state.
    if face_image is None:
        raise gr.Error("Cannot find any input face image! Please upload the face image")

    if seed is None:
        seed = random.randint(0, MAX_SEED)
    print(f"Seed --> {seed}")

    if enable_LCM:
        pipe.enable_lora()
        pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
    else:
        pipe.disable_lora()
        pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)

    def pick_largest_face(faces):
        # Only the face with the largest bounding-box area is used.
        return sorted(
            faces,
            key=lambda x: (x['bbox'][2] - x['bbox'][0]) * (x['bbox'][3] - x['bbox'][1]),
        )[-1]

    face_image = resize_img(face_image)
    face_image_cv2 = convert_from_image_to_cv2(face_image)

    # Extract face features
    face_info = app.get(face_image_cv2)
    if len(face_info) == 0:
        raise gr.Error("Cannot find any face in the image! Please upload another person image")

    face_info = pick_largest_face(face_info)
    face_emb = face_info['embedding']
    face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info['kps'])

    if pose_image_path is not None:
        pose_image = load_image(pose_image_path)
        pose_image = resize_img(pose_image)
        pose_image_cv2 = convert_from_image_to_cv2(pose_image)

        face_info = app.get(pose_image_cv2)
        if len(face_info) == 0:
            raise gr.Error("Cannot find any face in the reference image! Please upload another person image")

        # Same selection rule as for the identity photo. From here on
        # `face_info` describes the pose face, so the control mask below is
        # positioned on the pose image.
        face_info = pick_largest_face(face_info)
        face_kps = draw_kps(pose_image, face_info['kps'])

    # The output resolution follows the keypoint canvas.
    width, height = face_kps.size

    if enhance_face_region:
        control_mask = np.zeros([height, width, 3])
        x1, y1, x2, y2 = face_info["bbox"]
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        control_mask[y1:y2, x1:x2] = 255
        control_mask = Image.fromarray(control_mask.astype(np.uint8))
    else:
        control_mask = None

    generator = torch.Generator(device=device).manual_seed(seed)

    print("Start inference...")
    print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")

    pipe.set_ip_adapter_scale(adapter_strength_ratio)
    images = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image_embeds=face_emb,
        image=face_kps,
        control_mask=control_mask,
        controlnet_conditioning_scale=float(identitynet_strength_ratio),
        num_inference_steps=num_steps,
        guidance_scale=guidance_scale,
        height=height,
        width=width,
        generator=generator,
    ).images

    return add_watermark(images[0])
390
+
391
+ ### Description
392
+ title = r"""
393
+ <h1 align="center" style="color:white;">Choose your AVATAR</h1>
394
+ """
395
+
396
+ description = r"""
397
+ <h2 style="color:white;"> Powered by IDfy </h2>"""
398
+
399
+ article = r""""""
400
+
401
+ tips = r""""""
402
+ # css = '''
403
+ # .gradio-container {
404
+ # width: 95% !important;
405
+ # background-image: url('./InstantID/gradio_demo/logo.png');
406
+ # background-size: cover;
407
+ # background-position: center;
408
+ # }
409
+ # .image-gallery {
410
+ # height: 100vh !important;
411
+ # overflow: auto;
412
+ # }
413
+ # .gradio-row .gradio-element {
414
+ # margin: 0 !important;
415
+ # }
416
+ # '''
417
+ css = '''
418
+ .gradio-container {width: 100% !important; color: white; background: linear-gradient(135deg, #1C43B9, #254977, #343434);}
419
+ .gradio-row .gradio-element { margin: 0 !important; }
420
+ .centered-column {
421
+ display: flex;
422
+ justify-content: center;
423
+ align-items: center;
424
+ width: 100%;}
425
+ #store-btn {
426
+ background: #f2bb13 !important;
427
+ color: white !important;
428
+ }
429
+ '''
430
+ with gr.Blocks(css=css) as demo:
431
+
432
+ # description
433
+ gr.Markdown(title)
434
+ with gr.Column():
435
+ with gr.Row():
436
+ gr.Image("./gradio_demo/logo.png", scale=0, min_width=50, show_label=False, show_download_button=False)
437
+ gr.Markdown(description)
438
+ style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
439
+ with gr.Row(equal_height=True): # Center the face file
440
+ with gr.Column(elem_id="centered-face", elem_classes=["centered-column"]): # Use CSS class for centering
441
+ face_file = gr.Image(label="Upload a photo of your face", type="pil", sources="webcam", height=400, width=500)
442
+ # submit = gr.Button("Submit", variant="primary")
443
+ with gr.Column():
444
+ with gr.Row():
445
+ gallery1 = gr.Image(label="Generated Images")
446
+ gallery2 = gr.Image(label="Generated Images")
447
+ with gr.Row():
448
+ gallery3 = gr.Image(label="Generated Images")
449
+ gallery4 = gr.Image(label="Generated Images")
450
+ email = gr.Textbox(label="Email", info="Enter your email address", value="")
451
+ consent = gr.Checkbox(label="I am giving my consent to use my data to share my AI Avtar and IDfy relevant information from time to time")
452
+ submit1 = gr.Button("STORE",elem_id="store-btn")
453
+ # with gr.Blocks(css=css) as demo:
454
+
455
+ # # description
456
+ # gr.Markdown(title)
457
+ # with gr.Column():
458
+ # with gr.Row():
459
+ # gr.Image("./gradio_demo/logo.png",scale=0,min_width=50,show_label=False,show_download_button=False)
460
+ # gr.Markdown(description)
461
+ # style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
462
+ # face_file = gr.Image(label="Upload a photo of your face", type="pil",sources="webcam", height=400, width=500)
463
+ # submit = gr.Button("Submit", variant="primary")
464
+ # with gr.Column():
465
+ # with gr.Row():
466
+ # gallery1 = gr.Image(label="Generated Images")
467
+ # gallery2 = gr.Image(label="Generated Images")
468
+ # with gr.Row():
469
+ # gallery3 = gr.Image(label="Generated Images")
470
+ # gallery4 = gr.Image(label="Generated Images")
471
+ # email = gr.Textbox(label="Email",
472
+ # info="Enter your email address",
473
+ # value="")
474
+ # consent = gr.Checkbox(label="I am giving my consent to use my data to share my AI Avtar and IDfy relevant information from time to time")
475
+ # submit1 = gr.Button("STORE", variant="primary")
476
+ # # submit1 = gr.Button("Store")
477
+ usage_tips = gr.Markdown(label="Usage tips of InstantID", value=tips ,visible=False)
478
+
479
+ face_file.upload(
480
+ fn=remove_tips,
481
+ outputs=usage_tips,
482
+ queue=True,
483
+ api_name=False,
484
+ show_progress = "full"
485
+ ).then(
486
+ fn=run_for_prompts1,
487
+ inputs=[face_file,style],
488
+ outputs=[gallery1]
489
+ ).then(
490
+ fn=run_for_prompts2,
491
+ inputs=[face_file,style],
492
+ outputs=[gallery2]
493
+ ).then(
494
+ fn=run_for_prompts3,
495
+ inputs=[face_file,style],
496
+ outputs=[gallery3]
497
+ ).then(
498
+ fn=run_for_prompts4,
499
+ inputs=[face_file,style],
500
+ outputs=[gallery4]
501
+ )
502
+ # submit.click(
503
+ # fn=remove_tips,
504
+ # outputs=usage_tips,
505
+ # queue=True,
506
+ # api_name=False,
507
+ # show_progress = "full"
508
+ # ).then(
509
+ # fn=run_for_prompts1,
510
+ # inputs=[face_file,style],
511
+ # outputs=[gallery1]
512
+ # ).then(
513
+ # fn=run_for_prompts2,
514
+ # inputs=[face_file,style],
515
+ # outputs=[gallery2]
516
+ # ).then(
517
+ # fn=run_for_prompts3,
518
+ # inputs=[face_file,style],
519
+ # outputs=[gallery3]
520
+ # ).then(
521
+ # fn=run_for_prompts4,
522
+ # inputs=[face_file,style],
523
+ # outputs=[gallery4]
524
+ # )
525
+
526
+ # submit1.click(
527
+ # fn=store_images,
528
+ # inputs=[email,gallery1,gallery2,gallery3,gallery4,consent],
529
+ # outputs=None)
530
+
531
+
532
+
533
+ gr.Markdown(article)
534
+
535
+ demo.launch(share=True)
536
+
537
+ # with gr.Blocks(css=css, js=js) as demo:
538
+
539
+ # # description
540
+ # gr.Markdown(title)
541
+ # with gr.Row():
542
+ # gr.Image("./gradio_demo/logo.png",scale=0,min_width=50,show_label=False,show_download_button=False)
543
+ # gr.Markdown(description)
544
+ # with gr.Row():
545
+ # with gr.Column():
546
+ # style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
547
+ # face_file = gr.Image(label="Upload a photo of your face", type="pil",sources="webcam")
548
+ # submit = gr.Button("Submit", variant="primary")
549
+ # with gr.Column():
550
+ # with gr.Row():
551
+ # gallery1 = gr.Image(label="Generated Images")
552
+ # gallery2 = gr.Image(label="Generated Images")
553
+ # with gr.Row():
554
+ # gallery3 = gr.Image(label="Generated Images")
555
+ # gallery4 = gr.Image(label="Generated Images")
556
+ # email = gr.Textbox(label="Email",
557
+ # info="Enter your email address",
558
+ # value="")
559
+
560
+ # usage_tips = gr.Markdown(label="Usage tips of InstantID", value=tips ,visible=False)
561
+ # # identitynet_strength_ratio = gr.Slider(
562
+ # # label="IdentityNet strength (for fidelity)",
563
+ # # minimum=0,
564
+ # # maximum=1.5,
565
+ # # step=0.05,
566
+ # # value=0.95,
567
+ # # )
568
+ # # adapter_strength_ratio = gr.Slider(
569
+ # # label="Image adapter strength (for detail)",
570
+ # # minimum=0,
571
+ # # maximum=1.5,
572
+ # # step=0.05,
573
+ # # value=0.60,
574
+ # # )
575
+ # # negative_prompt = gr.Textbox(
576
+ # # label="Negative Prompt",
577
+ # # placeholder="low quality",
578
+ # # value="(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
579
+ # # )
580
+ # # num_steps = gr.Slider(
581
+ # # label="Number of sample steps",
582
+ # # minimum=15,
583
+ # # maximum=100,
584
+ # # step=1,
585
+ # # value=5 if enable_lcm_arg else 15,
586
+ # # )
587
+ # # guidance_scale = gr.Slider(
588
+ # # label="Guidance scale",
589
+ # # minimum=0.1,
590
+ # # maximum=10.0,
591
+ # # step=0.1,
592
+ # # value=0 if enable_lcm_arg else 8.5,
593
+ # # )
594
+ # # if email is None:
595
+ # # print("STOPPPP")
596
+ # # raise gr.Error("Email ID is compulsory")
597
+ # face_file.upload(
598
+ # fn=remove_tips,
599
+ # outputs=usage_tips,
600
+ # queue=True,
601
+ # api_name=False,
602
+ # show_progress = "full"
603
+ # ).then(
604
+ # fn=run_for_prompts1,
605
+ # inputs=[face_file,style],
606
+ # outputs=[gallery1]
607
+ # ).then(
608
+ # fn=run_for_prompts2,
609
+ # inputs=[face_file,style],
610
+ # outputs=[gallery2]
611
+ # ).then(
612
+ # fn=run_for_prompts3,
613
+ # inputs=[face_file,style],
614
+ # outputs=[gallery3]
615
+ # ).then(
616
+ # fn=run_for_prompts4,
617
+ # inputs=[face_file,style],
618
+ # outputs=[gallery4]
619
+ # )
620
+ # submit.click(
621
+ # fn=remove_tips,
622
+ # outputs=usage_tips,
623
+ # queue=True,
624
+ # api_name=False,
625
+ # show_progress = "full"
626
+ # ).then(
627
+ # fn=run_for_prompts1,
628
+ # inputs=[face_file,style],
629
+ # outputs=[gallery1]
630
+ # ).then(
631
+ # fn=run_for_prompts2,
632
+ # inputs=[face_file,style],
633
+ # outputs=[gallery2]
634
+ # ).then(
635
+ # fn=run_for_prompts3,
636
+ # inputs=[face_file,style],
637
+ # outputs=[gallery3]
638
+ # ).then(
639
+ # fn=run_for_prompts4,
640
+ # inputs=[face_file,style],
641
+ # outputs=[gallery4]
642
+ # )
643
+
644
+
645
+ # gr.Markdown(article)
646
+
647
+ # demo.launch(share=True)
648
+
649
+ if __name__ == "__main__":
650
+ parser = argparse.ArgumentParser()
651
+ parser.add_argument("--pretrained_model_name_or_path", type=str, default="wangqixun/YamerMIX_v8")
652
+ args = parser.parse_args()
653
+
654
+ main(args.pretrained_model_name_or_path, False)
655
+
656
+
gradio_demo/app1.py ADDED
@@ -0,0 +1,434 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ sys.path.append('./')
3
+
4
+ from typing import Tuple
5
+
6
+ import os
7
+ import cv2
8
+ import math
9
+ import torch
10
+ import random
11
+ import numpy as np
12
+ import argparse
13
+
14
+ import PIL
15
+ from PIL import Image
16
+
17
+ import diffusers
18
+ from diffusers.utils import load_image
19
+ from diffusers.models import ControlNetModel
20
+ from diffusers import LCMScheduler
21
+
22
+ from huggingface_hub import hf_hub_download
23
+
24
+ import insightface
25
+ from insightface.app import FaceAnalysis
26
+
27
+ from style_template import styles
28
+ from pipeline_stable_diffusion_xl_instantid_full import StableDiffusionXLInstantIDPipeline
29
+ from model_util import load_models_xl, get_torch_device, torch_gc
30
+
31
+
32
# Module-level configuration and model handles. Everything here runs once at
# import time, before main() builds the pipeline and the UI.
MAX_SEED = np.iinfo(np.int32).max
device = get_torch_device()
# Half precision only when the device string mentions CUDA; float32 otherwise.
dtype = torch.float16 if "cuda" in str(device) else torch.float32
STYLE_NAMES = list(styles.keys())
DEFAULT_STYLE_NAME = "Watercolor"

# Load face encoder
app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
app.prepare(ctx_id=0, det_size=(320, 320))

# Path to InstantID models
face_adapter = './checkpoints/ip-adapter.bin'
controlnet_path = './checkpoints/ControlNetModel'

# Load pipeline
controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=dtype)

logo = Image.open("./gradio_demo/logo.png")
51
+
52
+ from cv2 import imencode
53
+ import base64
54
+
55
+ # def encode_pil_to_base64_new(pil_image):
56
+ # print("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
57
+ # image_arr = np.asarray(pil_image)[:,:,::-1]
58
+ # _, byte_data = imencode('.png', image_arr)
59
+ # base64_data = base64.b64encode(byte_data)
60
+ # base64_string_opencv = base64_data.decode("utf-8")
61
+ # return "data:image/png;base64," + base64_string_opencv
62
+
63
+ import gradio as gr
64
+
65
+ # gr.processing_utils.encode_pil_to_base64 = encode_pil_to_base64_new
66
+
67
def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=False):
    """Build the InstantID SDXL pipeline and launch the avatar Gradio demo.

    Args:
        pretrained_model_name_or_path: Model id / directory understood by
            ``from_pretrained``, or a path ending in ``.ckpt`` / ``.safetensors``
            which is loaded through ``load_models_xl``.
        enable_lcm_arg: Accepted for call compatibility; this function does
            not read it (LCM is hard-disabled in ``generate_image``).
    """
    # NOTE(review): the nesting below was reconstructed from a listing that
    # had lost its indentation -- confirm the layout against the real file.
    if pretrained_model_name_or_path.endswith((".ckpt", ".safetensors")):
        scheduler_kwargs = hf_hub_download(
            repo_id="wangqixun/YamerMIX_v8",
            subfolder="scheduler",
            filename="scheduler_config.json",
        )

        (tokenizers, text_encoders, unet, _, vae) = load_models_xl(
            pretrained_model_name_or_path=pretrained_model_name_or_path,
            scheduler_name=None,
            weight_dtype=dtype,
        )

        scheduler = diffusers.EulerDiscreteScheduler.from_config(scheduler_kwargs)
        pipe = StableDiffusionXLInstantIDPipeline(
            vae=vae,
            text_encoder=text_encoders[0],
            text_encoder_2=text_encoders[1],
            tokenizer=tokenizers[0],
            tokenizer_2=tokenizers[1],
            unet=unet,
            scheduler=scheduler,
            controlnet=controlnet,
        ).to(device)
    else:
        pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
            pretrained_model_name_or_path,
            controlnet=controlnet,
            torch_dtype=dtype,
            safety_checker=None,
            feature_extractor=None,
        ).to(device)

        pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)

    pipe.load_ip_adapter_instantid(face_adapter)
    # Load the LCM LoRA up front but keep it disabled until requested.
    pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
    pipe.disable_lora()

    def remove_tips():
        """Hide the usage-tips component."""
        return gr.update(visible=False)

    def convert_from_cv2_to_image(img: np.ndarray) -> Image.Image:
        """Convert a BGR OpenCV array into an RGB PIL image."""
        return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    def convert_from_image_to_cv2(img: Image.Image) -> np.ndarray:
        """Convert an RGB PIL image into a BGR OpenCV array."""
        return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

    def _run_for_prompt(face_file, style, prompt_index):
        """Generate one image using the ``prompt_index``-th prompt of ``style``.

        Each ``styles`` entry unpacks to ``(prompts, negative_prompt)``; an
        unknown style falls back to the entry for ``STYLE_NAMES[1]``.
        """
        prompts, negative = styles.get(style, styles.get(STYLE_NAMES[1]))
        return generate_image(face_file, prompts[prompt_index], negative)

    # The four wrappers keep ``progress`` in their signature so Gradio can
    # attach tqdm-based progress tracking to each event.
    def run_for_prompts1(face_file, style, progress=gr.Progress(track_tqdm=True)):
        return _run_for_prompt(face_file, style, 0)

    def run_for_prompts2(face_file, style, progress=gr.Progress(track_tqdm=True)):
        return _run_for_prompt(face_file, style, 1)

    def run_for_prompts3(face_file, style, progress=gr.Progress(track_tqdm=True)):
        return _run_for_prompt(face_file, style, 2)

    def run_for_prompts4(face_file, style, progress=gr.Progress(track_tqdm=True)):
        return _run_for_prompt(face_file, style, 3)

    def draw_kps(image_pil, kps, color_list=[(255,0,0), (0,255,0), (0,0,255), (255,255,0), (255,0,255)]):
        """Draw keypoints and connecting limbs on a black canvas sized like ``image_pil``.

        ``color_list`` is only read, never mutated, so the list default is safe.
        """
        stickwidth = 4
        limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
        kps = np.array(kps)

        w, h = image_pil.size
        out_img = np.zeros([h, w, 3])

        for index in limbSeq:
            color = color_list[index[0]]

            x = kps[index][:, 0]
            y = kps[index][:, 1]
            length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
            polygon = cv2.ellipse2Poly(
                (int(np.mean(x)), int(np.mean(y))),
                (int(length / 2), stickwidth),
                int(angle), 0, 360, 1,
            )
            out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
        # Dim the limbs so the keypoint dots drawn next stand out.
        out_img = (out_img * 0.6).astype(np.uint8)

        for idx_kp, kp in enumerate(kps):
            color = color_list[idx_kp]
            x, y = kp
            out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)

        return Image.fromarray(out_img.astype(np.uint8))

    def resize_img(input_image, max_side=640, min_side=640, size=None,
                   pad_to_max_side=True, mode=PIL.Image.BILINEAR, base_pixel_number=64):
        """Resize ``input_image`` and optionally pad it onto a white square.

        Without ``size`` the image is scaled from ``min_side`` / ``max_side``
        and each side is floored to a multiple of ``base_pixel_number``. With
        ``pad_to_max_side`` the result is centred on a white
        ``max_side`` x ``max_side`` RGB canvas.
        """
        w, h = input_image.size
        if size is not None:
            w_resize_new, h_resize_new = size
        else:
            ratio = min_side / min(h, w)
            w, h = round(ratio * w), round(ratio * h)
            ratio = max_side / max(h, w)
            input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
            w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
            h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
        input_image = input_image.resize([w_resize_new, h_resize_new], mode)

        if pad_to_max_side:
            res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
            offset_x = (max_side - w_resize_new) // 2
            offset_y = (max_side - h_resize_new) // 2
            # The canvas has 3 channels; convert so grayscale / RGBA inputs
            # do not fail on the assignment below.
            res[offset_y:offset_y + h_resize_new, offset_x:offset_x + w_resize_new] = np.array(
                input_image.convert("RGB")
            )
            input_image = Image.fromarray(res)
        return input_image

    def generate_image(face_image, prompt, negative_prompt):
        """Generate one stylised portrait of the largest face in ``face_image``.

        Raises:
            gr.Error: if no image is supplied or no face is detected.
        """
        # Fixed generation settings (the UI exposes no controls for them).
        pose_image_path = None
        enable_LCM = False
        identitynet_strength_ratio = 0.95
        adapter_strength_ratio = 0.60
        num_steps = 15
        guidance_scale = 8.5
        seed = random.randint(0, MAX_SEED)
        enhance_face_region = True

        if enable_LCM:
            pipe.enable_lora()
            pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
        else:
            pipe.disable_lora()
            pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)

        if face_image is None:
            raise gr.Error("Cannot find any input face image! Please upload the face image")

        face_image = resize_img(face_image)
        face_image_cv2 = convert_from_image_to_cv2(face_image)
        height, width, _ = face_image_cv2.shape

        # Extract face features
        face_info = app.get(face_image_cv2)

        if len(face_info) == 0:
            raise gr.Error("Cannot find any face in the image! Please upload another person image")

        # Only the face with the largest bounding-box area is used.
        face_info = sorted(
            face_info,
            key=lambda x: (x['bbox'][2] - x['bbox'][0]) * (x['bbox'][3] - x['bbox'][1]),
        )[-1]
        face_emb = face_info['embedding']
        face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info['kps'])

        # Never taken while pose_image_path stays None; kept as a switch.
        if pose_image_path is not None:
            pose_image = load_image(pose_image_path)
            pose_image = resize_img(pose_image)
            pose_image_cv2 = convert_from_image_to_cv2(pose_image)

            face_info = app.get(pose_image_cv2)

            if len(face_info) == 0:
                raise gr.Error("Cannot find any face in the reference image! Please upload another person image")

            face_info = face_info[-1]
            face_kps = draw_kps(pose_image, face_info['kps'])

            width, height = face_kps.size

        if enhance_face_region:
            control_mask = np.zeros([height, width, 3])
            # Clamp to the image: a negative coordinate would be read as an
            # index from the far edge and could leave the mask empty.
            x1, y1, x2, y2 = (max(int(v), 0) for v in face_info["bbox"])
            control_mask[y1:y2, x1:x2] = 255
            control_mask = Image.fromarray(control_mask.astype(np.uint8))
        else:
            control_mask = None

        generator = torch.Generator(device=device).manual_seed(seed)

        print("Start inference...")
        print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")

        pipe.set_ip_adapter_scale(adapter_strength_ratio)
        images = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image_embeds=face_emb,
            image=face_kps,
            control_mask=control_mask,
            controlnet_conditioning_scale=float(identitynet_strength_ratio),
            num_inference_steps=num_steps,
            guidance_scale=guidance_scale,
            height=height,
            width=width,
            generator=generator,
        ).images

        return images[0]

    ### Description
    title = r"""
<h1 align="center">Choose your AVATAR</h1>
"""

    description = r"""
<h2> Powered by IDfy </h2>"""

    article = r""""""

    tips = r""""""

    css = '''
.gradio-container {width: 95% !important; background-color: #E6F3FF;}
.image-gallery {height: 100vh !important; overflow: auto;}
.gradio-row .gradio-element { margin: 0 !important; }
'''
    with gr.Blocks(css=css) as demo:

        # description
        gr.Markdown(title)
        with gr.Row():
            gr.Image("./gradio_demo/logo.png", scale=0, min_width=50, show_label=False, show_download_button=False)
            gr.Markdown(description)
        with gr.Row():
            with gr.Column():
                style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
                face_file = gr.Image(label="Upload a photo of your face", type="pil", sources="webcam")
                submit = gr.Button("Submit", variant="primary")
            with gr.Column():
                with gr.Row():
                    gallery1 = gr.Image(label="Generated Images")
                    gallery2 = gr.Image(label="Generated Images")
                with gr.Row():
                    gallery3 = gr.Image(label="Generated Images")
                    gallery4 = gr.Image(label="Generated Images")
                # Collected but not passed to any handler in this function.
                email = gr.Textbox(label="Email",
                                   info="Enter your email address",
                                   value="")

        usage_tips = gr.Markdown(label="Usage tips of InstantID", value=tips, visible=False)

        runners_and_outputs = (
            (run_for_prompts1, gallery1),
            (run_for_prompts2, gallery2),
            (run_for_prompts3, gallery3),
            (run_for_prompts4, gallery4),
        )

        def _wire_generation(trigger):
            """Hide the tips, then fill the four galleries one after another."""
            event = trigger(
                fn=remove_tips,
                outputs=usage_tips,
                queue=True,
                api_name=False,
                show_progress="full",
            )
            for runner, gallery in runners_and_outputs:
                event = event.then(
                    fn=runner,
                    inputs=[face_file, style],
                    outputs=[gallery],
                )

        # Both a new upload and the Submit button start the same chain.
        _wire_generation(face_file.upload)
        _wire_generation(submit.click)

        gr.Markdown(article)

    demo.launch(share=True)
428
+
429
if __name__ == "__main__":
    # Command-line entry point: the only option is the model to load.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--pretrained_model_name_or_path",
        type=str,
        default="wangqixun/YamerMIX_v8",
    )
    options = cli.parse_args()

    main(options.pretrained_model_name_or_path, False)
gradio_demo/background.jpg ADDED
gradio_demo/controlnet_util.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+ from PIL import Image
4
+ from controlnet_aux import OpenposeDetector
5
+ from model_util import get_torch_device
6
+ import cv2
7
+
8
+
9
+ from transformers import DPTImageProcessor, DPTForDepthEstimation
10
+
11
# Shared inference resources, created once when the module is imported.
device = get_torch_device()

_DPT_CHECKPOINT = "Intel/dpt-hybrid-midas"
depth_estimator = DPTForDepthEstimation.from_pretrained(_DPT_CHECKPOINT).to(device)
feature_extractor = DPTImageProcessor.from_pretrained(_DPT_CHECKPOINT)

openpose = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
15
+
16
def get_depth_map(image):
    """Return a 1024x1024 three-channel depth visualisation of ``image``.

    The input goes through the module-level ``feature_extractor`` and
    ``depth_estimator``. The predicted depth is resized, min-max normalised
    to [0, 1], repeated over three channels and returned as a PIL image.
    """
    import contextlib

    # Follow the device the estimator was moved to instead of assuming CUDA,
    # so the function also works when get_torch_device() picks CPU / MPS.
    device_type = torch.device(device).type
    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values.to(device)
    # Mixed precision only on CUDA, as before; no autocast elsewhere.
    autocast_ctx = torch.autocast("cuda") if device_type == "cuda" else contextlib.nullcontext()
    with torch.no_grad(), autocast_ctx:
        depth_map = depth_estimator(pixel_values).predicted_depth

    depth_map = torch.nn.functional.interpolate(
        depth_map.unsqueeze(1),
        size=(1024, 1024),
        mode="bicubic",
        align_corners=False,
    )
    depth_min = torch.amin(depth_map, dim=[1, 2, 3], keepdim=True)
    depth_max = torch.amax(depth_map, dim=[1, 2, 3], keepdim=True)
    depth_map = (depth_map - depth_min) / (depth_max - depth_min)
    image = torch.cat([depth_map] * 3, dim=1)

    # Channels-last layout for PIL; only the first batch item is used.
    image = image.permute(0, 2, 3, 1).cpu().numpy()[0]
    image = Image.fromarray((image * 255.0).clip(0, 255).astype(np.uint8))
    return image
35
+
36
def get_canny_image(image, t1=100, t2=200):
    """Run Canny edge detection on ``image`` and return the edges as an "L" PIL image.

    ``t1`` and ``t2`` are passed to ``cv2.Canny`` as its two thresholds.
    """
    bgr_pixels = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    edge_map = cv2.Canny(bgr_pixels, t1, t2)
    return Image.fromarray(edge_map, "L")
gradio_demo/demo.py ADDED
@@ -0,0 +1,369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ sys.path.append('./')
3
+
4
+ from typing import Tuple
5
+
6
+ import os
7
+ import cv2
8
+ import math
9
+ import torch
10
+ import random
11
+ import numpy as np
12
+ import argparse
13
+
14
+ import PIL
15
+ from PIL import Image
16
+
17
+ import diffusers
18
+ from diffusers.utils import load_image
19
+ from diffusers.models import ControlNetModel
20
+ from diffusers import LCMScheduler
21
+
22
+ from huggingface_hub import hf_hub_download
23
+
24
+ import insightface
25
+ from insightface.app import FaceAnalysis
26
+
27
+ from style_template import styles
28
+ from pipeline_stable_diffusion_xl_instantid_full import StableDiffusionXLInstantIDPipeline
29
+ from model_util import load_models_xl, get_torch_device, torch_gc
30
+
31
+ from cv2 import imencode
32
+ import base64
33
+
34
+ # def encode_pil_to_base64_new(pil_image):
35
+ # print("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
36
+ # image_arr = np.asarray(pil_image)[:,:,::-1]
37
+ # _, byte_data = imencode('.png', image_arr)
38
+ # base64_data = base64.b64encode(byte_data)
39
+ # base64_string_opencv = base64_data.decode("utf-8")
40
+ # return "data:image/png;base64," + base64_string_opencv
41
+
42
+ import gradio as gr
43
+
44
+
45
# Module-level configuration shared by everything below.
MAX_SEED = np.iinfo(np.int32).max  # largest seed handed to random.randint
device = get_torch_device()
# Half precision only when the device string mentions CUDA; float32 otherwise.
dtype = torch.float16 if "cuda" in str(device) else torch.float32
STYLE_NAMES = list(styles.keys())
DEFAULT_STYLE_NAME = "Watercolor"

# Face encoder used to detect faces and extract embeddings / keypoints.
app = FaceAnalysis(
    name="antelopev2",
    root="./",
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)
app.prepare(ctx_id=0, det_size=(320, 320))

# Local paths to the InstantID weights.
face_adapter = "./checkpoints/ip-adapter.bin"
controlnet_path = "./checkpoints/ControlNetModel"

controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=dtype)

logo = Image.open("./gradio_demo/logo.png")

# The pipeline is built at import time from this fixed model id.
pretrained_model_name_or_path = "wangqixun/YamerMIX_v8"

if pretrained_model_name_or_path.endswith((".ckpt", ".safetensors")):
    # Single-file checkpoint: load the parts separately and assemble the
    # pipeline by hand, using the YamerMIX scheduler config.
    scheduler_kwargs = hf_hub_download(
        repo_id="wangqixun/YamerMIX_v8",
        subfolder="scheduler",
        filename="scheduler_config.json",
    )

    (tokenizers, text_encoders, unet, _, vae) = load_models_xl(
        pretrained_model_name_or_path=pretrained_model_name_or_path,
        scheduler_name=None,
        weight_dtype=dtype,
    )

    scheduler = diffusers.EulerDiscreteScheduler.from_config(scheduler_kwargs)
    pipe = StableDiffusionXLInstantIDPipeline(
        vae=vae,
        text_encoder=text_encoders[0],
        text_encoder_2=text_encoders[1],
        tokenizer=tokenizers[0],
        tokenizer_2=tokenizers[1],
        unet=unet,
        scheduler=scheduler,
        controlnet=controlnet,
    ).to(device)
else:
    pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
        pretrained_model_name_or_path,
        controlnet=controlnet,
        torch_dtype=dtype,
        safety_checker=None,
        feature_extractor=None,
    ).to(device)

    pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)

pipe.load_ip_adapter_instantid(face_adapter)
# Load the LCM LoRA up front but keep it disabled until requested.
pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
pipe.disable_lora()
110
+
111
+ # gr.processing_utils.encode_pil_to_base64 = encode_pil_to_base64_new
112
def remove_tips():
    """Return a Gradio update that hides the usage-tips component."""
    return gr.update(visible=False)
115
+
116
def convert_from_cv2_to_image(img: np.ndarray) -> Image.Image:
    """Convert a BGR OpenCV array into an RGB PIL image."""
    # ``Image`` is the PIL module here; the class is ``Image.Image``.
    return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
118
+
119
def convert_from_image_to_cv2(img: Image.Image) -> np.ndarray:
    """Convert an RGB PIL image into a BGR OpenCV array."""
    # ``Image`` is the PIL module here; the class is ``Image.Image``.
    return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
121
+
122
def _run_for_prompt(face_file, style, prompt_index):
    """Generate one image using the ``prompt_index``-th prompt of ``style``.

    Each ``styles`` entry unpacks to ``(prompts, negative_prompt)``; an
    unknown style falls back to the entry for ``STYLE_NAMES[1]``.
    """
    prompts, negative = styles.get(style, styles.get(STYLE_NAMES[1]))
    return generate_image(face_file, prompts[prompt_index], negative)


# The wrappers keep ``progress`` in their signature so Gradio can attach
# tqdm-based progress tracking to each event.
def run_for_prompts1(face_file, style, progress=gr.Progress(track_tqdm=True)):
    """Generate the image for the first prompt of the chosen style."""
    return _run_for_prompt(face_file, style, 0)


def run_for_prompts2(face_file, style, progress=gr.Progress(track_tqdm=True)):
    """Generate the image for the second prompt of the chosen style."""
    return _run_for_prompt(face_file, style, 1)


def run_for_prompts3(face_file, style, progress=gr.Progress(track_tqdm=True)):
    """Generate the image for the third prompt of the chosen style."""
    return _run_for_prompt(face_file, style, 2)


def run_for_prompts4(face_file, style, progress=gr.Progress(track_tqdm=True)):
    """Generate the image for the fourth prompt of the chosen style."""
    return _run_for_prompt(face_file, style, 3)
142
+
143
+
144
def draw_kps(image_pil, kps, color_list=[(255,0,0), (0,255,0), (0,0,255), (255,255,0), (255,0,255)]):
    """Draw keypoints and connecting limbs on a black canvas sized like ``image_pil``.

    Limbs join keypoints 0, 1, 3 and 4 to keypoint 2. Each limb takes the
    colour of its first keypoint, and every keypoint gets a filled dot in
    its own colour from ``color_list``.
    """
    stick_width = 4
    limb_pairs = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
    points = np.array(kps)

    canvas_w, canvas_h = image_pil.size
    canvas = np.zeros([canvas_h, canvas_w, 3])

    for pair in limb_pairs:
        limb_color = color_list[pair[0]]

        xs = points[pair][:, 0]
        ys = points[pair][:, 1]
        limb_length = ((xs[0] - xs[1]) ** 2 + (ys[0] - ys[1]) ** 2) ** 0.5
        limb_angle = math.degrees(math.atan2(ys[0] - ys[1], xs[0] - xs[1]))
        centre = (int(np.mean(xs)), int(np.mean(ys)))
        half_axes = (int(limb_length / 2), stick_width)
        outline = cv2.ellipse2Poly(centre, half_axes, int(limb_angle), 0, 360, 1)
        canvas = cv2.fillConvexPoly(canvas.copy(), outline, limb_color)
    # Dim the limbs so the dots drawn next stand out.
    canvas = (canvas * 0.6).astype(np.uint8)

    for position, point in enumerate(points):
        dot_color = color_list[position]
        px, py = point
        canvas = cv2.circle(canvas.copy(), (int(px), int(py)), 10, dot_color, -1)

    return Image.fromarray(canvas.astype(np.uint8))
171
+
172
def resize_img(input_image, max_side=640, min_side=640, size=None,
               pad_to_max_side=True, mode=PIL.Image.BILINEAR, base_pixel_number=64):
    """Resize ``input_image`` and optionally pad it onto a white square.

    Args:
        input_image: PIL image to resize.
        max_side: Target for the longer side; also the padded canvas size.
        min_side: Target for the shorter side in the first scaling step.
        size: Optional explicit ``(width, height)``; skips the ratio logic.
        pad_to_max_side: Centre the result on a white
            ``max_side`` x ``max_side`` RGB canvas.
        mode: PIL resampling filter.
        base_pixel_number: Each side is floored to a multiple of this.

    Returns:
        The resized (and possibly padded) PIL image.
    """
    w, h = input_image.size
    if size is not None:
        w_resize_new, h_resize_new = size
    else:
        ratio = min_side / min(h, w)
        w, h = round(ratio * w), round(ratio * h)
        ratio = max_side / max(h, w)
        input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
        w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
        h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
    input_image = input_image.resize([w_resize_new, h_resize_new], mode)

    if pad_to_max_side:
        res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
        offset_x = (max_side - w_resize_new) // 2
        offset_y = (max_side - h_resize_new) // 2
        # The canvas has 3 channels; convert so grayscale / RGBA inputs do
        # not fail on the assignment below.
        res[offset_y:offset_y + h_resize_new, offset_x:offset_x + w_resize_new] = np.array(
            input_image.convert("RGB")
        )
        input_image = Image.fromarray(res)
    return input_image
196
+
197
+
198
def generate_image(face_image, prompt, negative_prompt):
    """Generate one stylised portrait of the largest face in ``face_image``.

    Args:
        face_image: PIL image of the user, or ``None``.
        prompt: Positive prompt passed to the pipeline.
        negative_prompt: Negative prompt passed to the pipeline.

    Returns:
        The first image produced by the pipeline.

    Raises:
        gr.Error: if no image is supplied or no face is detected.
    """
    # Fixed generation settings (the UI exposes no controls for them).
    pose_image_path = None
    enable_LCM = False
    identitynet_strength_ratio = 0.95
    adapter_strength_ratio = 0.60
    num_steps = 15
    guidance_scale = 8.5
    seed = random.randint(0, MAX_SEED)
    enhance_face_region = True

    if enable_LCM:
        pipe.enable_lora()
        pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
    else:
        pipe.disable_lora()
        pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)

    if face_image is None:
        raise gr.Error("Cannot find any input face image! Please upload the face image")

    face_image = resize_img(face_image)
    face_image_cv2 = convert_from_image_to_cv2(face_image)
    height, width, _ = face_image_cv2.shape

    # Extract face features
    face_info = app.get(face_image_cv2)

    if len(face_info) == 0:
        raise gr.Error("Cannot find any face in the image! Please upload another person image")

    # Only the face with the largest bounding-box area is used.
    face_info = sorted(
        face_info,
        key=lambda x: (x['bbox'][2] - x['bbox'][0]) * (x['bbox'][3] - x['bbox'][1]),
    )[-1]
    face_emb = face_info['embedding']
    face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info['kps'])

    # Never taken while pose_image_path stays None; kept as a switch.
    if pose_image_path is not None:
        pose_image = load_image(pose_image_path)
        pose_image = resize_img(pose_image)
        pose_image_cv2 = convert_from_image_to_cv2(pose_image)

        face_info = app.get(pose_image_cv2)

        if len(face_info) == 0:
            raise gr.Error("Cannot find any face in the reference image! Please upload another person image")

        face_info = face_info[-1]
        face_kps = draw_kps(pose_image, face_info['kps'])

        width, height = face_kps.size

    if enhance_face_region:
        control_mask = np.zeros([height, width, 3])
        # Clamp to the image: a negative coordinate would be read as an
        # index from the far edge and could leave the mask empty.
        x1, y1, x2, y2 = (max(int(v), 0) for v in face_info["bbox"])
        control_mask[y1:y2, x1:x2] = 255
        control_mask = Image.fromarray(control_mask.astype(np.uint8))
    else:
        control_mask = None

    generator = torch.Generator(device=device).manual_seed(seed)

    print("Start inference...")
    print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")

    pipe.set_ip_adapter_scale(adapter_strength_ratio)
    images = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image_embeds=face_emb,
        image=face_kps,
        control_mask=control_mask,
        controlnet_conditioning_scale=float(identitynet_strength_ratio),
        num_inference_steps=num_steps,
        guidance_scale=guidance_scale,
        height=height,
        width=width,
        generator=generator,
    ).images

    return images[0]
287
+
288
def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=False):
    """Build and launch the avatar Gradio UI.

    Neither argument is read in this function: the pipeline is built at
    module import from a fixed model id.
    NOTE(review): the CLI model option therefore has no effect here --
    confirm whether that is intended.
    """
    # NOTE(review): the layout nesting below was reconstructed from a
    # listing that had lost its indentation -- confirm against the real file.

    ### Description
    title = r"""
<h1 align="center">Choose your AVATAR</h1>
"""

    description = r"""
<h2> Powered by IDfy </h2>"""

    article = r""""""

    tips = r""""""

    js = ''' '''

    css = '''
.gradio-container {width: 95% !important; background-color: #E6F3FF;}
.image-gallery {height: 100vh !important; overflow: auto;}
.gradio-row .gradio-element { margin: 0 !important; }
'''

    with gr.Blocks(css=css, js=js) as demo:
        # Header: title, then logo next to the description.
        gr.Markdown(title)
        with gr.Row():
            gr.Image(
                "./gradio_demo/logo.png",
                scale=0,
                min_width=50,
                show_label=False,
                show_download_button=False,
            )
            gr.Markdown(description)

        with gr.Row():
            # Inputs.
            with gr.Column():
                style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
                face_file = gr.Image(
                    label="Upload a photo of your face",
                    type="pil",
                    sources="webcam",
                )
                submit = gr.Button("Submit", variant="primary")
            # Outputs: only gallery1 is wired to a handler below.
            with gr.Column():
                with gr.Row():
                    gallery1 = gr.Image(label="Generated Images")
                    gallery2 = gr.Image(label="Generated Images")
                with gr.Row():
                    gallery3 = gr.Image(label="Generated Images")
                    gallery4 = gr.Image(label="Generated Images")
                email = gr.Textbox(
                    label="Email",
                    info="Enter your email address",
                    value="",
                )

        usage_tips = gr.Markdown(label="Usage tips of InstantID", value=tips, visible=False)

        # Settings shared by both "hide the tips" events.
        hide_tips_event = dict(
            fn=remove_tips,
            outputs=usage_tips,
            queue=True,
            api_name=False,
            show_progress="full",
        )

        face_file.upload(**hide_tips_event)

        submit.click(**hide_tips_event).then(
            fn=run_for_prompts1,
            inputs=[face_file, style],
            outputs=[gallery1],
        )

        gr.Markdown(article)

    demo.launch(share=True)
363
+
364
if __name__ == "__main__":
    # Command-line entry point: parse the model option and start the UI.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--pretrained_model_name_or_path",
        type=str,
        default="wangqixun/YamerMIX_v8",
    )
    options = cli.parse_args()

    main(options.pretrained_model_name_or_path, False)
gradio_demo/download_models.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Download the InstantID weights, the LCM LoRA and the antelopev2 face models."""
import os
import zipfile

import gdown
from huggingface_hub import hf_hub_download

CHECKPOINT_DIR = "./checkpoints"
MODELS_DIR = "./models/"

# (repo_id, filename) pairs fetched into CHECKPOINT_DIR.
HUB_FILES = (
    ("InstantX/InstantID", "ControlNetModel/config.json"),
    ("InstantX/InstantID", "ControlNetModel/diffusion_pytorch_model.safetensors"),
    ("InstantX/InstantID", "ip-adapter.bin"),
    ("latent-consistency/lcm-lora-sdxl", "pytorch_lora_weights.safetensors"),
)

# download models
for repo_id, filename in HUB_FILES:
    hf_hub_download(repo_id=repo_id, filename=filename, local_dir=CHECKPOINT_DIR)

# download antelopev2
os.makedirs(MODELS_DIR, exist_ok=True)  # make sure the target directory exists
archive_path = gdown.download(
    url="https://drive.google.com/file/d/18wEUfMNohBJ4K3Ly5wpTejPfDzp-8fI8/view?usp=sharing",
    output=MODELS_DIR,
    quiet=False,
    fuzzy=True,
)
if archive_path is None:
    raise RuntimeError("Failed to download the antelopev2 archive from Google Drive")

# Extract with the standard library rather than shelling out to `unzip`,
# which may be missing and whose failures were silently ignored.
with zipfile.ZipFile(archive_path) as archive:
    archive.extractall(MODELS_DIR)
gradio_demo/logo.png ADDED
gradio_demo/logo1.png ADDED
gradio_demo/model_util.py ADDED
@@ -0,0 +1,472 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Literal, Union, Optional, Tuple, List
2
+
3
+ import torch
4
+ from transformers import CLIPTextModel, CLIPTokenizer, CLIPTextModelWithProjection
5
+ from diffusers import (
6
+ UNet2DConditionModel,
7
+ SchedulerMixin,
8
+ StableDiffusionPipeline,
9
+ StableDiffusionXLPipeline,
10
+ AutoencoderKL,
11
+ )
12
+ from diffusers.pipelines.stable_diffusion.convert_from_ckpt import (
13
+ convert_ldm_unet_checkpoint,
14
+ )
15
+ from safetensors.torch import load_file
16
+ from diffusers.schedulers import (
17
+ DDIMScheduler,
18
+ DDPMScheduler,
19
+ LMSDiscreteScheduler,
20
+ EulerDiscreteScheduler,
21
+ EulerAncestralDiscreteScheduler,
22
+ UniPCMultistepScheduler,
23
+ )
24
+
25
+ from omegaconf import OmegaConf
26
+
27
# Model parameters for the Diffusers version of Stable Diffusion
NUM_TRAIN_TIMESTEPS = 1000
BETA_START = 0.00085
BETA_END = 0.012

# UNet hyper-parameters
UNET_PARAMS_MODEL_CHANNELS = 320
UNET_PARAMS_CHANNEL_MULT = [1, 2, 4, 4]
UNET_PARAMS_ATTENTION_RESOLUTIONS = [4, 2, 1]
UNET_PARAMS_IMAGE_SIZE = 64  # fixed from old invalid value `32`
UNET_PARAMS_IN_CHANNELS = 4
UNET_PARAMS_OUT_CHANNELS = 4
UNET_PARAMS_NUM_RES_BLOCKS = 2
UNET_PARAMS_CONTEXT_DIM = 768
UNET_PARAMS_NUM_HEADS = 8
# UNET_PARAMS_USE_LINEAR_PROJECTION = False

# VAE hyper-parameters
VAE_PARAMS_Z_CHANNELS = 4
VAE_PARAMS_RESOLUTION = 256
VAE_PARAMS_IN_CHANNELS = 3
VAE_PARAMS_OUT_CH = 3
VAE_PARAMS_CH = 128
VAE_PARAMS_CH_MULT = [1, 2, 4, 4]
VAE_PARAMS_NUM_RES_BLOCKS = 2

# V2 overrides
V2_UNET_PARAMS_ATTENTION_HEAD_DIM = [5, 10, 20, 20]
V2_UNET_PARAMS_CONTEXT_DIM = 1024
# V2_UNET_PARAMS_USE_LINEAR_PROJECTION = True

# Repositories the tokenizers are taken from
TOKENIZER_V1_MODEL_NAME = "CompVis/stable-diffusion-v1-4"
TOKENIZER_V2_MODEL_NAME = "stabilityai/stable-diffusion-2-1"

# Scheduler names accepted by this module
AVAILABLE_SCHEDULERS = Literal[
    "ddim",
    "ddpm",
    "lms",
    "euler_a",
    "euler",
    "uniPC",
]
60
+
61
+ SDXL_TEXT_ENCODER_TYPE = Union[CLIPTextModel, CLIPTextModelWithProjection]
62
+
63
+ DIFFUSERS_CACHE_DIR = None # if you want to change the cache dir, change this
64
+
65
+
66
def load_checkpoint_with_text_encoder_conversion(ckpt_path: str, device="cpu"):
    """Load a checkpoint's state dict and normalise its text-encoder key names.

    Some checkpoints store the text-encoder weights without the ``text_model``
    path component; such keys are renamed so that the component is present.

    Args:
        ckpt_path: Checkpoint file. A ``.safetensors`` suffix selects the
            safetensors loader; anything else is read with ``torch.load``.
        device: ``map_location`` handed to ``torch.load``. It is not used for
            ``.safetensors`` files.

    Returns:
        ``(checkpoint, state_dict)``. ``checkpoint`` is the object returned by
        ``torch.load`` when the weights sit under its ``"state_dict"`` key,
        otherwise ``None``.
    """
    prefix_renames = (
        (
            "cond_stage_model.transformer.embeddings.",
            "cond_stage_model.transformer.text_model.embeddings.",
        ),
        (
            "cond_stage_model.transformer.encoder.",
            "cond_stage_model.transformer.text_model.encoder.",
        ),
        (
            "cond_stage_model.transformer.final_layer_norm.",
            "cond_stage_model.transformer.text_model.final_layer_norm.",
        ),
    )

    checkpoint = None
    if ckpt_path.endswith(".safetensors"):
        # `device` is deliberately not forwarded: the original author noted it may cause an error.
        state_dict = load_file(ckpt_path)
    else:
        loaded = torch.load(ckpt_path, map_location=device)
        if "state_dict" in loaded:
            checkpoint = loaded
            state_dict = loaded["state_dict"]
        else:
            state_dict = loaded

    # Work out every rename first so the dict is not mutated while iterated.
    pending_renames = [
        (old_key, new_prefix + old_key[len(old_prefix):])
        for old_prefix, new_prefix in prefix_renames
        for old_key in state_dict.keys()
        if old_key.startswith(old_prefix)
    ]

    for old_key, new_key in pending_renames:
        state_dict[new_key] = state_dict.pop(old_key)

    return checkpoint, state_dict
106
+
107
+
108
def create_unet_diffusers_config(v2, use_linear_projection_in_v2=False):
    """Build the keyword dict for a diffusers UNet from the module constants.

    Args:
        v2: When truthy, the ``V2_*`` constants supply the cross-attention
            dimension and the attention head dimension.
        use_linear_projection_in_v2: When truthy together with ``v2``,
            ``use_linear_projection=True`` is added to the result.

    Returns:
        A dict with the UNet configuration values.
    """
    channels = [
        UNET_PARAMS_MODEL_CHANNELS * factor for factor in UNET_PARAMS_CHANNEL_MULT
    ]
    depth = len(channels)

    # The resolution doubles on every down level (1, 2, 4, ...); the up path
    # visits the same resolutions in reverse.
    down_resolutions = [2**level for level in range(depth)]
    up_resolutions = down_resolutions[::-1]

    down_blocks = tuple(
        "CrossAttnDownBlock2D"
        if res in UNET_PARAMS_ATTENTION_RESOLUTIONS
        else "DownBlock2D"
        for res in down_resolutions
    )
    up_blocks = tuple(
        "CrossAttnUpBlock2D"
        if res in UNET_PARAMS_ATTENTION_RESOLUTIONS
        else "UpBlock2D"
        for res in up_resolutions
    )

    if v2:
        context_dim = V2_UNET_PARAMS_CONTEXT_DIM
        head_dim = V2_UNET_PARAMS_ATTENTION_HEAD_DIM
    else:
        context_dim = UNET_PARAMS_CONTEXT_DIM
        head_dim = UNET_PARAMS_NUM_HEADS

    config = {
        "sample_size": UNET_PARAMS_IMAGE_SIZE,
        "in_channels": UNET_PARAMS_IN_CHANNELS,
        "out_channels": UNET_PARAMS_OUT_CHANNELS,
        "down_block_types": down_blocks,
        "up_block_types": up_blocks,
        "block_out_channels": tuple(channels),
        "layers_per_block": UNET_PARAMS_NUM_RES_BLOCKS,
        "cross_attention_dim": context_dim,
        "attention_head_dim": head_dim,
    }
    if v2 and use_linear_projection_in_v2:
        config["use_linear_projection"] = True

    return config
160
+
161
+
162
def load_diffusers_model(
    pretrained_model_name_or_path: str,
    v2: bool = False,
    clip_skip: Optional[int] = None,
    weight_dtype: torch.dtype = torch.float32,
) -> Tuple[CLIPTokenizer, CLIPTextModel, UNet2DConditionModel, AutoencoderKL]:
    """Load tokenizer, text encoder, UNet and VAE from a diffusers-format model.

    Args:
        pretrained_model_name_or_path: Model id or directory handed to the
            ``from_pretrained`` calls for the text encoder, UNet and VAE.
        v2: Selects the v2 tokenizer repository and the 24-layer text-encoder
            arithmetic instead of the v1 / 12-layer one.
        clip_skip: When given, the text encoder is loaded with
            ``total_layers - (clip_skip - 1)`` hidden layers. When ``None``,
            23 layers are used for v2 and 12 for v1.
        weight_dtype: ``torch_dtype`` for the tokenizer, text encoder and UNet.

    Returns:
        ``(tokenizer, text_encoder, unet, vae)``.
    """
    if v2:
        tokenizer_name = TOKENIZER_V2_MODEL_NAME
        # default is clip skip 2
        total_layers, default_layers = 24, 23
    else:
        tokenizer_name = TOKENIZER_V1_MODEL_NAME
        total_layers, default_layers = 12, 12

    if clip_skip is None:
        num_hidden_layers = default_layers
    else:
        num_hidden_layers = total_layers - (clip_skip - 1)

    tokenizer = CLIPTokenizer.from_pretrained(
        tokenizer_name,
        subfolder="tokenizer",
        torch_dtype=weight_dtype,
        cache_dir=DIFFUSERS_CACHE_DIR,
    )
    text_encoder = CLIPTextModel.from_pretrained(
        pretrained_model_name_or_path,
        subfolder="text_encoder",
        num_hidden_layers=num_hidden_layers,
        torch_dtype=weight_dtype,
        cache_dir=DIFFUSERS_CACHE_DIR,
    )

    unet = UNet2DConditionModel.from_pretrained(
        pretrained_model_name_or_path,
        subfolder="unet",
        torch_dtype=weight_dtype,
        cache_dir=DIFFUSERS_CACHE_DIR,
    )

    # The VAE is loaded without `torch_dtype` / `cache_dir`, as in the original code.
    vae = AutoencoderKL.from_pretrained(pretrained_model_name_or_path, subfolder="vae")

    return tokenizer, text_encoder, unet, vae
208
+
209
+
210
def load_checkpoint_model(
    checkpoint_path: str,
    v2: bool = False,
    clip_skip: Optional[int] = None,
    weight_dtype: torch.dtype = torch.float32,
) -> Tuple[CLIPTokenizer, CLIPTextModel, UNet2DConditionModel, AutoencoderKL]:
    """Load tokenizer, text encoder, UNet and VAE from a single checkpoint file.

    The tokenizer, text encoder and VAE are taken from a pipeline built with
    ``StableDiffusionPipeline.from_single_file``. The UNet is built separately
    from the checkpoint's state dict and the config produced by
    ``create_unet_diffusers_config``.

    Args:
        checkpoint_path: Path to the checkpoint file.
        v2: Enables ``upcast_attention`` for the pipeline, the v2 UNet config
            and the 24-layer clip-skip arithmetic.
        clip_skip: When given, ``text_encoder.config.num_hidden_layers`` is set
            to ``total_layers - (clip_skip - 1)``.
        weight_dtype: ``torch_dtype`` for the pipeline. The separately built
            UNet is not cast here.

    Returns:
        ``(tokenizer, text_encoder, unet, vae)``.
    """
    pipe = StableDiffusionPipeline.from_single_file(
        checkpoint_path,
        upcast_attention=True if v2 else False,
        torch_dtype=weight_dtype,
        cache_dir=DIFFUSERS_CACHE_DIR,
    )

    # Rebuild the UNet from the raw weights rather than reusing pipe.unet.
    _, state_dict = load_checkpoint_with_text_encoder_conversion(checkpoint_path)
    unet_config = create_unet_diffusers_config(v2, use_linear_projection_in_v2=v2)
    unet_config["class_embed_type"] = None
    unet_config["addition_embed_type"] = None
    converted_unet_checkpoint = convert_ldm_unet_checkpoint(state_dict, unet_config)
    unet = UNet2DConditionModel(**unet_config)
    unet.load_state_dict(converted_unet_checkpoint)

    tokenizer = pipe.tokenizer
    text_encoder = pipe.text_encoder
    vae = pipe.vae

    if clip_skip is not None:
        total_layers = 24 if v2 else 12
        # NOTE(review): this only edits the config of an already-built model;
        # confirm that it changes how many layers actually run.
        text_encoder.config.num_hidden_layers = total_layers - (clip_skip - 1)

    del pipe

    return tokenizer, text_encoder, unet, vae
243
+
244
+
245
def load_models(
    pretrained_model_name_or_path: str,
    scheduler_name: str,
    v2: bool = False,
    v_pred: bool = False,
    weight_dtype: torch.dtype = torch.float32,
) -> Tuple[
    CLIPTokenizer,
    CLIPTextModel,
    UNet2DConditionModel,
    Optional[SchedulerMixin],
    AutoencoderKL,
]:
    """Load the model components and, optionally, a noise scheduler.

    Args:
        pretrained_model_name_or_path: A path ending in ``.ckpt`` or
            ``.safetensors`` is loaded as a single checkpoint file; anything
            else is treated as a diffusers-format model.
        scheduler_name: Scheduler to create. A falsy value skips scheduler
            creation.
        v2: Forwarded to the selected loader.
        v_pred: Requests ``"v_prediction"`` instead of ``"epsilon"`` as the
            scheduler prediction type.
        weight_dtype: Forwarded to the selected loader.

    Returns:
        ``(tokenizer, text_encoder, unet, scheduler, vae)``; ``scheduler`` is
        ``None`` when ``scheduler_name`` is falsy.
    """
    is_single_file = pretrained_model_name_or_path.endswith((".ckpt", ".safetensors"))
    loader = load_checkpoint_model if is_single_file else load_diffusers_model
    tokenizer, text_encoder, unet, vae = loader(
        pretrained_model_name_or_path, v2=v2, weight_dtype=weight_dtype
    )

    scheduler = None
    if scheduler_name:
        scheduler = create_noise_scheduler(
            scheduler_name,
            prediction_type="v_prediction" if v_pred else "epsilon",
        )

    return tokenizer, text_encoder, unet, scheduler, vae
272
+
273
+
274
def load_diffusers_model_xl(
    pretrained_model_name_or_path: str,
    weight_dtype: torch.dtype = torch.float32,
) -> Tuple[
    List[CLIPTokenizer],
    List[SDXL_TEXT_ENCODER_TYPE],
    UNet2DConditionModel,
    AutoencoderKL,
]:
    """Load the SDXL components from a diffusers-format model.

    Args:
        pretrained_model_name_or_path: Model id or directory handed to every
            ``from_pretrained`` call.
        weight_dtype: ``torch_dtype`` for the tokenizers, text encoders and UNet.

    Returns:
        ``(tokenizers, text_encoders, unet, vae)``, where ``tokenizers`` and
        ``text_encoders`` are two-element lists loaded from the
        ``tokenizer`` / ``tokenizer_2`` and ``text_encoder`` /
        ``text_encoder_2`` subfolders.
    """
    shared_kwargs = dict(torch_dtype=weight_dtype, cache_dir=DIFFUSERS_CACHE_DIR)

    tokenizers = [
        CLIPTokenizer.from_pretrained(
            pretrained_model_name_or_path,
            subfolder="tokenizer",
            **shared_kwargs,
        ),
        CLIPTokenizer.from_pretrained(
            pretrained_model_name_or_path,
            subfolder="tokenizer_2",
            pad_token_id=0,  # same as open clip
            **shared_kwargs,
        ),
    ]

    text_encoders = [
        CLIPTextModel.from_pretrained(
            pretrained_model_name_or_path,
            subfolder="text_encoder",
            **shared_kwargs,
        ),
        CLIPTextModelWithProjection.from_pretrained(
            pretrained_model_name_or_path,
            subfolder="text_encoder_2",
            **shared_kwargs,
        ),
    ]

    unet = UNet2DConditionModel.from_pretrained(
        pretrained_model_name_or_path,
        subfolder="unet",
        **shared_kwargs,
    )
    # The VAE is loaded without `torch_dtype` / `cache_dir`, as in the original code.
    vae = AutoencoderKL.from_pretrained(pretrained_model_name_or_path, subfolder="vae")
    return tokenizers, text_encoders, unet, vae
319
+
320
+
321
def load_checkpoint_model_xl(
    checkpoint_path: str,
    weight_dtype: torch.dtype = torch.float32,
) -> Tuple[
    List[CLIPTokenizer],
    List[SDXL_TEXT_ENCODER_TYPE],
    UNet2DConditionModel,
    AutoencoderKL,
]:
    """Load the SDXL components from a single checkpoint file.

    Args:
        checkpoint_path: Path handed to
            ``StableDiffusionXLPipeline.from_single_file``.
        weight_dtype: ``torch_dtype`` for the pipeline.

    Returns:
        ``(tokenizers, text_encoders, unet, vae)`` taken from the pipeline;
        ``tokenizers`` and ``text_encoders`` are two-element lists.
    """
    pipe = StableDiffusionXLPipeline.from_single_file(
        checkpoint_path,
        torch_dtype=weight_dtype,
        cache_dir=DIFFUSERS_CACHE_DIR,
    )

    unet = pipe.unet
    vae = pipe.vae
    tokenizers = [pipe.tokenizer, pipe.tokenizer_2]
    text_encoders = [pipe.text_encoder, pipe.text_encoder_2]

    # load_diffusers_model_xl() requests pad id 0 on the second *tokenizer*
    # ("same as open clip"); apply the same setting here so both loaders agree.
    tokenizers[1].pad_token_id = 0
    # Kept from the original code, which set the attribute on the text encoder.
    # NOTE(review): presumably nothing reads it there — confirm, then drop.
    text_encoders[1].pad_token_id = 0

    del pipe

    return tokenizers, text_encoders, unet, vae
341
+
342
+
343
def load_models_xl(
    pretrained_model_name_or_path: str,
    scheduler_name: str,
    weight_dtype: torch.dtype = torch.float32,
    noise_scheduler_kwargs=None,
) -> Tuple[
    List[CLIPTokenizer],
    List[SDXL_TEXT_ENCODER_TYPE],
    UNet2DConditionModel,
    Optional[SchedulerMixin],
    AutoencoderKL,
]:
    """Load the SDXL components and, optionally, a noise scheduler.

    Args:
        pretrained_model_name_or_path: A path ending in ``.ckpt`` or
            ``.safetensors`` is loaded as a single checkpoint file; anything
            else is treated as a diffusers-format model.
        scheduler_name: Scheduler to create. A falsy value skips scheduler
            creation.
        weight_dtype: Forwarded to the selected loader.
        noise_scheduler_kwargs: Forwarded to ``create_noise_scheduler``.

    Returns:
        ``(tokenizers, text_encoders, unet, scheduler, vae)``; ``scheduler``
        is ``None`` when ``scheduler_name`` is falsy.
    """
    is_single_file = pretrained_model_name_or_path.endswith((".ckpt", ".safetensors"))
    loader = load_checkpoint_model_xl if is_single_file else load_diffusers_model_xl
    tokenizers, text_encoders, unet, vae = loader(
        pretrained_model_name_or_path, weight_dtype
    )

    scheduler = None
    if scheduler_name:
        scheduler = create_noise_scheduler(scheduler_name, noise_scheduler_kwargs)

    return tokenizers, text_encoders, unet, scheduler, vae
370
+
371
def create_noise_scheduler(
    scheduler_name: AVAILABLE_SCHEDULERS = "ddpm",
    noise_scheduler_kwargs=None,
    prediction_type: Literal["epsilon", "v_prediction"] = "epsilon",
) -> SchedulerMixin:
    """Create a diffusers noise scheduler by name.

    Args:
        scheduler_name: One of ``ddim``, ``ddpm``, ``lms``, ``euler_a``,
            ``euler`` or ``uniPC``. Matching is case-insensitive and spaces
            are treated as underscores.
        noise_scheduler_kwargs: Constructor arguments for the scheduler, as an
            OmegaConf config or a plain mapping. When ``None``, the Stable
            Diffusion values declared at the top of this module are used.
            (The previous code passed ``None`` straight to
            ``OmegaConf.to_container``, so the default never worked.)
        prediction_type: Used when ``noise_scheduler_kwargs`` does not already
            contain a ``prediction_type`` entry. It was previously ignored.

    Returns:
        The constructed scheduler.

    Raises:
        ValueError: If ``scheduler_name`` is not a known scheduler.
    """
    # API docs: https://huggingface.co/docs/diffusers/v0.17.1/en/api/schedulers/<page>
    scheduler_classes = {
        "ddim": DDIMScheduler,  # page: ddim
        "ddpm": DDPMScheduler,  # page: ddpm
        "lms": LMSDiscreteScheduler,  # page: lms_discrete
        "euler_a": EulerAncestralDiscreteScheduler,  # page: euler_ancestral
        "euler": EulerDiscreteScheduler,  # page: euler
        "unipc": UniPCMultistepScheduler,  # page: unipc
    }

    name = scheduler_name.lower().replace(" ", "_")
    if name not in scheduler_classes:
        raise ValueError(f"Unknown scheduler name: {name}")

    if noise_scheduler_kwargs is None:
        # NOTE(review): "scaled_linear" is assumed to be the intended schedule
        # for these beta values, and no `clip_sample` override is applied for
        # ddim/ddpm — confirm both against the training setup.
        scheduler_kwargs = {
            "num_train_timesteps": NUM_TRAIN_TIMESTEPS,
            "beta_start": BETA_START,
            "beta_end": BETA_END,
            "beta_schedule": "scaled_linear",
        }
    elif OmegaConf.is_config(noise_scheduler_kwargs):
        scheduler_kwargs = OmegaConf.to_container(noise_scheduler_kwargs)
    else:
        # Copy so the caller's mapping is not modified below.
        scheduler_kwargs = dict(noise_scheduler_kwargs)

    # An explicit entry in the kwargs wins over the `prediction_type` argument.
    scheduler_kwargs.setdefault("prediction_type", prediction_type)

    return scheduler_classes[name](**scheduler_kwargs)
407
+
408
+
409
def torch_gc():
    """Run the Python garbage collector and, if CUDA is available, free cached CUDA memory."""
    import gc

    gc.collect()

    if not torch.cuda.is_available():
        return

    with torch.cuda.device("cuda"):
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
417
+
418
+
419
+ from enum import Enum
420
+
421
+
422
class CPUState(Enum):
    """Backend selector consulted by is_intel_xpu() and get_torch_device()."""

    GPU = 0
    CPU = 1
    MPS = 2


# Module-level backend state. The detection code below may update
# `cpu_state` and `xpu_available`.
cpu_state = CPUState.GPU
xpu_available = False
directml_enabled = False
431
+
432
+
433
def is_intel_xpu():
    """Return True when the backend state is GPU and an XPU was detected."""
    return bool(cpu_state == CPUState.GPU and xpu_available)
440
+
441
+
442
# Best-effort backend detection. Failures are ignored on purpose (the
# extension may be missing or the attribute absent), but only ordinary
# exceptions are swallowed so KeyboardInterrupt / SystemExit still propagate.
try:
    import intel_extension_for_pytorch as ipex

    if torch.xpu.is_available():
        xpu_available = True
except Exception:
    pass

try:
    if torch.backends.mps.is_available():
        cpu_state = CPUState.MPS
        import torch.mps
except Exception:
    pass
456
+
457
+
458
def get_torch_device():
    """Return the torch device matching the module's backend state.

    Order of precedence: DirectML (when enabled), MPS, CPU, Intel XPU, then
    the current CUDA device. If the state says GPU but CUDA is not available,
    the CPU device is returned instead of letting
    ``torch.cuda.current_device()`` raise.
    """
    if directml_enabled:
        # NOTE(review): `directml_device` is not assigned anywhere in the
        # visible part of this module; this branch relies on it being defined
        # elsewhere before `directml_enabled` is switched on.
        return directml_device
    if cpu_state == CPUState.MPS:
        return torch.device("mps")
    if cpu_state == CPUState.CPU:
        return torch.device("cpu")
    if is_intel_xpu():
        return torch.device("xpu")
    if not torch.cuda.is_available():
        # `cpu_state` defaults to GPU, so CUDA-less machines end up here.
        return torch.device("cpu")
    return torch.device(torch.cuda.current_device())
gradio_demo/preprocess.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import random
3
+ import csv
4
+ import gc
5
+ import glob
6
+ from datetime import datetime
7
+ import time
8
+ from pathlib import Path
9
+ from style_template import style_list
10
+ from PIL import Image, ImageOps
11
+
12
# Default configuration.
INPUT_FOLDER_NAME = 'examples'
OUTPUT_FOLDER_NAME = 'generated_images'
LOG_FILENAME = 'generation_log.csv'
# The CSV log lives in the working directory that is current at import time.
logfile_path = os.path.join(os.getcwd(), LOG_FILENAME)

# Prompts sent with every generation request.
PROMPT = "human, sharp focus"
NEGATIVE_PROMPT = "(blurry, blur, text, abstract, glitch, lowres, low quality, worst quality:1.2), (text:1.2), watermark, painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured"

# (low, high) bounds from which initial_image() draws a random value per image.
IDENTITYNET_STRENGTH_RATIO_RANGE = (1.0, 1.5)
ADAPTER_STRENGTH_RATIO_RANGE = (0.7, 1.0)
NUM_INFERENCE_STEPS_RANGE = (40, 60)
GUIDANCE_SCALE_RANGE = (7.0, 12.0)

# Size limits passed to resize_and_pad_image().
MAX_SIDE = 1280
MIN_SIDE = 1024

# Number of passes initial_image() makes over the input folder.
NUMBER_OF_LOOPS = 1

# Style names come from the imported style_list.
STYLES = [entry["name"] for entry in style_list]
USE_RANDOM_STYLE = False
31
+
32
def choose_random_style():
    """Return one randomly chosen entry of STYLES."""
    picked_style = random.choice(STYLES)
    return picked_style
34
+
35
def get_random_image_file(input_folder):
    """Return the path of a randomly chosen image file in ``input_folder``.

    Only regular files with a ``.jpg``, ``.jpeg`` or ``.png`` suffix
    (case-insensitive) directly inside the folder are considered; directories
    that merely look like images are skipped. Candidates are sorted before
    choosing so that a seeded ``random`` gives the same pick regardless of
    filesystem listing order.

    Args:
        input_folder: Directory to search (not recursive).

    Returns:
        The chosen file path as a string.

    Raises:
        FileNotFoundError: If the folder contains no matching file.
    """
    valid_extensions = {".jpg", ".jpeg", ".png"}
    files = sorted(
        entry
        for entry in Path(input_folder).glob("*")
        if entry.is_file() and entry.suffix.lower() in valid_extensions
    )
    if not files:
        raise FileNotFoundError(f"No images found in directory {input_folder}")
    return str(random.choice(files))
41
+
42
def resize_and_pad_image(image_path, max_side, min_side, pad_color=(255, 255, 255)):
    """Scale an image to fit the size limits and pad it to a square.

    The image is scaled by ``min(min_side / shorter_side, max_side /
    longer_side)`` and then padded with ``pad_color`` to ``max_side`` x
    ``max_side``, keeping the scaled image centred.

    Args:
        image_path: Path of the image to open.
        max_side: Side length of the returned square image.
        min_side: Target for the shorter side when computing the scale.
        pad_color: Fill colour for the padding, given as an RGB tuple.

    Returns:
        The padded PIL image.
    """
    # The context manager releases the file handle once the pixels are read.
    with Image.open(image_path) as opened:
        # An RGB pad colour is not a valid fill for single-band modes such as
        # "L", so those are converted. RGB, RGBA and palette images keep
        # their mode, as before.
        if opened.mode in ("RGB", "RGBA", "P"):
            image = opened
        else:
            image = opened.convert("RGB")

        # Calculate the scale and new size; clamp to 1 px so extreme aspect
        # ratios cannot produce a zero-sized resize.
        ratio = min(min_side / min(image.size), max_side / max(image.size))
        new_size = (
            max(1, int(image.size[0] * ratio)),
            max(1, int(image.size[1] * ratio)),
        )

        image = image.resize(new_size, Image.BILINEAR)

    # Split the leftover space evenly; the odd pixel goes to the right/bottom.
    delta_w = max_side - new_size[0]
    delta_h = max_side - new_size[1]
    padding = (
        delta_w // 2,
        delta_h // 2,
        delta_w - (delta_w // 2),
        delta_h - (delta_h // 2),
    )

    return ImageOps.expand(image, padding, pad_color)
62
+
63
def log_to_csv(logfile_path, image_name, new_file_name='Unknown', identitynet_strength_ratio=0.0, adapter_strength_ratio=0.0, num_inference_steps=0, guidance_scale=0.0, seed=0, success=True, error_message='', style_name="", prompt="", negative_prompt="", time_taken=0.0, current_timestamp=""):
    """Append one generation record to a CSV log, creating the file if needed.

    Missing parent directories are created. A header row is written when the
    file does not exist yet or is empty.

    Args:
        logfile_path: CSV file to append to. A bare filename (no directory
            part) is accepted and refers to the current working directory.
        image_name: Name of the input image.
        new_file_name: Name of the generated file.
        identitynet_strength_ratio, adapter_strength_ratio,
        num_inference_steps, guidance_scale, seed: Generation settings.
        success: Whether generation succeeded.
        error_message: Error text for failed generations.
        style_name, prompt, negative_prompt: Prompt settings.
        time_taken: Processing time for this image.
        current_timestamp: Timestamp string for the record.
    """
    # os.makedirs("") raises, so only create the directory when there is one.
    log_dir = os.path.dirname(logfile_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    needs_header = (
        not os.path.isfile(logfile_path) or os.path.getsize(logfile_path) == 0
    )

    # Insertion order of this dict defines the column order.
    row = {
        'image_name': image_name,
        'new_file_name': new_file_name,
        'identitynet_strength_ratio': identitynet_strength_ratio,
        'adapter_strength_ratio': adapter_strength_ratio,
        'num_inference_steps': num_inference_steps,
        'guidance_scale': guidance_scale,
        'seed': seed,
        'success': success,
        'error_message': error_message,
        'style_name': style_name,
        'prompt': prompt,
        'negative_prompt': negative_prompt,
        'time_taken': time_taken,
        'current_timestamp': current_timestamp,
    }

    with open(logfile_path, 'a', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=list(row))
        if needs_header:
            writer.writeheader()
        writer.writerow(row)
88
+
89
def initial_image(generate_image_func):
    """Run batch generation over every image in ``INPUT_FOLDER_NAME``.

    The folder's ``.png``, ``.jpg`` and ``.jpeg`` files are shuffled once and
    processed ``NUMBER_OF_LOOPS`` times. For each image, random generation
    settings are drawn from the module-level ranges, ``generate_image_func``
    is called, progress and timing are printed, and one CSV row is logged per
    generated file.

    Args:
        generate_image_func: Callable invoked with keyword arguments
            (``face_image``, ``pose_image``, ``prompt``, ``negative_prompt``,
            ``style_name``, ``enhance_face_region``, ``num_steps``,
            ``identitynet_strength_ratio``, ``adapter_strength_ratio``,
            ``guidance_scale``, ``seed``). It must return a 3-tuple whose
            third item is an iterable of generated file paths.

    Raises:
        FileNotFoundError: If the input folder contains no images.
    """
    overall_start_time = time.time()
    total_time_taken = 0.0

    # Number of images handled so far, across all loops.
    processed_images_count = 0

    # List all image files in the `INPUT_FOLDER_NAME`
    image_files = (
        glob.glob(f'{INPUT_FOLDER_NAME}/*.png')
        + glob.glob(f'{INPUT_FOLDER_NAME}/*.jpg')
        + glob.glob(f'{INPUT_FOLDER_NAME}/*.jpeg')
    )

    if not image_files:
        raise FileNotFoundError(f"No images found in directory {INPUT_FOLDER_NAME}")

    print(f"Processing a total of {len(image_files)} image(s) in '{INPUT_FOLDER_NAME}'")

    # Shuffle once; every loop then uses the same random order.
    random.shuffle(image_files)

    total_images = len(image_files)

    for loop in range(NUMBER_OF_LOOPS):
        print(f"Starting loop {loop+1} of {NUMBER_OF_LOOPS}")

        for image_number, face_image_path in enumerate(image_files, start=1):
            loop_start_time = datetime.now()
            face_image = [face_image_path]
            basename = os.path.basename(face_image_path)
            processed_images_count += 1

            # Resize and pad the image before processing.
            # NOTE(review): the result is not passed to generate_image_func,
            # which receives the original path; confirm whether that is intended.
            processed_image = resize_and_pad_image(
                image_path=face_image_path,
                max_side=MAX_SIDE,
                min_side=MIN_SIDE
            )

            if USE_RANDOM_STYLE:
                style_name = choose_random_style()
            else:
                style_name = "(No style)"

            identitynet_strength_ratio = random.uniform(*IDENTITYNET_STRENGTH_RATIO_RANGE)
            adapter_strength_ratio = random.uniform(*ADAPTER_STRENGTH_RATIO_RANGE)
            num_inference_steps = random.randint(*NUM_INFERENCE_STEPS_RANGE)
            guidance_scale = random.uniform(*GUIDANCE_SCALE_RANGE)
            seed = random.randint(0, 2**32 - 1)

            # Print settings for the current image BEFORE processing it
            print_generation_settings(basename, style_name, identitynet_strength_ratio,
                                      adapter_strength_ratio, num_inference_steps, guidance_scale, seed,
                                      image_number, total_images)

            _, _, generated_file_paths = generate_image_func(
                face_image=face_image,
                pose_image=None,
                prompt=PROMPT,
                negative_prompt=NEGATIVE_PROMPT,
                style_name=style_name,
                enhance_face_region=True,
                num_steps=num_inference_steps,
                identitynet_strength_ratio=identitynet_strength_ratio,
                adapter_strength_ratio=adapter_strength_ratio,
                guidance_scale=guidance_scale,
                seed=seed
            )

            loop_end_time = datetime.now()
            loop_time_taken = (loop_end_time - loop_start_time).total_seconds()

            print(f"Time taken to process image: {loop_time_taken:.2f} seconds")

            total_time_taken += loop_time_taken

            # Average over every image processed so far. `total_time_taken`
            # accumulates across loops, so dividing by the per-loop
            # `image_number` would inflate the average from loop 2 on.
            average_time_per_image = total_time_taken / processed_images_count

            current_timestamp = loop_end_time.strftime("%Y-%m-%d %H:%M:%S")  # Current time after processing
            print(f"Current timestamp: {current_timestamp}")

            # Remaining work: rest of this loop plus all loops still to come.
            remaining_images_this_loop = total_images - image_number
            remaining_images_in_additional_loops = (NUMBER_OF_LOOPS - (loop + 1)) * total_images
            total_remaining_images = remaining_images_this_loop + remaining_images_in_additional_loops
            estimated_time_remaining = average_time_per_image * total_remaining_images

            print(f"Estimated time remaining (including loops): {estimated_time_remaining // 60:.0f} minutes, {estimated_time_remaining % 60:.0f} seconds")
            print(f"Overall average time per image: {average_time_per_image:.2f} seconds")
            print(f"Total remaining images to process (including loops): {total_remaining_images}")

            # Reaching this point means generate_image_func did not raise.
            success = True
            error_message = ""

            # Log to CSV after the image generation.
            for generated_file_path in generated_file_paths:
                new_file_name = os.path.basename(generated_file_path)
                log_to_csv(logfile_path, basename, new_file_name, identitynet_strength_ratio,
                           adapter_strength_ratio, num_inference_steps, guidance_scale, seed, success,
                           error_message, style_name, PROMPT, NEGATIVE_PROMPT, loop_time_taken, current_timestamp)

            del generated_file_paths  # Explicitly delete large variables
            gc.collect()  # Call garbage collection

    total_elapsed_time = time.time() - overall_start_time
    print("\n===FINAL SUMMARY===")
    print(f"Total loops completed: {NUMBER_OF_LOOPS}")
    print(f"Total images processed per loop: {len(image_files)}")
    print(f"Overall total images processed: {NUMBER_OF_LOOPS * len(image_files)}")  # Multiplied by the number of loops
    print(f"Overall total time: {total_elapsed_time / 60:.2f} minutes")
213
+
214
+
215
def print_generation_settings(basename, style_name, identitynet_strength_ratio, adapter_strength_ratio, num_inference_steps, guidance_scale, seed, image_number, total_images):
    """Print the settings that will be used to generate the current image.

    The per-image arguments are printed together with the module-level
    folder names, prompts, loop count and random-style flag.
    """
    summary_lines = [
        f"- Image {image_number} of {total_images}",
        f"- Filename: {basename}",
        f"- Style: {style_name}",
        f"- IdentityNet strength ratio: {identitynet_strength_ratio:0.2f}",
        f"- Adapter strength ratio: {adapter_strength_ratio:0.2f}",
        f"- Number of inference steps: {num_inference_steps}",
        f"- Guidance scale: {guidance_scale:0.2f}",
        f"- Seed: {seed}",
        f"- Input folder name: {INPUT_FOLDER_NAME}",
        f"- Output folder name: {OUTPUT_FOLDER_NAME}",
        f"- Prompt: {PROMPT}",
        f"- Negative prompt: {NEGATIVE_PROMPT}",
        f"- Number of loops: {NUMBER_OF_LOOPS}",
        f"- Use random style: {USE_RANDOM_STYLE}",
    ]

    print("===IMAGE GENERATION DATA SUMMARY===")
    # Every entry ends with a newline, the last one included, so a blank
    # line follows the summary.
    print("\n".join(summary_lines) + "\n")
    print("===DEFINING COMPLETE, GENERATING IMAGE...===")
gradio_demo/requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ diffusers==0.25.1
2
+ torch==2.0.0
3
+ torchvision==0.15.1
4
+ transformers==4.37.1
5
+ accelerate==0.25.0
6
+ safetensors==0.4.3
7
+ einops==0.7.0
8
+ onnxruntime-gpu==1.18.1
9
+ spaces==0.19.4
10
+ omegaconf==2.3.0
11
+ peft==0.11.1
12
+ huggingface-hub==0.23.4
13
+ opencv-python==4.10.0.84
14
+ insightface==0.7.3
15
+ gradio==4.38.1
16
+ controlnet_aux==0.0.9
17
+ gdown==5.2.0
18
+ peft==0.11.1
19
+ setuptools==71.1.0
gradio_demo/style_template.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # style_list = [
2
+ # {
3
+ # "name": "Professional",
4
+ # "prompt": ["Minimalist style, Simple, clean, uncluttered, modern, elegant, white background, professional photo, linkedin profile photo, formal attire, professional look","Minimalist style, Simple, clean, uncluttered, modern, elegant, white background, professional photo, linkedin profile photo, formal attire, professional look","Minimalist style, Simple, clean, uncluttered, modern, elegant, white background, professional photo, linkedin profile photo, formal attire, professional look","Minimalist style, Simple, clean, uncluttered, modern, elegant, white background, professional photo, linkedin profile photo, formal attire, professional look"],
5
+ # # "prompt": ["Minimalist style, Simple, clean, uncluttered, modern, elegant, white background, professional photo, linkedin profile photo, formal attire, professional look,highly detailed, sharp focus","Minimalist style, Simple, clean, uncluttered, modern, elegant, white background, professional photo, linkedin profile photo, formal attire, professional look","Minimalist style, Simple, clean, uncluttered, modern, elegant, white background, professional photo, linkedin profile photo, formal attire, professional look","Minimalist style, Simple, clean, uncluttered, modern, elegant, white background, professional photo, linkedin profile photo, formal attire, professional look"],
6
+ # "negative_prompt":
7
+ # # "photographic, realistic, realism, 35mm film, dslr, cropped, frame, text, deformed, glitch, noise, noisy, blurry, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, sloppy, duplicate, mutated, black and white, multiple people, green, deformed"
8
+ # # "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly"
9
+
10
+ # # "lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed"
11
+
12
+ # "high saturation, multiple people, two people, patchy, photographic, realistic, realism, 35mm film, dslr, cropped, frame, text, deformed, glitch,blurred, blurry, noise, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, sloppy, duplicate, mutated, black and white"
13
+ # },
14
+ # {
15
+ # "name": "Quirky",
16
+ # "prompt": ["vibrant colorful, ink sketch|vector|2d colors, sharp focus, superman/wonderwoman, highly detailed, the clouds,colorful,ultra sharpness,4k","watercolor painting, japanese anime character with white/neon hair. vibrant, beautiful, painterly, detailed, textural, artistic","vibrant colorful, ink sketch|vector|2d colors, sharp focus, scooba diver, highly detailed, the ocean,fishes,colorful,ultra sharpness,4k","individual dressed as an eskimo, surrounded by snowy mountains and igloo, snow crystals, cold, windy background, frozen natural landscape in background,highly detailed, sharp focus, intricate design, 4k resolution"],
17
+ # "negative_prompt": "saturation, highly saturated,(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, multiple people, buildings in background, green"
18
+ # },
19
+ # {
20
+ # "name": "Sci-fi",
21
+ # "prompt": ["ethereal fantasy concept art individual, magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy","Dystopian style cyborg. Bleak, post-apocalyptic, somber, dramatic, highly detailed","Alien-themed, Extraterrestrial, cosmic, otherworldly, mysterious, sci-fi, highly detailed", "Legend of Zelda style . Vibrant, fantasy, detailed, epic, heroic, reminiscent of The Legend of Zelda series"],
22
+ # "negative_prompt": "photographic, realistic, realism, 35mm film, dslr, cropped, frame, text, deformed, glitch, noise, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, sloppy, duplicate, mutated, black and white, multiple people, green, deformed",
23
+ # }
24
+ # ]
25
+
26
+ # styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list}
27
+
28
+ # # lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed
29
+
30
+ # style_list = [
31
+ # {
32
+ # "name": "(No style)",
33
+ # "prompt": "Realistic, 4k resolution, ultra sharpness, {prompt} sitiing at a desk, office environment, professional photoshoot",
34
+ # "negative_prompt": "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
35
+ # },
36
+ # {
37
+ # "name": "Watercolor",
38
+ # "prompt": "watercolor painting, japanese anime character with white/neon hair. vibrant, beautiful, painterly, detailed, textural, artistic",
39
+ # "negative_prompt": "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, anime, photorealistic, 35mm film, deformed, glitch, low contrast, noisy",
40
+ # },
41
+ # {
42
+ # "name": "Film Noir",
43
+ # "prompt": "film noir style, ink sketch|vector, {prompt} highly detailed, sharp focus, ultra sharpness, monochrome, high contrast, dramatic shadows, 1940s style, mysterious, cinematic",
44
+ # "negative_prompt": "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
45
+ # },
46
+ # {
47
+ # "name": "Neon",
48
+ # "prompt": "masterpiece painting, buildings in the backdrop, kaleidoscope, lilac orange blue cream fuchsia bright vivid gradient colors, the scene is cinematic, {prompt}, emotional realism, double exposure, watercolor ink pencil, graded wash, color layering, magic realism, figurative painting, intricate motifs, organic tracery, polished",
49
+ # "negative_prompt": "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
50
+ # },
51
+ # {
52
+ # "name": "Jungle",
53
+ # "prompt": 'waist-up "{prompt} in a Jungle" by Syd Mead, tangerine cold color palette, muted colors, detailed, 8k,photo r3al,dripping paint,3d toon style,3d style,Movie Still',
54
+ # "negative_prompt": "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
55
+ # },
56
+ # {
57
+ # "name": "Mars",
58
+ # "prompt": "{prompt}, Post-apocalyptic. Mars Colony, Scavengers roam the wastelands searching for valuable resources, rovers, bright morning sunlight shining, (detailed) (intricate) (8k) (HDR) (cinematic lighting) (sharp focus)",
59
+ # "negative_prompt": "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
60
+ # },
61
+ # {
62
+ # "name": "Vibrant Color",
63
+ # "prompt": "vibrant colorful, ink sketch|vector|2d colors, sharp focus, {prompt}, highly detailed, the clouds,colorful,ultra sharpness,4k",
64
+ # "negative_prompt": "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly,distorted, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
65
+ # },
66
+ # {
67
+ # "name": "Snow",
68
+ # "prompt": "individual dressed as an {prompt}, high contrast, surrounded by snowy mountains and igloo, snow crystals, cold, windy background, frozen natural landscape in background,highly detailed, sharp focus, intricate design, 4k resolution",
69
+ # "negative_prompt": "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
70
+ # },
71
+ # {
72
+ # "name": "Line art",
73
+ # "prompt": "vibrant colorful, sharp focus,individual wearing {prompt} costume, highly detailed, sharp focus, the ocean, fishes swimming in the background,coral reef behind, ocean landscape, 4k, colorful",
74
+ # "negative_prompt": "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
75
+ # },
76
+ # ]
77
+
78
+ # styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list}
79
+
80
# Style templates for the avatar demo.
#
# Every entry carries a list of four prompt variants plus one negative prompt
# shared by all of them; consumers pick a variant by its position in the list.
style_list = [
    {
        "name": "Professional",
        "prompt": [
            "professional portrait, gender-aligned, natural skin tones, cinematic lighting, highly detailed, well-composed, professional photography, subtle background blur",
            "Minimalist portrait, clean lines, soft colors, simple background, modern, elegant, subtle details, focus on facial features",
            "Professional, Corporate, formal attire, polished, sharp features, clean background, high clarity, refined, business style",
            "LinkedIn professional, business attire, neutral background, sharp focus, approachable, polished, suited for professional networking",
        ],
        "negative_prompt": "oversaturated, unnatural skin tones, deformed, disfigured, low resolution, cartoonish, unrealistic",
    },
    {
        "name": "Quirky",
        "prompt": [
            "vibrant colorful, ink sketch|vector|2d colors, sharp focus, superman/wonderwoman, highly detailed, the clouds,colorful,ultra sharpness,4k",
            "watercolor painting, japanese anime character with white/neon hair. vibrant, beautiful, painterly, detailed, textural, artistic",
            # "scuba" was misspelled as "scooba", which is not a word the text
            # encoder can be expected to associate with diving.
            "vibrant colorful, ink sketch|vector|2d colors, sharp focus, scuba diver, highly detailed, the ocean,fishes,colorful,ultra sharpness,4k",
            "individual dressed as an eskimo, high contrast, surrounded by snowy mountains and igloo, snow crystals, cold, windy background, frozen natural landscape in background,highly detailed, sharp focus, intricate design, 4k resolution",
        ],
        "negative_prompt": "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
    },
    {
        "name": "Sci-fi",
        "prompt": [
            "ethereal fantasy concept art individual, magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy",
            "Dystopian style cyborg. Bleak, post-apocalyptic, somber, dramatic, highly detailed",
            "Alien-themed, Extraterrestrial, cosmic, otherworldly, mysterious, sci-fi, highly detailed",
            "Legend of Zelda style . Vibrant, fantasy, detailed, epic, heroic, reminiscent of The Legend of Zelda series",
        ],
        "negative_prompt": "photographic, realistic, realism, 35mm film, dslr, cropped, frame, text, deformed, glitch, noise, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, sloppy, duplicate, mutated, black and white, multiple people",
    },
]

# Lookup table: style name -> (list of prompt variants, negative prompt).
styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list}
135
+
136
+ # lowres
gradio_demo/test.py ADDED
@@ -0,0 +1,400 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ sys.path.append('./')
3
+
4
+ from typing import Tuple
5
+
6
+ import os
7
+ import cv2
8
+ import math
9
+ import torch
10
+ import random
11
+ import numpy as np
12
+ import argparse
13
+ import pandas as pd
14
+
15
+ import PIL
16
+ from PIL import Image
17
+
18
+ import diffusers
19
+ from diffusers.utils import load_image
20
+ from diffusers.models import ControlNetModel
21
+ from diffusers import LCMScheduler
22
+
23
+ from huggingface_hub import hf_hub_download
24
+
25
+ import insightface
26
+ from insightface.app import FaceAnalysis
27
+
28
+ from style_template import styles
29
+ from pipeline_stable_diffusion_xl_instantid_full import StableDiffusionXLInstantIDPipeline
30
+ from model_util import load_models_xl, get_torch_device, torch_gc
31
+
32
+
33
# Global configuration shared by the whole demo.
MAX_SEED = np.iinfo(np.int32).max
device = get_torch_device()
# float16 is selected only when the device string mentions CUDA; float32 otherwise.
dtype = torch.float16 if "cuda" in str(device) else torch.float32
STYLE_NAMES = list(styles.keys())
# "Watercolor" stays the preferred default, but fall back to the first
# available style when the template module does not define it.
DEFAULT_STYLE_NAME = "Watercolor" if "Watercolor" in styles else STYLE_NAMES[0]

# Load face encoder
app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
app.prepare(ctx_id=0, det_size=(320, 320))

# Path to InstantID models
face_adapter = './checkpoints/ip-adapter.bin'
controlnet_path = './checkpoints/ControlNetModel'

# Load pipeline
controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=dtype)

logo = Image.open("./gradio_demo/logo.png")
52
+
53
+ from cv2 import imencode
54
+ import base64
55
+
56
+ # def encode_pil_to_base64_new(pil_image):
57
+ # print("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
58
+ # image_arr = np.asarray(pil_image)[:,:,::-1]
59
+ # _, byte_data = imencode('.png', image_arr)
60
+ # base64_data = base64.b64encode(byte_data)
61
+ # base64_string_opencv = base64_data.decode("utf-8")
62
+ # return "data:image/png;base64," + base64_string_opencv
63
+
64
+ import gradio as gr
65
+
66
+ # gr.processing_utils.encode_pil_to_base64 = encode_pil_to_base64_new
67
+
68
def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=False):
    """Build the InstantID SDXL pipeline for the requested base model.

    A path ending in ``.ckpt`` or ``.safetensors`` is treated as a single-file
    checkpoint: its components are loaded with ``load_models_xl`` and the
    scheduler configuration is fetched from the ``wangqixun/YamerMIX_v8`` repo.
    Anything else is handed to ``from_pretrained``.

    Args:
        pretrained_model_name_or_path: Base model identifier or checkpoint path.
        enable_lcm_arg: Accepted for compatibility; not read by the code below.
    """
    single_file_suffixes = (".ckpt", ".safetensors")

    if pretrained_model_name_or_path.endswith(single_file_suffixes):
        scheduler_kwargs = hf_hub_download(
            repo_id="wangqixun/YamerMIX_v8",
            subfolder="scheduler",
            filename="scheduler_config.json",
        )

        loaded = load_models_xl(
            pretrained_model_name_or_path=pretrained_model_name_or_path,
            scheduler_name=None,
            weight_dtype=dtype,
        )
        tokenizers, text_encoders, unet, _, vae = loaded

        scheduler = diffusers.EulerDiscreteScheduler.from_config(scheduler_kwargs)
        pipe = StableDiffusionXLInstantIDPipeline(
            vae=vae,
            text_encoder=text_encoders[0],
            text_encoder_2=text_encoders[1],
            tokenizer=tokenizers[0],
            tokenizer_2=tokenizers[1],
            unet=unet,
            scheduler=scheduler,
            controlnet=controlnet,
        ).to(device)
    else:
        pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
            pretrained_model_name_or_path,
            controlnet=controlnet,
            torch_dtype=dtype,
            safety_checker=None,
            feature_extractor=None,
        ).to(device)

        pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)

    pipe.load_ip_adapter_instantid(face_adapter)
    # The LCM LoRA is loaded up front and left disabled.
    pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
    pipe.disable_lora()
112
+
113
def remove_tips():
    """Return a Gradio update that hides the component it is routed to."""
    hidden = gr.update(visible=False)
    return hidden
115
+
116
+
117
+ # prompts = [
118
+ # ["superman","Vibrant Color"], ["japanese anime character with white/neon hair","Watercolor"],
119
+ # # ["Suited professional","(No style)"],
120
+ # ["Scooba diver","Line art"], ["eskimo","Snow"]
121
+ # ]
122
+
123
def convert_from_cv2_to_image(img: np.ndarray) -> Image.Image:
    """Convert an OpenCV BGR array into a PIL image in RGB channel order."""
    # ``Image`` is the PIL module; the class to annotate with is ``Image.Image``.
    return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
125
+
126
def convert_from_image_to_cv2(img: Image.Image) -> np.ndarray:
    """Convert a PIL RGB image into an OpenCV array in BGR channel order."""
    # ``Image`` is the PIL module; the class to annotate with is ``Image.Image``.
    return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
128
+
129
def _run_for_prompt_variant(face_file, style, variant):
    """Generate one image for ``face_file`` using prompt ``variant`` of ``style``.

    Unknown (or unselected) styles fall back to the second entry of
    ``STYLE_NAMES``, exactly as each of the four public wrappers did before.
    """
    fallback = styles.get(STYLE_NAMES[1])
    prompts, negative_prompt = styles.get(style, fallback)
    return generate_image(face_file, prompts[variant], negative_prompt)


def run_for_prompts1(face_file,style,progress=gr.Progress(track_tqdm=True)):
    """Generate the image for the first prompt variant of ``style``."""
    return _run_for_prompt_variant(face_file, style, 0)


def run_for_prompts2(face_file,style,progress=gr.Progress(track_tqdm=True)):
    """Generate the image for the second prompt variant of ``style``."""
    return _run_for_prompt_variant(face_file, style, 1)


def run_for_prompts3(face_file,style,progress=gr.Progress(track_tqdm=True)):
    """Generate the image for the third prompt variant of ``style``."""
    return _run_for_prompt_variant(face_file, style, 2)


def run_for_prompts4(face_file,style,progress=gr.Progress(track_tqdm=True)):
    """Generate the image for the fourth prompt variant of ``style``."""
    return _run_for_prompt_variant(face_file, style, 3)
147
+
148
+ # def validate_and_process(face_file, style, email):
149
+
150
+ # # Your processing logic here
151
+ # gallery1, gallery2, gallery3, gallery4 = run_for_prompts1(face_file, style), run_for_prompts2(face_file, style), run_for_prompts3(face_file, style), run_for_prompts4(face_file, style)
152
+ # return gallery1, gallery2, gallery3, gallery4
153
+
154
def draw_kps(image_pil, kps, color_list=((255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255))):
    """Draw facial keypoints as colored limbs and dots on a black canvas.

    Args:
        image_pil: PIL image; only its size is used, to size the canvas.
        kps: Sequence of (x, y) keypoints. The limb table below references
            indices 0-4, so at least five points are required.
        color_list: One RGB color per keypoint. The default is a tuple rather
            than a list so that no mutable object is shared between calls.

    Returns:
        A PIL image of the same size as ``image_pil`` containing the drawing.
    """
    stickwidth = 4
    # Every limb connects one keypoint to keypoint 2.
    limb_seq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
    kps = np.array(kps)

    w, h = image_pil.size
    out_img = np.zeros([h, w, 3])

    for index in limb_seq:
        color = color_list[index[0]]

        x = kps[index][:, 0]
        y = kps[index][:, 1]
        length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
        angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
        # Each limb is a thin filled ellipse centered between its two endpoints.
        polygon = cv2.ellipse2Poly((int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
        out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
    # Dim the limbs so the full-intensity keypoint dots drawn next stand out.
    out_img = (out_img * 0.6).astype(np.uint8)

    for idx_kp, kp in enumerate(kps):
        color = color_list[idx_kp]
        x, y = kp
        out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)

    out_img_pil = Image.fromarray(out_img.astype(np.uint8))
    return out_img_pil
181
+
182
def resize_img(input_image, max_side=640, min_side=640, size=None,
               pad_to_max_side=True, mode=PIL.Image.BILINEAR, base_pixel_number=64):
    """Resize a PIL image for the pipeline and optionally pad it to a square.

    Args:
        input_image: PIL image to resize.
        max_side: Upper bound for the longer side; also the side length of the
            padded square canvas.
        min_side: Target for the shorter side before ``max_side`` is enforced.
        size: Explicit ``(width, height)``; when given, the ratio-based sizing
            is skipped and the image is resized straight to this size.
        pad_to_max_side: Center the result on a white ``max_side`` square.
        mode: PIL resampling filter.
        base_pixel_number: Computed dimensions are floored to a multiple of this.

    Returns:
        The resized (and possibly padded) PIL image.
    """
    w, h = input_image.size
    if size is not None:
        w_resize_new, h_resize_new = size
    else:
        ratio = min_side / min(h, w)
        w, h = round(ratio * w), round(ratio * h)
        ratio = max_side / max(h, w)
        input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
        w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
        h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
    input_image = input_image.resize([w_resize_new, h_resize_new], mode)

    if pad_to_max_side:
        # The canvas has exactly three channels; grayscale or RGBA input would
        # otherwise fail the slice assignment below with a shape mismatch.
        if input_image.mode != "RGB":
            input_image = input_image.convert("RGB")
        res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
        offset_x = (max_side - w_resize_new) // 2
        offset_y = (max_side - h_resize_new) // 2
        res[offset_y:offset_y+h_resize_new, offset_x:offset_x+w_resize_new] = np.array(input_image)
        input_image = Image.fromarray(res)
    return input_image
206
+
207
def store_images(email, gallery1, gallery2, gallery3, gallery4):
    """Save the four generated images and record their paths in a CSV file.

    Images are written to ``images/<email>_gallery<N>.png`` and one row
    ``email, img1_path .. img4_path`` is appended to ``image_data.csv``
    (the header is written only when the file is created).

    ``email`` comes straight from a text box, so it is sanitized before being
    used in a file name: every character that is not alphanumeric or one of
    ``@ . _ + -`` becomes ``_``. This keeps path separators out of the file
    name. The CSV keeps the email exactly as entered.

    Args:
        email: Identifier typed by the user.
        gallery1, gallery2, gallery3, gallery4: Generated images, either numpy
            arrays or objects exposing ``save(path)`` such as PIL images.

    Raises:
        gr.Error: If any of the four images is missing.
    """
    raw_images = [gallery1, gallery2, gallery3, gallery4]
    if any(img is None for img in raw_images):
        raise gr.Error("Please generate all four images before storing them")

    galleries = [
        Image.fromarray(img) if isinstance(img, np.ndarray) else img
        for img in raw_images
    ]

    # Keep the characters found in ordinary email addresses, neutralize the rest.
    safe_name = "".join(
        ch if ch.isalnum() or ch in "@._+-" else "_" for ch in str(email)
    )

    # Create the images directory if it doesn't exist
    os.makedirs('images', exist_ok=True)

    image_paths = []
    for i, img in enumerate(galleries, start=1):
        img_path = f'images/{safe_name}_gallery{i}.png'
        img.save(img_path)
        image_paths.append(img_path)

    csv_file_path = 'image_data.csv'

    df = pd.DataFrame({
        'email': [email],
        'img1_path': [image_paths[0]],
        'img2_path': [image_paths[1]],
        'img3_path': [image_paths[2]],
        'img4_path': [image_paths[3]],
    })

    # Append to the CSV; emit the header only when the file is being created.
    write_header = not os.path.isfile(csv_file_path)
    df.to_csv(csv_file_path, mode='a', header=write_header, index=False)
242
+
243
+
244
def generate_image(face_image,prompt,negative_prompt):
    """Run the InstantID pipeline once for ``face_image`` and return the result.

    The largest detected face supplies the identity embedding and the keypoint
    image used as ControlNet conditioning. A fresh random seed is drawn on
    every call.

    Args:
        face_image: PIL image containing the face to use.
        prompt: Positive text prompt.
        negative_prompt: Negative text prompt.

    Returns:
        The first image produced by the pipeline.

    Raises:
        gr.Error: If no image was supplied or no face is detected in it.
    """
    # Validate first so a bad request leaves the shared pipeline untouched.
    if face_image is None:
        raise gr.Error("Cannot find any input face image! Please upload the face image")

    # Fixed generation settings for this demo.
    pose_image_path = None
    enable_LCM = False
    identitynet_strength_ratio = 0.95
    adapter_strength_ratio = 0.60
    num_steps = 15
    guidance_scale = 8.5
    seed = random.randint(0, MAX_SEED)
    enhance_face_region = True

    if enable_LCM:
        pipe.enable_lora()
        pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
    else:
        pipe.disable_lora()
        pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)

    face_image = resize_img(face_image)
    face_image_cv2 = convert_from_image_to_cv2(face_image)

    # Extract face features
    face_info = app.get(face_image_cv2)

    if len(face_info) == 0:
        raise gr.Error("Cannot find any face in the image! Please upload another person image")

    face_info = sorted(face_info, key=lambda x:(x['bbox'][2]-x['bbox'][0])*(x['bbox'][3]-x['bbox'][1]))[-1]  # only use the maximum face
    face_emb = face_info['embedding']
    face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info['kps'])

    if pose_image_path is not None:
        # Optional pose reference: keypoints come from the reference image
        # while the identity embedding stays that of the uploaded face.
        pose_image = load_image(pose_image_path)
        pose_image = resize_img(pose_image)
        pose_image_cv2 = convert_from_image_to_cv2(pose_image)

        face_info = app.get(pose_image_cv2)

        if len(face_info) == 0:
            raise gr.Error("Cannot find any face in the reference image! Please upload another person image")

        face_info = face_info[-1]
        face_kps = draw_kps(pose_image, face_info['kps'])

    # The keypoint image defines the output resolution.
    width, height = face_kps.size

    if enhance_face_region:
        control_mask = np.zeros([height, width, 3])
        x1, y1, x2, y2 = (int(v) for v in face_info["bbox"])
        # Clamp to the canvas: a negative coordinate would be read as an index
        # from the far edge and produce an empty or misplaced mask.
        x1, x2 = max(x1, 0), min(x2, width)
        y1, y2 = max(y1, 0), min(y2, height)
        control_mask[y1:y2, x1:x2] = 255
        control_mask = Image.fromarray(control_mask.astype(np.uint8))
    else:
        control_mask = None

    generator = torch.Generator(device=device).manual_seed(seed)

    print("Start inference...")
    print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")

    pipe.set_ip_adapter_scale(adapter_strength_ratio)
    images = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image_embeds=face_emb,
        image=face_kps,
        control_mask=control_mask,
        controlnet_conditioning_scale=float(identitynet_strength_ratio),
        num_inference_steps=num_steps,
        guidance_scale=guidance_scale,
        height=height,
        width=width,
        generator=generator,
    ).images

    return images[0]
335
+
336
+
337
### Description
# Page texts. Each one is defined once; the earlier raw-string versions of
# ``title`` and ``description`` were overwritten before use and are removed.
title = "<h1 align='center'>Choose your AVATAR</h1>"
description = "<h2> Powered by IDfy </h2>"

article = r""""""

tips = r""""""

css = '''
.gradio-container {width: 95% !important; background-color: #E6F3FF;}
.image-gallery {height: 100vh !important; overflow: auto;}
.gradio-row .gradio-element { margin: 0 !important; }
'''
with gr.Blocks(css=css) as demo:
    # Header
    gr.Markdown(title)
    with gr.Row():
        gr.Image("./gradio_demo/logo.png",scale=0,min_width=50,show_label=False,show_download_button=False)
        gr.Markdown(description)
    with gr.Row():
        with gr.Column():
            style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
            face_file = gr.Image(label="Upload a photo of your face", type="pil")
            submit = gr.Button("Submit", variant="primary")
        with gr.Column():
            with gr.Row():
                gallery1 = gr.Image(label="Generated Images")
                gallery2 = gr.Image(label="Generated Images")
            with gr.Row():
                gallery3 = gr.Image(label="Generated Images")
                gallery4 = gr.Image(label="Generated Images")
            email = gr.Textbox(label="Email",
                               info="Enter your email address",
                               value="")
            submit1 = gr.Button("STORE", variant="primary")
        usage_tips = gr.Markdown(label="Usage tips of InstantID", value="", visible=False)

    # Uploading a photo and pressing Submit start the same chain: hide the
    # tips, then fill the four galleries one after another.
    generation_steps = [
        (run_for_prompts1, gallery1),
        (run_for_prompts2, gallery2),
        (run_for_prompts3, gallery3),
        (run_for_prompts4, gallery4),
    ]
    for trigger in (face_file.upload, submit.click):
        event = trigger(remove_tips, outputs=usage_tips)
        for step_fn, target in generation_steps:
            event = event.then(step_fn, inputs=[face_file, style], outputs=[target])

    # Store data on button click
    submit1.click(
        fn=store_images,
        inputs=[email,gallery1,gallery2,gallery3,gallery4],
        outputs=None)

    gr.Markdown("")

demo.launch(share=True)
394
+
395
if __name__ == "__main__":
    # Only the base model is selectable from the command line; LCM stays off.
    cli = argparse.ArgumentParser()
    cli.add_argument("--pretrained_model_name_or_path", type=str, default="wangqixun/YamerMIX_v8")
    options = cli.parse_args()

    main(
        pretrained_model_name_or_path=options.pretrained_model_name_or_path,
        enable_lcm_arg=False,
    )
gradio_demo/watermark.png ADDED
image_data.csv ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ email,img1_path,img2_path,img3_path,img4_path
2
+ ll,images/ll_gallery1.png,images/ll_gallery2.png,images/ll_gallery3.png,images/ll_gallery4.png
3
+ kajal@img,images/kajal@img_gallery1.png,images/kajal@img_gallery2.png,images/kajal@img_gallery3.png,images/kajal@img_gallery4.png
4
+ heeral@img,images/heeral@img_gallery1.png,images/heeral@img_gallery2.png,images/heeral@img_gallery3.png,images/heeral@img_gallery4.png
5
+ sanskruti@img-scifi,images/sanskruti@img-scifi_gallery1.png,images/sanskruti@img-scifi_gallery2.png,images/sanskruti@img-scifi_gallery3.png,images/sanskruti@img-scifi_gallery4.png
6
+ sanskruti@img-scifi,images/sanskruti@img-scifi_gallery1.png,images/sanskruti@img-scifi_gallery2.png,images/sanskruti@img-scifi_gallery3.png,images/sanskruti@img-scifi_gallery4.png
7
+ sanskruti@img-quirky,images/sanskruti@img-quirky_gallery1.png,images/sanskruti@img-quirky_gallery2.png,images/sanskruti@img-quirky_gallery3.png,images/sanskruti@img-quirky_gallery4.png
8
+ kajal@quirky,images/kajal@quirky_gallery1.png,images/kajal@quirky_gallery2.png,images/kajal@quirky_gallery3.png,images/kajal@quirky_gallery4.png
9
+ kajal@prof,images/kajal@prof_gallery1.png,images/kajal@prof_gallery2.png,images/kajal@prof_gallery3.png,images/kajal@prof_gallery4.png
10
+ kajal@quirky,images/kajal@quirky_gallery1.png,images/kajal@quirky_gallery2.png,images/kajal@quirky_gallery3.png,images/kajal@quirky_gallery4.png
11
+ kajal@sci-fi,images/kajal@sci-fi_gallery1.png,images/kajal@sci-fi_gallery2.png,images/kajal@sci-fi_gallery3.png,images/kajal@sci-fi_gallery4.png
12
+ yashvi,images/yashvi_gallery1.png,images/yashvi_gallery2.png,images/yashvi_gallery3.png,images/yashvi_gallery4.png
13
+ yashviwhy@instantid.com,images/yashviwhy@instantid.com_gallery1.png,images/yashviwhy@instantid.com_gallery2.png,images/yashviwhy@instantid.com_gallery3.png,images/yashviwhy@instantid.com_gallery4.png
14
+ kartik@prof,images/kartik@prof_gallery1.png,images/kartik@prof_gallery2.png,images/kartik@prof_gallery3.png,images/kartik@prof_gallery4.png
15
+ yashvii@proffffff,images/yashvii@proffffff_gallery1.png,images/yashvii@proffffff_gallery2.png,images/yashvii@proffffff_gallery3.png,images/yashvii@proffffff_gallery4.png
images/aa.ll_gallery1.png ADDED

Git LFS Details

  • SHA256: 0c08937723c681094ef3befc375dfe1ae7f67fa4b8f89325df494861a5091a20
  • Pointer size: 132 Bytes
  • Size of remote file: 1.13 MB
images/aa.ll_gallery2.png ADDED
images/aa.ll_gallery3.png ADDED
images/aa.ll_gallery4.png ADDED
images/heeral@img_gallery1.png ADDED
images/heeral@img_gallery2.png ADDED
images/heeral@img_gallery3.png ADDED
images/heeral@img_gallery4.png ADDED
images/kajal@img_gallery1.png ADDED
images/kajal@img_gallery2.png ADDED
images/kajal@img_gallery3.png ADDED
images/kajal@img_gallery4.png ADDED