Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,6 @@
|
|
1 |
from dotenv import load_dotenv
|
2 |
from IPython.display import display, Image, Audio
|
3 |
from moviepy.editor import VideoFileClip, AudioFileClip
|
4 |
-
from moviepy.audio.io.AudioFileClip import AudioFileClip
|
5 |
-
|
6 |
import cv2
|
7 |
import base64
|
8 |
import io
|
@@ -15,7 +13,6 @@ import tempfile
|
|
15 |
# Load environment variables from .env.local
|
16 |
load_dotenv('.env.local')
|
17 |
|
18 |
-
# Add a function to check the password
|
19 |
def check_password():
|
20 |
correct_password = os.getenv('PASSWORD')
|
21 |
if correct_password is None:
|
@@ -30,29 +27,33 @@ def check_password():
|
|
30 |
st.error("Incorrect password")
|
31 |
return False
|
32 |
|
33 |
-
|
34 |
-
def video_to_frames(video_file):
|
35 |
-
# Save the uploaded video file to a temporary file
|
36 |
with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmpfile:
|
37 |
tmpfile.write(video_file.read())
|
38 |
video_filename = tmpfile.name
|
39 |
|
40 |
-
|
|
|
|
|
|
|
|
|
41 |
video = cv2.VideoCapture(video_filename)
|
42 |
base64Frame = []
|
|
|
43 |
|
44 |
while video.isOpened():
|
45 |
success, frame = video.read()
|
46 |
if not success:
|
47 |
break
|
48 |
-
|
49 |
-
|
50 |
-
|
|
|
|
|
51 |
video.release()
|
52 |
-
print(len(base64Frame)
|
53 |
return base64Frame, video_filename, video_duration
|
54 |
|
55 |
-
## 2. Generate stories based on frames with gpt4v
|
56 |
def frames_to_story(base64Frames, prompt, api_key):
|
57 |
PROMPT_MESSAGES = [
|
58 |
{
|
@@ -74,7 +75,6 @@ def frames_to_story(base64Frames, prompt, api_key):
|
|
74 |
print(result.choices[0].message.content)
|
75 |
return result.choices[0].message.content
|
76 |
|
77 |
-
## 3. Generate voiceover from stories
|
78 |
def text_to_audio(text, api_key, voice):
|
79 |
response = requests.post(
|
80 |
"https://api.openai.com/v1/audio/speech",
|
@@ -88,20 +88,14 @@ def text_to_audio(text, api_key, voice):
|
|
88 |
},
|
89 |
)
|
90 |
|
91 |
-
# Check if the request was successful
|
92 |
if response.status_code != 200:
|
93 |
raise Exception("Request failed with status code")
|
94 |
|
95 |
-
# Create an in-memory bytes buffer
|
96 |
audio_bytes_io = io.BytesIO()
|
97 |
-
# Write audio data to the in-memory bytes buffer
|
98 |
for chunk in response.iter_content(chunk_size=1024*1024):
|
99 |
audio_bytes_io.write(chunk)
|
100 |
-
|
101 |
-
# Important: Seek to the start of the BytesIO buffer before returning
|
102 |
audio_bytes_io.seek(0)
|
103 |
|
104 |
-
# Save audio to a temporary file
|
105 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
|
106 |
for chunk in response.iter_content(chunk_size=1024*1024):
|
107 |
tmpfile.write(chunk)
|
@@ -109,83 +103,58 @@ def text_to_audio(text, api_key, voice):
|
|
109 |
|
110 |
return audio_filename, audio_bytes_io
|
111 |
|
112 |
-
## 4. Merge videos & audio
|
113 |
def merge_audio_video(video_filename, audio_filename, output_filename):
|
114 |
print("Merging audio and video ...")
|
115 |
-
# Load the video file
|
116 |
video_clip = VideoFileClip(video_filename)
|
117 |
-
# Load the audio file
|
118 |
audio_clip = AudioFileClip(audio_filename)
|
119 |
-
# Set the audio of the video clip as the audio file
|
120 |
final_clip = video_clip.set_audio(audio_clip)
|
121 |
-
# Write the result to a file (without audio)
|
122 |
final_clip.write_videofile(output_filename, codec='libx264', audio_codec="aac")
|
123 |
-
# Close the clips
|
124 |
video_clip.close()
|
125 |
audio_clip.close()
|
126 |
-
|
127 |
-
# Return the path to the new video file
|
128 |
return output_filename
|
129 |
|
130 |
-
## 5. Streamlit UI
|
131 |
def main():
|
132 |
st.set_page_config(page_title="AI Voiceover", page_icon="🔮")
|
133 |
st.title("GPT4V AI Voiceover 🎥🔮")
|
134 |
-
st.text("Explore how GPT4V changes the way we voiceover videos.")
|
135 |
|
136 |
-
# Check password before proceeding
|
137 |
if not check_password():
|
138 |
-
return
|
139 |
|
140 |
-
# Retrieve the OpenAI API key from environment
|
141 |
openai_key = os.getenv('OPENAI_API_KEY')
|
142 |
if not openai_key:
|
143 |
st.error("OpenAI API key is not set in .env.local")
|
144 |
-
return
|
145 |
-
|
146 |
uploaded_file = st.file_uploader("Select a video file", type=["mp4", "avi"])
|
147 |
|
148 |
-
option = st.selectbox(
|
149 |
-
|
150 |
-
('Female Voice', 'Male Voice'))
|
151 |
-
classify = ''
|
152 |
-
if option == 'Male Voice':
|
153 |
-
classify = 'alloy'
|
154 |
-
elif option == 'Female Voice':
|
155 |
-
classify = 'nova'
|
156 |
|
157 |
if uploaded_file is not None:
|
158 |
st.video(uploaded_file)
|
159 |
-
|
160 |
-
prompt = st.text_area("Prompt", value=p)
|
161 |
|
162 |
if st.button("START PROCESSING", type="primary"):
|
163 |
with st.spinner("Video is being processed..."):
|
164 |
-
base64Frame, video_filename, video_duration = video_to_frames(uploaded_file)
|
165 |
|
166 |
-
# Check if the video duration exceeds 30 seconds
|
167 |
if video_duration > 30:
|
168 |
st.error("The video exceeds the maximum allowed duration of 30 seconds.")
|
169 |
-
return
|
170 |
|
171 |
-
|
172 |
-
final_prompt = prompt + f"(This video is ONLY {video_duration} seconds long. So make sure the voiceover MUST be able to be explained in less than {est_word_count} words. Ignore and don't generate anything else than the script that you'll use to voice over the video.)"
|
173 |
text = frames_to_story(base64Frame, final_prompt, openai_key)
|
174 |
st.write(text)
|
175 |
-
|
176 |
audio_filename, audio_bytes_io = text_to_audio(text, openai_key, classify)
|
177 |
-
# Merge audio and video
|
178 |
output_video_filename = os.path.splitext(video_filename)[0] + "_output.mp4"
|
179 |
|
180 |
final_video_filename = merge_audio_video(video_filename, audio_filename, output_video_filename)
|
181 |
-
|
182 |
-
# Display the result
|
183 |
st.video(final_video_filename)
|
184 |
|
185 |
-
# Clean up the temporary files
|
186 |
os.unlink(video_filename)
|
187 |
os.unlink(audio_filename)
|
188 |
os.unlink(final_video_filename)
|
189 |
|
190 |
if __name__ == "__main__":
|
191 |
-
main()
|
|
|
1 |
from dotenv import load_dotenv
|
2 |
from IPython.display import display, Image, Audio
|
3 |
from moviepy.editor import VideoFileClip, AudioFileClip
|
|
|
|
|
4 |
import cv2
|
5 |
import base64
|
6 |
import io
|
|
|
13 |
# Load environment variables from .env.local
|
14 |
load_dotenv('.env.local')
|
15 |
|
|
|
16 |
def check_password():
|
17 |
correct_password = os.getenv('PASSWORD')
|
18 |
if correct_password is None:
|
|
|
27 |
st.error("Incorrect password")
|
28 |
return False
|
29 |
|
30 |
+
def video_to_frames(video_file, frame_sampling_rate=1):
|
|
|
|
|
31 |
with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmpfile:
|
32 |
tmpfile.write(video_file.read())
|
33 |
video_filename = tmpfile.name
|
34 |
|
35 |
+
video_clip = VideoFileClip(video_filename)
|
36 |
+
video_duration = video_clip.duration
|
37 |
+
fps = video_clip.fps
|
38 |
+
frames_to_skip = int(fps * frame_sampling_rate)
|
39 |
+
|
40 |
video = cv2.VideoCapture(video_filename)
|
41 |
base64Frame = []
|
42 |
+
current_frame = 0
|
43 |
|
44 |
while video.isOpened():
|
45 |
success, frame = video.read()
|
46 |
if not success:
|
47 |
break
|
48 |
+
if current_frame % frames_to_skip == 0:
|
49 |
+
_, buffer = cv2.imencode('.jpg', frame)
|
50 |
+
base64Frame.append(base64.b64encode(buffer).decode("utf-8"))
|
51 |
+
current_frame += 1
|
52 |
+
|
53 |
video.release()
|
54 |
+
print(f"{len(base64Frame)} frames read at a sampling rate of {frame_sampling_rate} second(s) per frame.")
|
55 |
return base64Frame, video_filename, video_duration
|
56 |
|
|
|
57 |
def frames_to_story(base64Frames, prompt, api_key):
|
58 |
PROMPT_MESSAGES = [
|
59 |
{
|
|
|
75 |
print(result.choices[0].message.content)
|
76 |
return result.choices[0].message.content
|
77 |
|
|
|
78 |
def text_to_audio(text, api_key, voice):
|
79 |
response = requests.post(
|
80 |
"https://api.openai.com/v1/audio/speech",
|
|
|
88 |
},
|
89 |
)
|
90 |
|
|
|
91 |
if response.status_code != 200:
|
92 |
raise Exception("Request failed with status code")
|
93 |
|
|
|
94 |
audio_bytes_io = io.BytesIO()
|
|
|
95 |
for chunk in response.iter_content(chunk_size=1024*1024):
|
96 |
audio_bytes_io.write(chunk)
|
|
|
|
|
97 |
audio_bytes_io.seek(0)
|
98 |
|
|
|
99 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
|
100 |
for chunk in response.iter_content(chunk_size=1024*1024):
|
101 |
tmpfile.write(chunk)
|
|
|
103 |
|
104 |
return audio_filename, audio_bytes_io
|
105 |
|
|
|
106 |
def merge_audio_video(video_filename, audio_filename, output_filename):
|
107 |
print("Merging audio and video ...")
|
|
|
108 |
video_clip = VideoFileClip(video_filename)
|
|
|
109 |
audio_clip = AudioFileClip(audio_filename)
|
|
|
110 |
final_clip = video_clip.set_audio(audio_clip)
|
|
|
111 |
final_clip.write_videofile(output_filename, codec='libx264', audio_codec="aac")
|
|
|
112 |
video_clip.close()
|
113 |
audio_clip.close()
|
|
|
|
|
114 |
return output_filename
|
115 |
|
|
|
116 |
def main():
|
117 |
st.set_page_config(page_title="AI Voiceover", page_icon="🔮")
|
118 |
st.title("GPT4V AI Voiceover 🎥🔮")
|
|
|
119 |
|
|
|
120 |
if not check_password():
|
121 |
+
return
|
122 |
|
|
|
123 |
openai_key = os.getenv('OPENAI_API_KEY')
|
124 |
if not openai_key:
|
125 |
st.error("OpenAI API key is not set in .env.local")
|
126 |
+
return
|
127 |
+
|
128 |
uploaded_file = st.file_uploader("Select a video file", type=["mp4", "avi"])
|
129 |
|
130 |
+
option = st.selectbox('Choose the voice you want', ('Female Voice', 'Male Voice'))
|
131 |
+
classify = 'alloy' if option == 'Male Voice' else 'nova'
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
|
133 |
if uploaded_file is not None:
|
134 |
st.video(uploaded_file)
|
135 |
+
prompt = st.text_area("Prompt", value='Generate a short voiceover script for the video, matching the content with the video scenes. The style should be...')
|
|
|
136 |
|
137 |
if st.button("START PROCESSING", type="primary"):
|
138 |
with st.spinner("Video is being processed..."):
|
139 |
+
base64Frame, video_filename, video_duration = video_to_frames(uploaded_file, frame_sampling_rate=1)
|
140 |
|
|
|
141 |
if video_duration > 30:
|
142 |
st.error("The video exceeds the maximum allowed duration of 30 seconds.")
|
143 |
+
return
|
144 |
|
145 |
+
final_prompt = f"{prompt} (This video is ONLY {video_duration} seconds long. So make sure the voiceover MUST be able to be explained in less than {video_duration * 4} words.)"
|
|
|
146 |
text = frames_to_story(base64Frame, final_prompt, openai_key)
|
147 |
st.write(text)
|
148 |
+
|
149 |
audio_filename, audio_bytes_io = text_to_audio(text, openai_key, classify)
|
|
|
150 |
output_video_filename = os.path.splitext(video_filename)[0] + "_output.mp4"
|
151 |
|
152 |
final_video_filename = merge_audio_video(video_filename, audio_filename, output_video_filename)
|
|
|
|
|
153 |
st.video(final_video_filename)
|
154 |
|
|
|
155 |
os.unlink(video_filename)
|
156 |
os.unlink(audio_filename)
|
157 |
os.unlink(final_video_filename)
|
158 |
|
159 |
if __name__ == "__main__":
|
160 |
+
main()
|