vid2voiceover

Running

App Files Community

tsi-org committed on Mar 31

Commit

357d274

•

1 Parent(s): 2f9a107

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -55

app.py CHANGED Viewed

@@ -1,8 +1,6 @@
 from dotenv import load_dotenv
 from IPython.display import display, Image, Audio
 from moviepy.editor import VideoFileClip, AudioFileClip
-from moviepy.audio.io.AudioFileClip import AudioFileClip
 import cv2
 import base64
 import io
@@ -15,7 +13,6 @@ import tempfile
 # Load environment variables from .env.local
 load_dotenv('.env.local')
-# Add a function to check the password
 def check_password():
     correct_password = os.getenv('PASSWORD')
     if correct_password is None:
@@ -30,29 +27,33 @@ def check_password():
             st.error("Incorrect password")
         return False
-## 1. Turn video into frames
-def video_to_frames(video_file):
-    # Save the uploaded video file to a temporary file
     with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmpfile:
         tmpfile.write(video_file.read())
         video_filename = tmpfile.name
-    video_duration = VideoFileClip(video_filename).duration
     video = cv2.VideoCapture(video_filename)
     base64Frame = []
     while video.isOpened():
         success, frame = video.read()
         if not success:
             break
-        _, buffer = cv2.imencode('.jpg', frame)
-        base64Frame.append(base64.b64encode(buffer).decode("utf-8"))
     video.release()
-    print(len(base64Frame), "frames read.")
     return base64Frame, video_filename, video_duration
-## 2. Generate stories based on frames with gpt4v
 def frames_to_story(base64Frames, prompt, api_key):
     PROMPT_MESSAGES = [
         {
@@ -74,7 +75,6 @@ def frames_to_story(base64Frames, prompt, api_key):
     print(result.choices[0].message.content)
     return result.choices[0].message.content
-## 3. Generate voiceover from stories
 def text_to_audio(text, api_key, voice):
     response = requests.post(
         "https://api.openai.com/v1/audio/speech",
@@ -88,20 +88,14 @@ def text_to_audio(text, api_key, voice):
         },
     )
-    # Check if the request was successful
     if response.status_code != 200:
         raise Exception("Request failed with status code")
-    # Create an in-memory bytes buffer
     audio_bytes_io = io.BytesIO()
-    # Write audio data to the in-memory bytes buffer
     for chunk in response.iter_content(chunk_size=1024*1024):
         audio_bytes_io.write(chunk)
-    # Important: Seek to the start of the BytesIO buffer before returning
     audio_bytes_io.seek(0)
-    # Save audio to a temporary file
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
         for chunk in response.iter_content(chunk_size=1024*1024):
             tmpfile.write(chunk)
@@ -109,83 +103,58 @@ def text_to_audio(text, api_key, voice):
     return audio_filename, audio_bytes_io
-## 4. Merge videos & audio
 def merge_audio_video(video_filename, audio_filename, output_filename):
     print("Merging audio and video ...")
-    # Load the video file
     video_clip = VideoFileClip(video_filename)
-    # Load the audio file
     audio_clip = AudioFileClip(audio_filename)
-    # Set the audio of the video clip as the audio file
     final_clip = video_clip.set_audio(audio_clip)
-    # Write the result to a file (without audio)
     final_clip.write_videofile(output_filename, codec='libx264', audio_codec="aac")
-    # Close the clips
     video_clip.close()
     audio_clip.close()
-    # Return the path to the new video file
     return output_filename
-## 5. Streamlit UI
 def main():
     st.set_page_config(page_title="AI Voiceover", page_icon="🔮")
     st.title("GPT4V AI Voiceover 🎥🔮")
-    st.text("Explore how GPT4V changes the way we voiceover videos.")
-    # Check password before proceeding
     if not check_password():
-        return  # Exit the function if the password check fails
-    # Retrieve the OpenAI API key from environment
     openai_key = os.getenv('OPENAI_API_KEY')
     if not openai_key:
         st.error("OpenAI API key is not set in .env.local")
-        return  # or handle the error as you see fit
     uploaded_file = st.file_uploader("Select a video file", type=["mp4", "avi"])
-    option = st.selectbox(
-        'Choose the voice you want',
-        ('Female Voice', 'Male Voice'))
-    classify = ''
-    if option == 'Male Voice':
-        classify = 'alloy'
-    elif option == 'Female Voice':
-        classify = 'nova'
     if uploaded_file is not None:
         st.video(uploaded_file)
-        p = 'Generate a short voiceover script for the video, matching the content with the video scenes. The style should be...'
-        prompt = st.text_area("Prompt", value=p)
         if st.button("START PROCESSING", type="primary"):
             with st.spinner("Video is being processed..."):
-                base64Frame, video_filename, video_duration = video_to_frames(uploaded_file)
-                # Check if the video duration exceeds 30 seconds
                 if video_duration > 30:
                     st.error("The video exceeds the maximum allowed duration of 30 seconds.")
-                    return  # Stop processing further
-                est_word_count = video_duration * 4
-                final_prompt = prompt + f"(This video is ONLY {video_duration} seconds long. So make sure the voiceover MUST be able to be explained in less than {est_word_count} words. Ignore and don't generate anything else than the script that you'll use to voice over the video.)"
                 text = frames_to_story(base64Frame, final_prompt, openai_key)
                 st.write(text)
-                # Generate audio from text
                 audio_filename, audio_bytes_io = text_to_audio(text, openai_key, classify)
-                # Merge audio and video
                 output_video_filename = os.path.splitext(video_filename)[0] + "_output.mp4"
                 final_video_filename = merge_audio_video(video_filename, audio_filename, output_video_filename)
-                # Display the result
                 st.video(final_video_filename)
-                # Clean up the temporary files
                 os.unlink(video_filename)
                 os.unlink(audio_filename)
                 os.unlink(final_video_filename)
 if __name__ == "__main__":
-    main()

 from dotenv import load_dotenv
 from IPython.display import display, Image, Audio
 from moviepy.editor import VideoFileClip, AudioFileClip
 import cv2
 import base64
 import io
 # Load environment variables from .env.local
 load_dotenv('.env.local')
 def check_password():
     correct_password = os.getenv('PASSWORD')
     if correct_password is None:
             st.error("Incorrect password")
         return False
+def video_to_frames(video_file, frame_sampling_rate=1):
     with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmpfile:
         tmpfile.write(video_file.read())
         video_filename = tmpfile.name
+    video_clip = VideoFileClip(video_filename)
+    video_duration = video_clip.duration
+    fps = video_clip.fps
+    frames_to_skip = int(fps * frame_sampling_rate)
     video = cv2.VideoCapture(video_filename)
     base64Frame = []
+    current_frame = 0
     while video.isOpened():
         success, frame = video.read()
         if not success:
             break
+        if current_frame % frames_to_skip == 0:
+            _, buffer = cv2.imencode('.jpg', frame)
+            base64Frame.append(base64.b64encode(buffer).decode("utf-8"))
+        current_frame += 1
     video.release()
+    print(f"{len(base64Frame)} frames read at a sampling rate of {frame_sampling_rate} second(s) per frame.")
     return base64Frame, video_filename, video_duration
 def frames_to_story(base64Frames, prompt, api_key):
     PROMPT_MESSAGES = [
         {
     print(result.choices[0].message.content)
     return result.choices[0].message.content
 def text_to_audio(text, api_key, voice):
     response = requests.post(
         "https://api.openai.com/v1/audio/speech",
         },
     )
     if response.status_code != 200:
         raise Exception("Request failed with status code")
     audio_bytes_io = io.BytesIO()
     for chunk in response.iter_content(chunk_size=1024*1024):
         audio_bytes_io.write(chunk)
     audio_bytes_io.seek(0)
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
         for chunk in response.iter_content(chunk_size=1024*1024):
             tmpfile.write(chunk)
     return audio_filename, audio_bytes_io
 def merge_audio_video(video_filename, audio_filename, output_filename):
     print("Merging audio and video ...")
     video_clip = VideoFileClip(video_filename)
     audio_clip = AudioFileClip(audio_filename)
     final_clip = video_clip.set_audio(audio_clip)
     final_clip.write_videofile(output_filename, codec='libx264', audio_codec="aac")
     video_clip.close()
     audio_clip.close()
     return output_filename
 def main():
     st.set_page_config(page_title="AI Voiceover", page_icon="🔮")
     st.title("GPT4V AI Voiceover 🎥🔮")
     if not check_password():
+        return
     openai_key = os.getenv('OPENAI_API_KEY')
     if not openai_key:
         st.error("OpenAI API key is not set in .env.local")
+        return
     uploaded_file = st.file_uploader("Select a video file", type=["mp4", "avi"])
+    option = st.selectbox('Choose the voice you want', ('Female Voice', 'Male Voice'))
+    classify = 'alloy' if option == 'Male Voice' else 'nova'
     if uploaded_file is not None:
         st.video(uploaded_file)
+        prompt = st.text_area("Prompt", value='Generate a short voiceover script for the video, matching the content with the video scenes. The style should be...')
         if st.button("START PROCESSING", type="primary"):
             with st.spinner("Video is being processed..."):
+                base64Frame, video_filename, video_duration = video_to_frames(uploaded_file, frame_sampling_rate=1)
                 if video_duration > 30:
                     st.error("The video exceeds the maximum allowed duration of 30 seconds.")
+                    return
+                final_prompt = f"{prompt} (This video is ONLY {video_duration} seconds long. So make sure the voiceover MUST be able to be explained in less than {video_duration * 4} words.)"
                 text = frames_to_story(base64Frame, final_prompt, openai_key)
                 st.write(text)
                 audio_filename, audio_bytes_io = text_to_audio(text, openai_key, classify)
                 output_video_filename = os.path.splitext(video_filename)[0] + "_output.mp4"
                 final_video_filename = merge_audio_video(video_filename, audio_filename, output_video_filename)
                 st.video(final_video_filename)
                 os.unlink(video_filename)
                 os.unlink(audio_filename)
                 os.unlink(final_video_filename)
 if __name__ == "__main__":
+    main()