tsi-org committed on
Commit
357d274
1 Parent(s): 2f9a107

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -55
app.py CHANGED
@@ -1,8 +1,6 @@
1
  from dotenv import load_dotenv
2
  from IPython.display import display, Image, Audio
3
  from moviepy.editor import VideoFileClip, AudioFileClip
4
- from moviepy.audio.io.AudioFileClip import AudioFileClip
5
-
6
  import cv2
7
  import base64
8
  import io
@@ -15,7 +13,6 @@ import tempfile
15
  # Load environment variables from .env.local
16
  load_dotenv('.env.local')
17
 
18
- # Add a function to check the password
19
  def check_password():
20
  correct_password = os.getenv('PASSWORD')
21
  if correct_password is None:
@@ -30,29 +27,33 @@ def check_password():
30
  st.error("Incorrect password")
31
  return False
32
 
33
- ## 1. Turn video into frames
34
def video_to_frames(video_file):
    """Save an uploaded video to disk and return all of its frames as base64 JPEGs.

    Args:
        video_file: File-like object whose ``read()`` returns the raw video
            bytes.

    Returns:
        A tuple ``(base64Frame, video_filename, video_duration)``:
        the list of base64-encoded JPEG strings (one per decoded frame), the
        path of the temporary ``.mp4`` copy, and the clip duration reported by
        moviepy. The temporary file is created with ``delete=False``, so the
        caller is responsible for removing it.
    """
    # Persist the upload: both moviepy and OpenCV are given a file path here.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmpfile:
        tmpfile.write(video_file.read())
        video_filename = tmpfile.name

    # Only the duration is needed from moviepy; close the clip right away so
    # its reader does not stay open for the rest of the run.
    duration_probe = VideoFileClip(video_filename)
    try:
        video_duration = duration_probe.duration
    finally:
        duration_probe.close()

    video = cv2.VideoCapture(video_filename)
    base64Frame = []
    try:
        while video.isOpened():
            success, frame = video.read()
            if not success:
                # End of stream (or a decode failure): stop reading.
                break
            _, buffer = cv2.imencode('.jpg', frame)
            base64Frame.append(base64.b64encode(buffer).decode("utf-8"))
    finally:
        # Release the capture even if encoding a frame raised.
        video.release()

    print(len(base64Frame), "frames read.")
    return base64Frame, video_filename, video_duration
54
 
55
- ## 2. Generate stories based on frames with gpt4v
56
  def frames_to_story(base64Frames, prompt, api_key):
57
  PROMPT_MESSAGES = [
58
  {
@@ -74,7 +75,6 @@ def frames_to_story(base64Frames, prompt, api_key):
74
  print(result.choices[0].message.content)
75
  return result.choices[0].message.content
76
 
77
- ## 3. Generate voiceover from stories
78
  def text_to_audio(text, api_key, voice):
79
  response = requests.post(
80
  "https://api.openai.com/v1/audio/speech",
@@ -88,20 +88,14 @@ def text_to_audio(text, api_key, voice):
88
  },
89
  )
90
 
91
- # Check if the request was successful
92
  if response.status_code != 200:
93
  raise Exception("Request failed with status code")
94
 
95
- # Create an in-memory bytes buffer
96
  audio_bytes_io = io.BytesIO()
97
- # Write audio data to the in-memory bytes buffer
98
  for chunk in response.iter_content(chunk_size=1024*1024):
99
  audio_bytes_io.write(chunk)
100
-
101
- # Important: Seek to the start of the BytesIO buffer before returning
102
  audio_bytes_io.seek(0)
103
 
104
- # Save audio to a temporary file
105
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
106
  for chunk in response.iter_content(chunk_size=1024*1024):
107
  tmpfile.write(chunk)
@@ -109,83 +103,58 @@ def text_to_audio(text, api_key, voice):
109
 
110
  return audio_filename, audio_bytes_io
111
 
112
- ## 4. Merge videos & audio
113
def merge_audio_video(video_filename, audio_filename, output_filename):
    """Write a copy of the video whose audio track is replaced by the given audio.

    Args:
        video_filename: Path of the source video.
        audio_filename: Path of the audio file to attach.
        output_filename: Path the merged video is written to
            (libx264 video, AAC audio).

    Returns:
        ``output_filename``, unchanged.
    """
    print("Merging audio and video ...")
    video_clip = None
    audio_clip = None
    try:
        video_clip = VideoFileClip(video_filename)
        audio_clip = AudioFileClip(audio_filename)
        # set_audio returns a new clip carrying the audio; the result is
        # written out with the audio track included.
        final_clip = video_clip.set_audio(audio_clip)
        final_clip.write_videofile(output_filename, codec='libx264', audio_codec="aac")
    finally:
        # Close whatever was opened, even when loading or encoding failed.
        for clip in (video_clip, audio_clip):
            if clip is not None:
                clip.close()

    return output_filename
129
 
130
- ## 5. Streamlit UI
131
def main():
    """Run the Streamlit page: gate on password, then voice over an uploaded video.

    Flow: check the password, read ``OPENAI_API_KEY`` from the environment,
    let the user upload a video and pick a voice, then on "START PROCESSING"
    extract frames, generate a script, synthesize audio, merge it into the
    video and display the result. Temporary files created along the way are
    removed before returning, including on the early exit for over-long
    videos and when a processing step raises.
    """
    st.set_page_config(page_title="AI Voiceover", page_icon="🔮")
    st.title("GPT4V AI Voiceover 🎥🔮")
    st.text("Explore how GPT4V changes the way we voiceover videos.")

    # Nothing below is shown until the password check passes.
    if not check_password():
        return

    openai_key = os.getenv('OPENAI_API_KEY')
    if not openai_key:
        st.error("OpenAI API key is not set in .env.local")
        return

    uploaded_file = st.file_uploader("Select a video file", type=["mp4", "avi"])

    option = st.selectbox(
        'Choose the voice you want',
        ('Female Voice', 'Male Voice'))
    # Map the UI label to the voice name passed to text_to_audio.
    classify = {'Male Voice': 'alloy', 'Female Voice': 'nova'}.get(option, '')

    if uploaded_file is not None:
        st.video(uploaded_file)
        p = 'Generate a short voiceover script for the video, matching the content with the video scenes. The style should be...'
        prompt = st.text_area("Prompt", value=p)

        if st.button("START PROCESSING", type="primary"):
            with st.spinner("Video is being processed..."):
                # Paths are recorded as soon as they are known so the
                # `finally` below can remove them on every exit path.
                temp_paths = []
                try:
                    base64Frame, video_filename, video_duration = video_to_frames(uploaded_file)
                    temp_paths.append(video_filename)

                    if video_duration > 30:
                        st.error("The video exceeds the maximum allowed duration of 30 seconds.")
                        return

                    est_word_count = video_duration * 4
                    final_prompt = prompt + f"(This video is ONLY {video_duration} seconds long. So make sure the voiceover MUST be able to be explained in less than {est_word_count} words. Ignore and don't generate anything else than the script that you'll use to voice over the video.)"
                    text = frames_to_story(base64Frame, final_prompt, openai_key)
                    st.write(text)

                    audio_filename, audio_bytes_io = text_to_audio(text, openai_key, classify)
                    temp_paths.append(audio_filename)

                    output_video_filename = os.path.splitext(video_filename)[0] + "_output.mp4"
                    # Registered before merging: a failed encode may leave a
                    # partial output file behind.
                    temp_paths.append(output_video_filename)

                    final_video_filename = merge_audio_video(video_filename, audio_filename, output_video_filename)
                    st.video(final_video_filename)
                finally:
                    for path in temp_paths:
                        if os.path.exists(path):
                            os.unlink(path)


if __name__ == "__main__":
    main()
 
1
  from dotenv import load_dotenv
2
  from IPython.display import display, Image, Audio
3
  from moviepy.editor import VideoFileClip, AudioFileClip
 
 
4
  import cv2
5
  import base64
6
  import io
 
13
  # Load environment variables from .env.local
14
  load_dotenv('.env.local')
15
 
 
16
  def check_password():
17
  correct_password = os.getenv('PASSWORD')
18
  if correct_password is None:
 
27
  st.error("Incorrect password")
28
  return False
29
 
30
def video_to_frames(video_file, frame_sampling_rate=1):
    """Save an uploaded video to disk and sample its frames as base64 JPEGs.

    Args:
        video_file: File-like object whose ``read()`` returns the raw video
            bytes.
        frame_sampling_rate: Seconds of video between two sampled frames.
            It is multiplied by the clip's fps to get the frame stride;
            values that would give a stride below one frame fall back to
            sampling every frame.

    Returns:
        A tuple ``(base64Frame, video_filename, video_duration)``:
        the list of base64-encoded JPEG strings for the sampled frames, the
        path of the temporary ``.mp4`` copy, and the clip duration reported by
        moviepy. The temporary file is created with ``delete=False``, so the
        caller is responsible for removing it.
    """
    # Persist the upload: both moviepy and OpenCV are given a file path here.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmpfile:
        tmpfile.write(video_file.read())
        video_filename = tmpfile.name

    # moviepy is only used for metadata; close the clip once it has been read
    # so its reader does not stay open for the rest of the run.
    video_clip = VideoFileClip(video_filename)
    try:
        video_duration = video_clip.duration
        fps = video_clip.fps
    finally:
        video_clip.close()

    # Clamp to at least 1: int() truncates to 0 when fps * rate < 1, and a
    # zero stride would raise ZeroDivisionError in the modulo below.
    frames_to_skip = max(1, int(fps * frame_sampling_rate))

    video = cv2.VideoCapture(video_filename)
    base64Frame = []
    current_frame = 0
    try:
        while video.isOpened():
            success, frame = video.read()
            if not success:
                # End of stream (or a decode failure): stop reading.
                break
            # Keep frame 0 and then every `frames_to_skip`-th frame.
            if current_frame % frames_to_skip == 0:
                _, buffer = cv2.imencode('.jpg', frame)
                base64Frame.append(base64.b64encode(buffer).decode("utf-8"))
            current_frame += 1
    finally:
        # Release the capture even if encoding a frame raised.
        video.release()

    print(f"{len(base64Frame)} frames read at a sampling rate of {frame_sampling_rate} second(s) per frame.")
    return base64Frame, video_filename, video_duration
56
 
 
57
  def frames_to_story(base64Frames, prompt, api_key):
58
  PROMPT_MESSAGES = [
59
  {
 
75
  print(result.choices[0].message.content)
76
  return result.choices[0].message.content
77
 
 
78
  def text_to_audio(text, api_key, voice):
79
  response = requests.post(
80
  "https://api.openai.com/v1/audio/speech",
 
88
  },
89
  )
90
 
 
91
  if response.status_code != 200:
92
  raise Exception("Request failed with status code")
93
 
 
94
  audio_bytes_io = io.BytesIO()
 
95
  for chunk in response.iter_content(chunk_size=1024*1024):
96
  audio_bytes_io.write(chunk)
 
 
97
  audio_bytes_io.seek(0)
98
 
 
99
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
100
  for chunk in response.iter_content(chunk_size=1024*1024):
101
  tmpfile.write(chunk)
 
103
 
104
  return audio_filename, audio_bytes_io
105
 
 
106
def merge_audio_video(video_filename, audio_filename, output_filename):
    """Write a copy of the video whose audio track is replaced by the given audio.

    Args:
        video_filename: Path of the source video.
        audio_filename: Path of the audio file to attach.
        output_filename: Path the merged video is written to
            (libx264 video, AAC audio).

    Returns:
        ``output_filename``, unchanged.
    """
    print("Merging audio and video ...")
    video_clip = VideoFileClip(video_filename)
    try:
        audio_clip = AudioFileClip(audio_filename)
        try:
            # set_audio returns a new clip; the source clips are the ones
            # that get closed below.
            final_clip = video_clip.set_audio(audio_clip)
            final_clip.write_videofile(output_filename, codec='libx264', audio_codec="aac")
        finally:
            audio_clip.close()
    finally:
        # Runs even when loading the audio or encoding fails, so the video
        # reader is not left open.
        video_clip.close()

    return output_filename
115
 
 
116
def main():
    """Run the Streamlit page: gate on password, then voice over an uploaded video.

    Flow: check the password, read ``OPENAI_API_KEY`` from the environment,
    let the user upload a video and pick a voice, then on "START PROCESSING"
    sample frames, generate a script, synthesize audio, merge it into the
    video and display the result. Temporary files created along the way are
    removed before returning, including on the early exit for over-long
    videos and when a processing step raises.
    """
    st.set_page_config(page_title="AI Voiceover", page_icon="🔮")
    st.title("GPT4V AI Voiceover 🎥🔮")

    # Nothing below is shown until the password check passes.
    if not check_password():
        return

    openai_key = os.getenv('OPENAI_API_KEY')
    if not openai_key:
        st.error("OpenAI API key is not set in .env.local")
        return

    uploaded_file = st.file_uploader("Select a video file", type=["mp4", "avi"])

    option = st.selectbox('Choose the voice you want', ('Female Voice', 'Male Voice'))
    # Voice name passed to text_to_audio.
    classify = 'alloy' if option == 'Male Voice' else 'nova'

    if uploaded_file is None:
        return

    st.video(uploaded_file)
    prompt = st.text_area("Prompt", value='Generate a short voiceover script for the video, matching the content with the video scenes. The style should be...')

    if not st.button("START PROCESSING", type="primary"):
        return

    with st.spinner("Video is being processed..."):
        # Paths are recorded as soon as they are known so the `finally`
        # below can remove them on every exit path.
        temp_paths = []
        try:
            base64Frame, video_filename, video_duration = video_to_frames(uploaded_file, frame_sampling_rate=1)
            temp_paths.append(video_filename)

            if video_duration > 30:
                st.error("The video exceeds the maximum allowed duration of 30 seconds.")
                return

            final_prompt = f"{prompt} (This video is ONLY {video_duration} seconds long. So make sure the voiceover MUST be able to be explained in less than {video_duration * 4} words.)"
            text = frames_to_story(base64Frame, final_prompt, openai_key)
            st.write(text)

            audio_filename, audio_bytes_io = text_to_audio(text, openai_key, classify)
            temp_paths.append(audio_filename)

            output_video_filename = os.path.splitext(video_filename)[0] + "_output.mp4"
            # Registered before merging: a failed encode may leave a partial
            # output file behind.
            temp_paths.append(output_video_filename)

            final_video_filename = merge_audio_video(video_filename, audio_filename, output_video_filename)
            st.video(final_video_filename)
        finally:
            for path in temp_paths:
                if os.path.exists(path):
                    os.unlink(path)


if __name__ == "__main__":
    main()