Spaces:

dokster
/

vqa-analysis

Running

App Files Community

dokster committed on Jul 2, 2023

Commit

9f8e9e6

•

1 Parent(s): cbafdbe

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -29

app.py CHANGED Viewed

@@ -72,46 +72,53 @@ def main():
     st.write("---")
-    question = st.text_input("❔ Enter question prompt: ", "")
-    try:
-        tfile = tempfile.NamedTemporaryFile(delete=False)
-        tfile.write(uploaded_file.read())
-        device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
-        val_embeddings = []
-        val_captions = []
-        result = ''
-        text = f'Question: {question}? Answer:'
-        #read video -> get_ans
-        video = read_video(tfile.name, transform=None, frames_num=4)
-        if len(video) > 0:
-            i = image_grid(video, 2, 2)
-            image = preprocess(i).unsqueeze(0).to(device)
-            with torch.no_grad():
-                prefix = clip_model.encode_image(image).to(device, dtype=torch.float32)
-            val_embeddings.append(prefix)
-            val_captions.append(text)
-        answers = []
-        for i in tqdm(range(len(val_embeddings))):
-            emb = val_embeddings[i]
-            caption = val_captions[i]
-            ans = get_ans(model, tokenizer, emb, prefix_length, caption)
-            answers.append(ans['answer'])
-        result = answers[0].split(' A: ')[0]
-        res = st.text_input('✅ Answer to the question', result, disabled=False)
-    except:
-        pass
 if __name__ == '__main__':
     main()

     st.write("---")
+    a, b = st.columns([4, 1])
+    question = a.text_input(
+        label="❔ Enter question prompt: ",
+        placeholder="",
+        # label_visibility="collapsed",
+    )
+    button = b.button("Send", use_container_width=True)
+    if button:
+        try:
+            tfile = tempfile.NamedTemporaryFile(delete=False)
+            tfile.write(uploaded_file.read())
+            device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+            val_embeddings = []
+            val_captions = []
+            result = ''
+            text = f'Question: {question}? Answer:'
+            # read video -> get_ans
+            video = read_video(tfile.name, transform=None, frames_num=4)
+            if len(video) > 0:
+                i = image_grid(video, 2, 2)
+                image = preprocess(i).unsqueeze(0).to(device)
+                with torch.no_grad():
+                    prefix = clip_model.encode_image(image).to(device, dtype=torch.float32)
+                val_embeddings.append(prefix)
+                val_captions.append(text)
+            answers = []
+            for i in tqdm(range(len(val_embeddings))):
+                emb = val_embeddings[i]
+                caption = val_captions[i]
+                ans = get_ans(model, tokenizer, emb, prefix_length, caption)
+                answers.append(ans['answer'])
+            result = answers[0].split(' A: ')[0]
+            res = st.text_input('✅ Answer to the question', result, disabled=False)
+        except:
+            pass
 if __name__ == '__main__':
     main()