mrciolino committed
Commit 4c728e9 • 1 Parent(s): 9718176

first commit

Files changed (16)
  1. Aptfile +5 -0
  2. Procfile +1 -0
  3. README.md +16 -13
  4. app.py +132 -0
  5. images/1.png +0 -0
  6. images/2.png +0 -0
  7. images/3.jpg +0 -0
  8. images/4.jpg +0 -0
  9. images/5.jpg +0 -0
  10. images/6.jpg +0 -0
  11. memory_test.py +58 -0
  12. plot.png +0 -0
  13. refs/baseball.jpg +0 -0
  14. refs/baseball_labeled.png +0 -0
  15. requirements.txt +136 -0
  16. setup.sh +10 -0
Aptfile ADDED
@@ -0,0 +1,5 @@
+ libsm6
+ libgl1
+ libxrender1
+ libfontconfig1
+ libice6
Procfile ADDED
@@ -0,0 +1 @@
+ web: sh setup.sh && streamlit run app.py
README.md CHANGED
@@ -1,13 +1,16 @@
- ---
- title: Ppt Owl Vit
- emoji: 🏃
- colorFrom: gray
- colorTo: pink
- sdk: streamlit
- sdk_version: 1.15.2
- app_file: app.py
- pinned: false
- license: cc-by-4.0
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # OWL-ViT Streamlit App
+
+ ## Summary
+ An application that uses OWL-ViT and Streamlit to detect objects in images from free-form text queries.
+
+ Deployed and ready to be tested at:
+
+ [![Open in Streamlit](https://static.streamlit.io/badges/streamlit_badge_black_white.svg)](https://www.matthewciolino.com/)
+
+ ## Example
+
+ ![Baseball Field Picture](refs/baseball_labeled.png)
+
+ ## Made Possible with Adaptation from:
+ #### Implementation -> https://huggingface.co/docs/transformers/model_doc/owlvit 🙏
+ #### Paper (OWL-ViT) -> https://arxiv.org/abs/2205.06230
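
As a minimal sketch of the text-conditioned detection flow this app wraps (same checkpoint and processor calls as app.py below; the image path and threshold are illustrative):

```python
import torch
from PIL import Image
from transformers import OwlViTProcessor, OwlViTForObjectDetection

processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch16")
model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch16")

image = Image.open("refs/baseball.jpg").convert("RGB")
queries = ["batter", "umpire", "catcher"]  # free-form class names
inputs = processor(text=[queries], images=[image], return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# map normalized box predictions back to pixel coordinates
target_sizes = torch.tensor([[image.height, image.width]])
results = processor.post_process(outputs=outputs, target_sizes=target_sizes)
for box, score, label in zip(results[0]["boxes"], results[0]["scores"], results[0]["labels"]):
    if score >= 0.5:
        print(f"{queries[label]}: {score:.2f} at {box.tolist()}")
```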
app.py ADDED
@@ -0,0 +1,132 @@
+ import os
+ import io
+ import warnings
+ import torch
+ import streamlit as st
+ import matplotlib.pyplot as plt
+ import matplotlib.colors as mcolors
+ from PIL import Image
+ from transformers import OwlViTProcessor, OwlViTForObjectDetection
+
+
+ # settings
+ os.environ['CUDA_VISIBLE_DEVICES'] = '1'
+ warnings.filterwarnings('ignore')
+ st.set_page_config()
+
+
+ class owl_vit:
+
+     def __init__(self, image_path, text, threshold):
+         self.image_path = image_path
+         self.text = text
+         self.threshold = threshold
+
+     def process(self, processor, model):
+         image = Image.open(self.image_path)
+         if image.mode != "RGB":  # handle grayscale, palette, and RGBA inputs
+             image = image.convert("RGB")
+         inputs = processor(text=[self.text], images=[image], return_tensors="pt")
+         with torch.no_grad():
+             outputs = model(**inputs)
+         target_sizes = torch.tensor([[image.height, image.width]])
+         self.results = processor.post_process(outputs=outputs, target_sizes=target_sizes)
+         self.image = image
+         return self.result_image()
+
+     def result_image(self):
+         boxes, scores, labels = self.results[0]["boxes"], self.results[0]["scores"], self.results[0]["labels"]
+         plt.figure()  # fresh figure so Streamlit reruns do not stack annotations
+         plt.imshow(self.image)
+         ax = plt.gca()
+         for box, score, label in zip(boxes, scores, labels):
+             if score >= self.threshold:
+                 box = box.detach().numpy()
+                 color = list(mcolors.CSS4_COLORS.keys())[label]
+                 ax.add_patch(plt.Rectangle(box[:2], box[2] - box[0], box[3] - box[1], fill=False, color=color, linewidth=3))
+                 ax.text(box[0], box[1], f"{self.text[label]}: {round(score.item(), 2)}", fontsize=15, color=color)
+         plt.tight_layout()
+         img_buf = io.BytesIO()
+         plt.savefig(img_buf, format='png')
+         return Image.open(img_buf)
+
+
+ def load_model():
+     with st.spinner('Getting Neurons in Order ...'):
+         processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch16")
+         model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch16")
+     return processor, model
+
+
+ def show_detects(image):
+     st.title("Results")
+     st.image(image, use_column_width=True, caption="Object Detection Results", clamp=True)
+
+
+ def process(upload, text, threshold):
+
+     # save the upload to a file
+     filetype = upload.name.split('.')[-1]
+     name = len(os.listdir("images")) + 1
+     file_path = os.path.join('images', f'{name}.{filetype}')
+     with open(file_path, "wb") as f:
+         f.write(upload.getbuffer())
+
+     # predict detections and show results
+     detector = owl_vit(file_path, text, threshold)
+     results = detector.process(processor, model)
+     show_detects(results)
+
+     # clean up - if over 1000 images in the folder, delete the oldest one
+     if len(os.listdir("images")) > 1000:
+         oldest = min(os.listdir("images"), key=lambda f: os.path.getctime(os.path.join("images", f)))
+         os.remove(os.path.join("images", oldest))
+
+
+ def main(processor, model):
+
+     # splash image
+     st.image(os.path.join('refs', 'baseball_labeled.png'), use_column_width=True)
+
+     # title and project description
+     st.title("OWL-ViT")
+     st.markdown("**OWL-ViT** is a zero-shot text-conditioned object detection model. OWL-ViT uses CLIP as its multi-modal \
+         backbone, with a ViT-like Transformer to get visual features and a causal language model to get the text features. \
+         To use CLIP for detection, OWL-ViT removes the final token pooling layer of the vision model and attaches a \
+         lightweight classification and box head to each transformer output token. Open-vocabulary classification \
+         is enabled by replacing the fixed classification layer weights with the class-name embeddings obtained \
+         from the text model. The authors first train CLIP from scratch and fine-tune it end-to-end with the classification \
+         and box heads on standard detection datasets using a bipartite matching loss. One or multiple text queries per image \
+         can be used to perform zero-shot text-conditioned object detection.", unsafe_allow_html=True)
+
+     # example
+     if st.button("Run the Example Image/Text"):
+         with st.spinner('Detecting Objects and Comparing Vocab...'):
+             info = owl_vit(os.path.join('refs', 'baseball.jpg'), ["batter", "umpire", "catcher"], 0.50)
+             results = info.process(processor, model)
+             show_detects(results)
+     if st.button("Clear Example"):
+         st.markdown("")
+
+     # upload
+     col1, col2 = st.columns(2)
+     threshold = st.slider('Confidence Threshold', min_value=0.0, max_value=1.0, value=0.1)
+     with col1:
+         upload = st.file_uploader('Image:', type=['jpg', 'jpeg', 'png'])
+     with col2:
+         text = st.text_area('Objects to Detect: (comma separated)', "batter, umpire, catcher")
+         text = [x.strip() for x in text.split(',')]
+
+     # process
+     if upload is not None and text is not None:
+         filetype = upload.name.split('.')[-1]
+         if filetype in ['jpg', 'jpeg', 'png']:
+             with st.spinner('Detecting and Counting Single Image...'):
+                 process(upload, text, threshold)
+         else:
+             st.warning('Unsupported file type.')
+
+
+ if __name__ == '__main__':
+     processor, model = load_model()
+     main(processor, model)
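
A quick, hypothetical way to exercise the pipeline above without the Streamlit UI (Streamlit calls run in "bare" mode print warnings but work; the output filename is illustrative):

```python
# Hypothetical standalone driver for the owl_vit class defined in app.py above.
from app import owl_vit, load_model

processor, model = load_model()
detector = owl_vit("refs/baseball.jpg", ["batter", "umpire", "catcher"], 0.5)
annotated = detector.process(processor, model)  # PIL.Image with drawn boxes
annotated.save("baseball_test.png")
```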
images/1.png ADDED
images/2.png ADDED
images/3.jpg ADDED
images/4.jpg ADDED
images/5.jpg ADDED
images/6.jpg ADDED
memory_test.py ADDED
@@ -0,0 +1,58 @@
+ # USAGE: python memory_test.py <PID> --plot plot.png --log activity.txt
+
+ from psrecord.main import monitor
+ import argparse
+
+
+ def main():
+     # argument parsing copied from psrecord's main.py
+     parser = argparse.ArgumentParser(
+         description='Record CPU and memory usage for a process')
+
+     parser.add_argument('process_id_or_command', type=str,
+                         help='the process id or command')
+
+     parser.add_argument('--log', type=str,
+                         help='output the statistics to a file')
+
+     parser.add_argument('--plot', type=str,
+                         help='output the statistics to a plot')
+
+     parser.add_argument('--duration', type=float,
+                         help='how long to record for (in seconds). If not '
+                              'specified, the recording is continuous until '
+                              'the job exits.')
+
+     parser.add_argument('--interval', type=float,
+                         help='how long to wait between each sample (in '
+                              'seconds). By default the process is sampled '
+                              'as often as possible.')
+
+     parser.add_argument('--include-children',
+                         help='include sub-processes in statistics (results '
+                              'in a slower maximum sampling rate).',
+                         action='store_true')
+
+     args = parser.parse_args()
+
+     # attach to an existing PID, or launch the command and attach to it
+     try:
+         pid = int(args.process_id_or_command)
+         print("Attaching to process {0}".format(pid))
+         sprocess = None
+     except Exception:
+         import subprocess
+         command = args.process_id_or_command
+         print("Starting up command '{0}' and attaching to process"
+               .format(command))
+         sprocess = subprocess.Popen(command, shell=True)
+         pid = sprocess.pid
+
+     monitor(pid, logfile=args.log, plot=args.plot, duration=args.duration,
+             interval=args.interval, include_children=args.include_children)
+
+     if sprocess is not None:
+         sprocess.kill()
+
+
+ if __name__ == '__main__':
+     main()
plot.png ADDED
refs/baseball.jpg ADDED
refs/baseball_labeled.png ADDED
requirements.txt ADDED
@@ -0,0 +1,136 @@
+ altair==4.1.0
+ anyio==3.5.0
+ argon2-cffi==21.3.0
+ argon2-cffi-bindings==21.2.0
+ asttokens==2.2.1
+ attrs==22.1.0
+ autopep8==1.6.0
+ backcall==0.2.0
+ backports.functools-lru-cache==1.6.4
+ beautifulsoup4==4.11.1
+ bleach==4.1.0
+ blinker==1.4
+ Bottleneck==1.3.5
+ brotlipy==0.7.0
+ cachetools==4.2.2
+ certifi==2022.9.24
+ cffi==1.15.1
+ charset-normalizer==2.0.4
+ click==8.0.4
+ colorama==0.4.5
+ comm==0.1.2
+ commonmark==0.9.1
+ contourpy==1.0.5
+ cryptography==38.0.1
+ cycler==0.11.0
+ debugpy==1.5.1
+ decorator==5.1.1
+ defusedxml==0.7.1
+ entrypoints==0.4
+ executing==1.2.0
+ fastjsonschema==2.16.2
+ filelock==3.6.0
+ flit_core==3.6.0
+ fonttools==4.25.0
+ future==0.18.2
+ gitdb==4.0.7
+ GitPython==3.1.18
+ huggingface-hub==0.10.1
+ idna==3.4
+ importlib-metadata==4.11.3
+ ipykernel==6.19.3
+ ipython==8.7.0
+ ipython-genutils==0.2.0
+ ipywidgets==7.6.5
+ jedi==0.18.2
+ Jinja2==3.1.2
+ jsonschema==4.16.0
+ jupyter_client==7.4.8
+ jupyter_core==5.1.0
+ jupyter-server==1.18.1
+ jupyterlab-pygments==0.1.2
+ jupyterlab-widgets==1.0.0
+ kiwisolver==1.4.2
+ lxml==4.9.1
+ MarkupSafe==2.1.1
+ matplotlib==3.6.2
+ matplotlib-inline==0.1.6
+ mistune==0.8.4
+ mkl-fft==1.3.1
+ mkl-random==1.2.2
+ mkl-service==2.4.0
+ munkres==1.1.4
+ nbclassic==0.4.8
+ nbclient==0.5.13
+ nbconvert==6.5.4
+ nbformat==5.7.0
+ nest-asyncio==1.5.6
+ notebook==6.5.2
+ notebook_shim==0.2.2
+ numexpr==2.8.4
+ numpy==1.22.3
+ packaging==21.3
+ pandas==1.5.2
+ pandocfilters==1.5.0
+ parso==0.8.3
+ pickleshare==0.7.5
+ Pillow==9.3.0
+ pip==22.3.1
+ platformdirs==2.6.0
+ ply==3.11
+ prometheus-client==0.14.1
+ prompt-toolkit==3.0.36
+ protobuf==3.20.1
+ psutil==5.9.0
+ pure-eval==0.2.2
+ pyarrow==8.0.0
+ pycodestyle==2.8.0
+ pycparser==2.21
+ pydeck==0.7.1
+ Pygments==2.13.0
+ Pympler==0.9
+ pyOpenSSL==22.0.0
+ pyparsing==3.0.9
+ PyQt5==5.15.7
+ PyQt5-sip==12.11.0
+ pyrsistent==0.18.0
+ PySocks==1.7.1
+ python-dateutil==2.8.2
+ pytz==2022.1
+ PyYAML==6.0
+ pyzmq==23.2.0
+ regex==2022.7.9
+ requests==2.28.1
+ rich==12.5.1
+ semver==2.13.0
+ Send2Trash==1.8.0
+ setuptools==65.5.0
+ sip==6.6.2
+ six==1.16.0
+ smmap==4.0.0
+ sniffio==1.2.0
+ soupsieve==2.3.2.post1
+ stack-data==0.6.2
+ streamlit==1.11.0
+ terminado==0.13.1
+ tinycss2==1.2.1
+ tokenizers==0.11.4
+ toml==0.10.2
+ toolz==0.12.0
+ torch==1.13.1
+ torchaudio==0.13.1
+ torchvision==0.14.1
+ tornado==6.2
+ tqdm==4.64.1
+ traitlets==5.8.0
+ transformers==4.24.0
+ typing_extensions==4.4.0
+ tzlocal==2.1
+ urllib3==1.26.13
+ validators==0.18.2
+ watchdog==2.1.6
+ wcwidth==0.2.5
+ webencodings==0.5.1
+ websocket-client==0.58.0
+ wheel==0.37.1
+ zipp==3.8.0
setup.sh ADDED
@@ -0,0 +1,10 @@
+ /home/appuser/venv/bin/python -m pip install --upgrade pip
+
+ mkdir -p ~/.streamlit/
+ echo "\
+ [server]\n\
+ headless = true\n\
+ port = $PORT\n\
+ enableCORS = false\n\
+ \n\
+ " > ~/.streamlit/config.toml