---
license: mit
library_name: transformers
pipeline_tag: image-to-text
---

# Load model

```python
from transformers import AutoProcessor, BlipForConditionalGeneration

# The processor and the model are loaded from the same checkpoint so that
# preprocessing and tokenization match the weights.
processor = AutoProcessor.from_pretrained("trunks/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("trunks/blip-image-captioning-base")
```

# Prepare image for model

```python
from PIL import Image
from IPython.display import display

# Placeholder path: point this at the image you want to caption.
img1 = Image.open("imagepath/img.jpeg")

# Show a preview scaled to 30% of the original size. The resized copy is
# only used for display; the full-size image is what gets captioned below.
width, height = img1.size
img1_resized = img1.resize((int(0.3 * width), int(0.3 * height)))
display(img1_resized)
```

# Testing image

```python
# Preprocess the image into PyTorch tensors ("pt").
inputs = processor(images=img1, return_tensors="pt")
pixel_values = inputs.pixel_values

# Generate caption token ids; max_length bounds the length of the output.
generated_ids = model.generate(pixel_values=pixel_values, max_length=50)

# batch_decode returns one string per generated sequence; a single image was
# passed in, so the caption is the first (and only) entry.
generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(generated_caption)
```