Irsh Vijayvargia committed on
Commit fc055d6 • 1 Parent(s): cb767d9

First Commit

Files changed (6)
  1. README.md +3 -3
  2. app.py +120 -0
  3. header.jpeg +0 -0
  4. requirements.txt +6 -0
  5. run.ipynb +238 -0
  6. trained-cnn-concrete-crack.model +3 -0
README.md CHANGED
@@ -1,7 +1,7 @@
 ---
-title: Concrete Crack Gradcam
-emoji: 👁
-colorFrom: green
+title: Concrete Crack GradCAM
+emoji: 🏗
+colorFrom: blue
 colorTo: indigo
 sdk: gradio
 sdk_version: 4.36.1
app.py ADDED
@@ -0,0 +1,120 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import Dataset, DataLoader, Subset
+from torchvision import transforms, datasets
+from PIL import Image
+from tqdm.auto import tqdm
+import torch.nn.functional as F
+from pytorch_grad_cam import GradCAM, HiResCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, FullGrad
+from matplotlib import colormaps
+import numpy as np
+import gradio as gr
+
+
+class CNN(nn.Module):
+    def __init__(self):
+        super(CNN, self).__init__()
+        # Convolutional layers
+        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
+        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
+        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
+
+        # Pooling layer (halves spatial size; applied three times: 224 -> 28)
+        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
+
+        # Fully connected layers
+        self.fc1 = nn.Linear(64 * (224 // 8) * (224 // 8), 64)  # 64 channels at 28x28 after three poolings
+        self.fc2 = nn.Linear(64, 2)  # 2 classes for binary classification
+
+    def forward(self, x):
+        # Convolutional layers with ReLU activation and pooling
+        x = self.pool(F.relu(self.conv1(x)))
+        x = self.pool(F.relu(self.conv2(x)))
+        x = self.pool(F.relu(self.conv3(x)))
+
+        # Flatten for fully connected layers
+        x = torch.flatten(x, 1)
+
+        # Fully connected layers with ReLU activation
+        x = F.relu(self.fc1(x))
+        x = self.fc2(x)
+
+        return x
+
+transform = transforms.Compose([
+    transforms.Resize((224, 224)),  # Resize to 224x224
+    transforms.ToTensor(),          # Convert to tensor
+    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet normalization
+])
+
+model = CNN()
+model.load_state_dict(torch.load("trained-cnn-concrete-crack.model", map_location=torch.device("cpu")))
+
+# Colormap used to render the CAM heatmap
+magmaify = colormaps['magma']
+
+
+def compute_gradcam(img_tensor, layer_idx, typeCAM):
+    allCAMs = {"GradCAM": GradCAM, "HiResCAM": HiResCAM, "ScoreCAM": ScoreCAM,
+               "GradCAMPlusPlus": GradCAMPlusPlus, "AblationCAM": AblationCAM,
+               "XGradCAM": XGradCAM, "FullGrad": FullGrad}
+
+    # One target layer per slider position (1-indexed)
+    target_layers = [[model.conv1], [model.conv2], [model.conv3]]
+
+    cam = allCAMs[typeCAM](model=model, target_layers=target_layers[layer_idx - 1])
+
+    # targets=None explains the highest-scoring class
+    grayscale_cam = cam(input_tensor=img_tensor, targets=None)
+
+    return magmaify(grayscale_cam.reshape(224, 224))
+
+
+def predict_and_gradcam(model, img, layer_idx, typeCAM):
+    # Preprocess the image
+    img = Image.fromarray(img.astype('uint8'), 'RGB') if isinstance(img, np.ndarray) else img
+    img_tensor = transform(img).unsqueeze(0)
+
+    # Get predicted class index
+    with torch.no_grad():
+        output = model(img_tensor)
+        _, predicted = torch.max(output.data, 1)
+        predicted_label = str(predicted.item())
+
+    # Compute GradCAM
+    gradcam = compute_gradcam(img_tensor, layer_idx, typeCAM)
+
+    return predicted_label, gradcam
+
+
+idx_to_lbl = {"0": "Cracked", "1": "Uncracked"}
+
+# Function wired into the Gradio app
+def classify_image(image, layer_idx, typeCAM):
+    # Predict label and get GradCAM
+    label, gradcam_img = predict_and_gradcam(model, image, layer_idx, typeCAM)
+
+    return idx_to_lbl[label], gradcam_img
+
+
+description = """\
+<center>Upload an image of concrete and get the predicted label along with the GradCAM heatmap.</center>
+<img src="https://www.huggingface.co/spaces/concrete-crack-gradcam/main/resolve/header.jpeg"></img>
+"""
+
+typeCAMs = ["GradCAM", "HiResCAM", "ScoreCAM", "GradCAMPlusPlus", "AblationCAM", "XGradCAM", "FullGrad"]
+
+# Define Gradio interface
+iface = gr.Interface(
+    fn=classify_image,
+    inputs=[gr.Image(), gr.Slider(minimum=1, maximum=3, step=1, value=1), gr.Dropdown(choices=typeCAMs, value="GradCAM")],
+    outputs=[gr.Textbox(label="Predicted Label"), gr.Image(label="GradCAM Heatmap")],
+    title="Concrete Crack Detection with GradCAM",
+    description=description,
+    allow_flagging="never"  # Gradio 4.x expects a string here, not a boolean
+)
+
+# Launch the interface
+iface.launch()
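
For context, a minimal sketch of driving the pipeline above without the web UI; all names come from app.py itself, while the image path "sample.jpg" is an illustrative assumption. It would run in the same session as the app.py definitions, before iface.launch():

    import numpy as np
    from PIL import Image

    # Load a local test image as an RGB array ("sample.jpg" is a hypothetical path)
    img = np.asarray(Image.open("sample.jpg").convert("RGB"))

    # layer_idx 1-3 selects conv1/conv2/conv3; typeCAM must be a key of allCAMs
    label, heatmap = predict_and_gradcam(model, img, layer_idx=3, typeCAM="GradCAM")

    print(idx_to_lbl[label])  # "Cracked" or "Uncracked"
    print(heatmap.shape)      # (224, 224, 4): RGBA heatmap from the magma colormap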
header.jpeg ADDED
requirements.txt ADDED
@@ -0,0 +1,6 @@
+gradio
+torch
+torchvision
+grad-cam
+numpy
+matplotlib
run.ipynb ADDED
@@ -0,0 +1,238 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.optim as optim\n",
+    "from torch.utils.data import Dataset, DataLoader, Subset\n",
+    "from torchvision import transforms, datasets\n",
+    "import os\n",
+    "from PIL import Image\n",
+    "from tqdm.auto import tqdm\n",
+    "import torch.nn.functional as F\n",
+    "# Imports required by the cells below\n",
+    "from pytorch_grad_cam import GradCAM\n",
+    "from matplotlib import colormaps\n",
+    "import numpy as np\n",
+    "import gradio as gr"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class CNN(nn.Module):\n",
+    "    def __init__(self):\n",
+    "        super(CNN, self).__init__()\n",
+    "        # Convolutional layers\n",
+    "        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)\n",
+    "        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)\n",
+    "        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)\n",
+    "\n",
+    "        # Pooling layer\n",
+    "        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)\n",
+    "\n",
+    "        # Fully connected layers\n",
+    "        self.fc1 = nn.Linear(64 * (224 // 8) * (224 // 8), 64)  # Adjusted based on pooling layers\n",
+    "        self.fc2 = nn.Linear(64, 2)  # 2 classes for binary classification\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        # Convolutional layers with ReLU activation and pooling\n",
+    "        x = self.pool(F.relu(self.conv1(x)))\n",
+    "        x = self.pool(F.relu(self.conv2(x)))\n",
+    "        x = self.pool(F.relu(self.conv3(x)))\n",
+    "\n",
+    "        # Flatten for fully connected layers\n",
+    "        x = torch.flatten(x, 1)\n",
+    "\n",
+    "        # Fully connected layers with ReLU activation\n",
+    "        x = F.relu(self.fc1(x))\n",
+    "        x = self.fc2(x)\n",
+    "\n",
+    "        return x"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "transform = transforms.Compose([\n",
+    "    transforms.Resize((224, 224)),  # Resize to 224x224\n",
+    "    transforms.ToTensor(),  # Convert to tensor\n",
+    "    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize\n",
+    "])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<All keys matched successfully>"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model = CNN()\n",
+    "\n",
+    "model.load_state_dict(torch.load(\"trained-cnn-concrete-crack.model\", map_location=torch.device(\"cpu\")))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "magmaify = colormaps['magma']\n",
+    "\n",
+    "def compute_gradcam(img_tensor, layer_idx):\n",
+    "    target_layers = [[model.conv1], [model.conv2], [model.conv3]]\n",
+    "\n",
+    "    cam = GradCAM(model=model, target_layers=target_layers[layer_idx-1])\n",
+    "\n",
+    "    grayscale_cam = cam(input_tensor=img_tensor, targets=None)\n",
+    "\n",
+    "    return magmaify(grayscale_cam.reshape(224, 224))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def predict_and_gradcam(model, img, layer_idx):\n",
+    "    # Preprocess the image\n",
+    "    img = Image.fromarray(img.astype('uint8'), 'RGB') if isinstance(img, np.ndarray) else img\n",
+    "    img_tensor = transform(img).unsqueeze(0)\n",
+    "\n",
+    "    # Get predicted class index\n",
+    "    with torch.no_grad():\n",
+    "        output = model(img_tensor)\n",
+    "        _, predicted = torch.max(output.data, 1)\n",
+    "        predicted_label = str(predicted.item())\n",
+    "\n",
+    "    # Compute GradCAM\n",
+    "    gradcam = compute_gradcam(img_tensor, layer_idx)\n",
+    "\n",
+    "    return predicted_label, gradcam"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/irsh/miniconda3/envs/speaker_verification/lib/python3.9/site-packages/gradio/interface.py:382: UserWarning: The `allow_flagging` parameter in `Interface` nowtakes a string value ('auto', 'manual', or 'never'), not a boolean. Setting parameter to: 'never'.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Running on local URL: http://127.0.0.1:7871\n",
+      "\n",
+      "To create a public link, set `share=True` in `launch()`.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"http://127.0.0.1:7871/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 47,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "idx_to_lbl = {\"0\": \"Cracked\", \"1\": \"Uncracked\"}\n",
+    "\n",
+    "# Define a function to be used in the Gradio app\n",
+    "def classify_image(image, layer_idx):\n",
+    "    # Predict label and get GradCAM\n",
+    "    label, gradcam_img = predict_and_gradcam(model, image, layer_idx)\n",
+    "\n",
+    "    return idx_to_lbl[label], gradcam_img\n",
+    "\n",
+    "# Define Gradio interface\n",
+    "iface = gr.Interface(\n",
+    "    fn=classify_image,\n",
+    "    inputs=[gr.Image(), gr.Slider(minimum=1, maximum=3, step=1, value=1)],\n",
+    "    outputs=[gr.Textbox(label=\"Predicted Label\"), gr.Image(label=\"GradCAM Heatmap\")],\n",
+    "    title=\"Concrete Crack Detection with GradCAM\",\n",
+    "    description=\"Upload an image of concrete and get the predicted label along with the GradCAM heatmap.\",\n",
+    "    allow_flagging=False\n",
+    ")\n",
+    "\n",
+    "# Launch the interface\n",
+    "iface.launch()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python-3.12",
+   "language": "python",
+   "name": "python-3.12"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.19"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
trained-cnn-concrete-crack.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc9573b1ca4fd5551573e7fb9aed9c22f6d556e8019dead340e1e916f4302686
+size 12944322
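
Note: the three lines above are a Git LFS pointer, not the weights themselves. After cloning the Space, a standard "git lfs pull" (ordinary Git LFS usage, not part of this commit) fetches the roughly 12.9 MB state dict that app.py loads with torch.load.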