Page MenuHomePhabricator
Paste P58917

logo-detection: prototype for JSON input, preprocess with Keras, and return JSON output
ActivePublic

Authored by kevinbazira on Mar 26 2024, 10:13 AM.
# !pip install keras==3.0.4
# !pip install keras-cv==0.8.1
import os
import requests
import json
import tempfile
import shutil
os.environ["KERAS_BACKEND"] = "tensorflow" # Set the Keras backend environment variable to "tensorflow"
import keras
import keras_cv
import numpy as np
# Prototype configuration: dataset loading parameters, target class, and model location.
BATCH_SIZE = 64  # Batch size passed to image_dataset_from_directory below
IMAGE_SIZE = (224, 224)  # Image size passed to image_dataset_from_directory below
target = "logo" # Predicted target class: "album", "book", "logo", "screenshot"
label_mode = "binary" # Model type: "binary" for binary classification, "categorical" for multiclass
model_path = "/content/logo_detection/model/logo_max_all.keras" # Path to a trained Keras model with ".keras" extension
# Load the model once at start-up; it is called directly on image batches further down
model = keras.models.load_model(model_path)
# Sample request payload: one record per image, giving the file name to save
# it under, the URL to download it from, and the target class to score against
input_data = [
    {
        "filename": "Elizabeth_Drive_-_border_of_Edensor_Park_and_Bonnyrigg_Heights_in_New_South_Wales_62.jpg",
        "url": "https://phab.wmfusercontent.org/file/data/bfjukphf6khghlic2rgv/PHID-FILE-34q3qs3oe6ea5o4p22lv/Elizabeth_Drive_-_border_of_Edensor_Park_and_Bonnyrigg_Heights_in_New_South_Wales_62.jpg",
        "target": "logo",
    },
    {
        "filename": "Cambia_logo.png",
        "url": "https://phab.wmfusercontent.org/file/data/mb6wynlvf3bdfw5e443f/PHID-FILE-wc27fvtkl6yv4rjdlqzn/Cambia_logo.png",
        "target": "logo",
    },
    {
        "filename": "Blooming_bush_(14248894271).jpg",
        "url": "https://phab.wmfusercontent.org/file/data/46i23voto2a4aqwo6iyb/PHID-FILE-eldmzjv4p3vwsiwsuxya/Blooming_bush_%2814248894271%29.jpg",
        "target": "logo",
    },
    {
        "filename": "BackupVault_Logo_2019.png",
        "url": "https://phab.wmfusercontent.org/file/data/licxzubl2357mpyw5hai/PHID-FILE-kygwsboczktnzfe3u2ne/BackupVault_Logo_2019.png",
        "target": "logo",
    },
    {
        "filename": "Abv.png",
        "url": "https://phab.wmfusercontent.org/file/data/l5rkhcd3vv2kk4czp2y2/PHID-FILE-wj5balvrsa73eo35j7eg/Abv.png",
        "target": "logo",
    },
    {
        "filename": "12_rue_de_Condé_-_detail.jpg",
        "url": "https://phab.wmfusercontent.org/file/data/wxtr7be45udzyjzrojr6/PHID-FILE-tnu6mrji2smn2hpm6nhv/12_rue_de_Cond%C3%A9_-_detail.jpg",
        "target": "logo",
    },
]
# Create a temporary directory to store images
temp_dir = tempfile.mkdtemp()
# Create one subdirectory per class ("logo" and "out_of_domain"); the dataset
# loader further down infers labels from these folder names
logo_dir = os.path.join(temp_dir, "logo")
out_of_domain_dir = os.path.join(temp_dir, "out_of_domain")
os.makedirs(logo_dir)
os.makedirs(out_of_domain_dir)
# Download images from URLs and save each one under the folder named by its target
try:
    for data in input_data:
        image_url = data["url"]
        image_filename = os.path.join(temp_dir, data["target"], data["filename"])
        # Fetch before opening the destination so a failed request never
        # leaves an empty file behind; the timeout stops a stalled server
        # from hanging the script indefinitely
        response = requests.get(image_url, timeout=30)
        # Fail loudly on 4xx/5xx instead of saving an HTML error page as an image
        response.raise_for_status()
        with open(image_filename, "wb") as f:
            f.write(response.content)
except Exception:
    # The clean-up at the end of the script is never reached when a download
    # fails, so remove the temporary directory here before propagating
    shutil.rmtree(temp_dir, ignore_errors=True)
    raise
# Build the evaluation dataset from the class folders under temp_dir.
# Labels are inferred from the folder names; class_names lists
# "out_of_domain" before "logo" (presumably fixing their label indices as
# 0 and 1 respectively - confirm against the Keras docs). Shuffling is off.
test_set = keras.utils.image_dataset_from_directory(
    temp_dir,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=False,
    labels="inferred",
    label_mode=label_mode,
    class_names=["out_of_domain", "logo"],
)
# One JSON-serialisable record per image, in dataset order
predictions_response = []
# Position of the current batch's first image within test_set.file_paths
file_offset = 0
try:
    # Iterate through the test set and make predictions (labels are not needed)
    for images, _labels in test_set:
        # Pass images directly to the model, then widen to float64 *before*
        # rounding: rounding float32 values and converting afterwards leaks
        # artefacts such as 0.009999999776482582 into the JSON output
        batch_scores = np.around(
            model(images).numpy().astype(np.float64), decimals=2
        ).tolist()
        # file_paths covers the whole dataset, so slice it with the running
        # offset; indexing with the position inside the batch would repeat
        # the first batch's filenames for every later batch
        batch_paths = test_set.file_paths[file_offset:file_offset + len(batch_scores)]
        for file_path, scores in zip(batch_paths, batch_scores):
            predictions_response.append(
                {
                    "filename": os.path.basename(file_path),
                    "target": target,
                    "prediction": {
                        # Index 1 is read as "logo", index 0 as "out_of_domain"
                        "logo": scores[1],
                        "out_of_domain": scores[0],
                    },
                }
            )
        file_offset += len(batch_scores)
    # Output response in JSON format
    print(json.dumps(predictions_response, indent=4))
finally:
    # Delete the temporary directory even if inference or serialisation fails
    shutil.rmtree(temp_dir)

Event Timeline

Running this prototype returns the response below:

[
    {
        "filename": "12_rue_de_Cond\u00e9_-_detail.jpg",
        "target": "logo",
        "prediction": {
            "logo": 0.0,
            "out_of_domain": 1.0
        }
    },
    {
        "filename": "Abv.png",
        "target": "logo",
        "prediction": {
            "logo": 1.0,
            "out_of_domain": 0.0
        }
    },
    {
        "filename": "BackupVault_Logo_2019.png",
        "target": "logo",
        "prediction": {
            "logo": 1.0,
            "out_of_domain": 0.0
        }
    },
    {
        "filename": "Blooming_bush_(14248894271).jpg",
        "target": "logo",
        "prediction": {
            "logo": 0.009999999776482582,
            "out_of_domain": 0.9900000095367432
        }
    },
    {
        "filename": "Cambia_logo.png",
        "target": "logo",
        "prediction": {
            "logo": 1.0,
            "out_of_domain": 0.0
        }
    },
    {
        "filename": "Elizabeth_Drive_-_border_of_Edensor_Park_and_Bonnyrigg_Heights_in_New_South_Wales_62.jpg",
        "target": "logo",
        "prediction": {
            "logo": 0.0,
            "out_of_domain": 1.0
        }
    }
]

Running this change of the prototype returns the response below. We noticed the out_of_domain values for Abv.png and Cambia_logo.png are >1, specifically 8.932123455451801e-05 and 4.754207475343719e-05 respectively. @mfossati is this expected behavior?

[
    {
        "filename": "12_rue_de_Condé_-_detail.jpg",
        "target": "logo",
        "prediction": 0.0008389077847823501,
        "out_of_domain": 0.9991611242294312
    },
    {
        "filename": "Abv.png",
        "target": "logo",
        "prediction": 0.9999107122421265,
        "out_of_domain": 8.932123455451801e-05
    },
    {
        "filename": "BackupVault_Logo_2019.png",
        "target": "logo",
        "prediction": 0.9995936751365662,
        "out_of_domain": 0.00040629858267493546
    },
    {
        "filename": "Blooming_bush_(14248894271).jpg",
        "target": "logo",
        "prediction": 0.0051296367309987545,
        "out_of_domain": 0.9948704242706299
    },
    {
        "filename": "Cambia_logo.png",
        "target": "logo",
        "prediction": 0.9999524354934692,
        "out_of_domain": 4.754207475343719e-05
    },
    {
        "filename": "Elizabeth_Drive_-_border_of_Edensor_Park_and_Bonnyrigg_Heights_in_New_South_Wales_62.jpg",
        "target": "logo",
        "prediction": 0.0006882766610942781,
        "out_of_domain": 0.9993116855621338
    }
]

> Running this change of the prototype returns the response below. We noticed the out_of_domain values for Abv.png and Cambia_logo.png are >1, specifically 8.932123455451801e-05 and 4.754207475343719e-05 respectively. @mfossati is this expected behavior?

The trailing e-05 notation means 10^-5, so the values can be read as 0.000089 ... and 0.000047 .... I think we can safely round them anyway. Done in https://gitlab.wikimedia.org/mfossati/scriptz/-/merge_requests/6/diffs?diff_id=51201&start_sha=24877299226efb565d4f17f576ff7c91c8b4e1f9.

Thank you for the clarification, I've run the version that rounds the floats and below are the results. Please confirm whether the prototype is returning the expected output.

[
    {
        "filename": "12_rue_de_Condé_-_detail.jpg",
        "target": "logo",
        "prediction": 0.0008,
        "out_of_domain": 0.9992
    },
    {
        "filename": "Abv.png",
        "target": "logo",
        "prediction": 0.9999,
        "out_of_domain": 0.0001
    },
    {
        "filename": "BackupVault_Logo_2019.png",
        "target": "logo",
        "prediction": 0.9996,
        "out_of_domain": 0.0004
    },
    {
        "filename": "Blooming_bush_(14248894271).jpg",
        "target": "logo",
        "prediction": 0.0051,
        "out_of_domain": 0.9949
    },
    {
        "filename": "Cambia_logo.png",
        "target": "logo",
        "prediction": 1.0,
        "out_of_domain": 0.0
    },
    {
        "filename": "Elizabeth_Drive_-_border_of_Edensor_Park_and_Bonnyrigg_Heights_in_New_South_Wales_62.jpg",
        "target": "logo",
        "prediction": 0.0007,
        "out_of_domain": 0.9993
    }
]

Once we have this confirmation, we can work on input validation and image limits. The goal is to get your requirements correct before we integrate the prototype into KServe and host it on LiftWing.

Thank you for the confirmation. I have pushed an MR for input validation and image limits: https://gitlab.wikimedia.org/mfossati/scriptz/-/merge_requests/7
Please have a look whenever you have a minute. Thanks!

Thanks for the review. I have pushed an MR to handle image download errors: https://gitlab.wikimedia.org/mfossati/scriptz/-/merge_requests/8
Please have a look whenever you have a minute. Thanks!

Thanks for the review. I have pushed an MR that creates a LogoDetectionModel class with its respective methods and type hinting: https://gitlab.wikimedia.org/mfossati/scriptz/-/merge_requests/9
This will enable us to easily integrate the prototype into KServe. Please have a look whenever you have a minute. Thanks!

Thanks for all the reviews as we built this prototype, @mfossati! Please confirm whether it is currently taking the input format as specified in T358676#9650781, preprocessing with Keras in the correct way, and returning the expected output (T358676#9637065). Once we receive your confirmation, the ML team will proceed to integrate it into KServe and create a model server that will be hosted on LiftWing.

@kevinbazira , I can confirm that inputs and outputs are fine.
FYI, I've fixed the expected type of the image dataset, so please use the latest commit.

Great! Thank you for the confirmation and fixing the dataset type hint. We shall use the latest version of the prototype.