This commit is contained in:
2024-08-11 03:24:45 +02:00
parent 1c2336e142
commit 2bd1393ad5
6 changed files with 75 additions and 0 deletions

68
main.py Normal file
View File

@@ -0,0 +1,68 @@
from PIL import Image, UnidentifiedImageError
import io
import os
def extract_images(prefix, byte_data):
    """Carve embedded JPEG and PNG images out of raw bytes and save them.

    The data is scanned front to back. At each step the nearest JPEG start
    marker (``FF D8``) or PNG signature is located, the matching end marker
    (``FF D9`` or the ``IEND`` chunk trailer) is searched for from there, and
    the slice between them is checked with Pillow. Slices that pass are
    re-opened and written to ``./output/{prefix}_image_{n}.jpg`` or ``.png``,
    where ``n`` counts from 1 in the order the images were found.

    Args:
        prefix: File-name prefix for the saved images.
        byte_data: Bytes object to scan.

    Returns:
        None. Results are written to ``./output/`` and progress is printed.
    """
    # JPEG and PNG start and end markers
    jpeg_start_marker = b'\xFF\xD8'
    jpeg_end_marker = b'\xFF\xD9'
    png_start_marker = b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A'
    png_end_marker = b'IEND\xAE\x42\x60\x82'

    images = []
    start = 0
    while start < len(byte_data):
        jpeg_start = byte_data.find(jpeg_start_marker, start)
        png_start = byte_data.find(png_start_marker, start)
        if jpeg_start == -1 and png_start == -1:
            break  # No more images found

        # Pick whichever format starts first in the remaining data.
        if png_start == -1 or (jpeg_start != -1 and jpeg_start < png_start):
            image_start, end_marker = jpeg_start, jpeg_end_marker
        else:
            image_start, end_marker = png_start, png_end_marker

        # Test the raw find() result *before* adding the marker length:
        # adding first turns the -1 sentinel into a small positive offset,
        # which hides the failure and can move `start` backwards forever.
        end_marker_pos = byte_data.find(end_marker, image_start)
        if end_marker_pos == -1:
            break  # No valid end found, likely corrupted

        image_end = end_marker_pos + len(end_marker)
        image_data = byte_data[image_start:image_end]
        start = image_end  # Move start to the end of the current image

        try:
            # Load the image using PIL to verify it's valid
            image = Image.open(io.BytesIO(image_data))
            image.verify()  # Verify that it is indeed an image
        except (OSError, SyntaxError, UnidentifiedImageError):
            # verify() may raise SyntaxError for a broken PNG, so it is
            # caught alongside the I/O and identification errors.
            print(f"Skipping invalid image data at position {start}")
            continue
        images.append(image_data)

    # Only create the output directory when there is something to write.
    if images:
        os.makedirs("./output", exist_ok=True)

    # Process all extracted images
    for number, image_data in enumerate(images, 1):
        # Reload the image (since `verify()` puts the file in an unusable state)
        image = Image.open(io.BytesIO(image_data))
        # Save the image (you can also display it using image.show())
        extension = 'jpg' if image.format == 'JPEG' else 'png'
        image.save(f"./output/{prefix}_image_{number}.{extension}")
        print(f"Extracted {image.format} image {number}")
# Run the extraction over every regular file in ./input/, using the file name
# without its extension as the prefix for the saved images.
files = os.listdir("./input/")
for entry in files:
    input_path = f"./input/{entry}"
    # listdir() also returns sub-directories, which cannot be opened for
    # reading; skip them instead of aborting the whole run.
    if not os.path.isfile(input_path):
        continue
    filename, _ = os.path.splitext(entry)
    with open(input_path, "rb") as input_file:
        extract_images(filename, input_file.read())