There aren't many reasons why you would want to do H.264 encoding on a single image. JPEG is probably what you want. But I had a project where I was building a super-resolution model for videos, and the video compression artifacts kept getting enhanced as if they were actual edges. So I had to come up with a way to do H.264 video encoding on a single image to train the model to fix the artifacts rather than enhance them. (JPEG and H.264 artifacts are very different.)
compress_image_h264 takes a NumPy array image (h, w, c) and an amount of compression (25-50 recommended) and outputs a compressed version.
The result is copied before returning because I got an issue with it returning a "non writeable" array and copying fixed it.
def compress_image_h264(image, amount):
    """Round-trip a single image through H.264 to bake in video artifacts.

    The image is PNG-encoded with OpenCV, piped to ``ffmpeg`` where it is
    encoded with libx264 into a Matroska container, and then decoded back to
    raw pixels by ``decompress_image_h264``.

    Args:
        image: NumPy image array whose first two axes are (height, width),
            e.g. ``(h, w, c)``. It must be writable as PNG by
            ``cv2.imencode``.
        amount: Amount of compression; passed to ffmpeg's ``-qp`` option
            after conversion with ``str``. 25-50 is the recommended range.

    Returns:
        A writable ``(h, w, 3)`` ``uint8`` array in BGR order with the same
        height and width as ``image``.

    Raises:
        RuntimeError: If PNG encoding fails or ffmpeg exits with a non-zero
            status (the ffmpeg stderr text is included in the message).
        ValueError: Propagated from ``decompress_image_h264`` when the
            decoded data does not match the expected frame size.
    """
    height, width = image.shape[:2]

    # libx264 with yuv420p (2x2 chroma subsampling) refuses frames whose width
    # or height is odd. Pad by at most one row/column, replicating the border
    # pixels so the padding does not create an artificial edge, and crop the
    # padding away again after decoding.
    pad_h = height % 2
    pad_w = width % 2
    if pad_h or pad_w:
        pad_spec = ((0, pad_h), (0, pad_w)) + ((0, 0),) * (image.ndim - 2)
        padded = np.pad(image, pad_spec, mode="edge")
    else:
        padded = image

    # Encode the image to PNG so ffmpeg can read it from stdin.
    ok, png_data = cv2.imencode('.png', padded)
    if not ok:
        raise RuntimeError("PNG encoding failed")

    # Use ffmpeg to compress the image using H.264 codec and MKV container.
    ffmpeg_command = [
        'ffmpeg',
        '-hide_banner',
        '-loglevel', 'error',   # Keep stderr limited to real errors
        '-y',                   # Overwrite output files without asking
        '-i', 'pipe:0',         # Input from stdin
        '-vcodec', 'libx264',   # Use H.264 codec
        '-qp', str(amount),     # Quality parameter
        '-pix_fmt', 'yuv420p',  # Pixel format
        '-f', 'matroska',       # Use MKV container
        'pipe:1',               # Output to stdout
    ]
    result = subprocess.run(
        ffmpeg_command,
        input=png_data.tobytes(),  # Pass PNG data to stdin
        stdout=subprocess.PIPE,    # Capture the encoded stream
        stderr=subprocess.PIPE,    # Capture stderr for the error message
    )
    if result.returncode != 0:
        # errors="replace" so undecodable bytes in ffmpeg's output cannot
        # mask the real failure with a UnicodeDecodeError.
        details = result.stderr.decode(errors="replace").strip()
        raise RuntimeError(f"FFmpeg compression failed: {details}")

    decoded = decompress_image_h264(
        result.stdout, width + pad_w, height + pad_h
    )
    # Crop off any padding. The copy also makes the result writable, because
    # the decoded frame is a read-only view over ffmpeg's output bytes.
    return decoded[:height, :width].copy()
def decompress_image_h264(compressed_data, width, height):
    """Decode an encoded video stream into one raw BGR frame using ffmpeg.

    Args:
        compressed_data: Bytes of the encoded stream; written to ffmpeg's
            stdin.
        width: Expected frame width in pixels.
        height: Expected frame height in pixels.

    Returns:
        A ``(height, width, 3)`` ``uint8`` array in BGR order. It is built
        with ``np.frombuffer`` directly over ffmpeg's output bytes, so it is
        read-only; copy it if a writable array is needed.

    Raises:
        ValueError: If ``width`` or ``height`` is not positive, or if ffmpeg
            does not produce exactly ``width * height * 3`` bytes.
        RuntimeError: If ffmpeg exits with a non-zero status (the ffmpeg
            stderr text is included in the message).
    """
    # Fail fast on impossible dimensions instead of spawning ffmpeg first.
    if width <= 0 or height <= 0:
        raise ValueError(
            f"Invalid frame dimensions: width={width}, height={height}"
        )

    # Use ffmpeg to decompress the image from H.264 to raw format.
    ffmpeg_command = [
        'ffmpeg',
        '-hide_banner',
        '-loglevel', 'error',  # Keep stderr limited to real errors
        '-i', 'pipe:0',        # Input from stdin
        '-f', 'rawvideo',      # Output raw video format
        '-pix_fmt', 'bgr24',   # Pixel format: 3 bytes per pixel, BGR order
        'pipe:1',              # Output to stdout
    ]
    result = subprocess.run(
        ffmpeg_command,
        input=compressed_data,   # Pass compressed data to stdin
        stdout=subprocess.PIPE,  # Capture the raw pixels
        stderr=subprocess.PIPE,  # Capture stderr for the error message
    )
    if result.returncode != 0:
        # errors="replace" so undecodable bytes in ffmpeg's output cannot
        # mask the real failure with a UnicodeDecodeError.
        details = result.stderr.decode(errors="replace").strip()
        raise RuntimeError(f"FFmpeg decompression failed: {details}")

    raw_image_data = result.stdout

    # The output must be exactly one width x height frame of 3-byte pixels,
    # otherwise the reshape below would be wrong or fail.
    expected_size = width * height * 3
    if len(raw_image_data) != expected_size:
        raise ValueError(
            f"Cannot reshape array of size {len(raw_image_data)} "
            f"into shape ({height},{width},3)"
        )

    # Zero-copy view over the output bytes, reshaped to (rows, cols, BGR).
    return np.frombuffer(raw_image_data, dtype=np.uint8).reshape(
        (height, width, 3)
    )