commit 155c468d6c81a5f1a7d352f27ffd8f354dcad6c2
Author: Kris Yotam <krisyotam@protonmail.com>
Date: Fri, 20 Feb 2026 09:33:31 -0600
Initial commit: sframe - unique frame extractor
Suckless C99 tool using FFmpeg and perceptual hashing
to extract visually unique frames from video files.
Outputs timestamped images organized by video name.
Diffstat:
| A | .claude/CLAUDE.md | | | 126 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | .gitignore | | | 3 | +++ |
| A | LICENSE | | | 21 | +++++++++++++++++++++ |
| A | Makefile | | | 58 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | config.mk | | | 10 | ++++++++++ |
| A | sframe.1 | | | 77 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/decode.c | | | 180 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/decode.h | | | 26 | ++++++++++++++++++++++++++ |
| A | src/diff.c | | | 212 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/diff.h | | | 28 | ++++++++++++++++++++++++++++ |
| A | src/main.c | | | 248 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/util.c | | | 102 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/util.h | | | 15 | +++++++++++++++ |
13 files changed, 1106 insertions(+), 0 deletions(-)
diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md
@@ -0,0 +1,126 @@
+# sframe — CLAUDE.md
+
+## Project
+
+sframe (Simple Frame) is a lightweight command-line tool written in C
+following the suckless philosophy. It extracts unique frames from video
+files for research purposes, using perceptual hashing to eliminate
+duplicates and embedding timestamps in filenames.
+
+## Coding Standards — Suckless C Style
+
+All code in this project MUST follow the suckless.org coding style:
+
+### Language
+- C99 (ISO/IEC 9899:1999), no extensions
+- POSIX.1-2008 (`_POSIX_C_SOURCE 200809L`)
+
+### Indentation & Whitespace
+- Tabs for indentation (1 tab = 1 level)
+- Spaces for alignment only, never for indentation
+- No tabs except at the beginning of a line
+- Maximum line length: 79 characters
+
+### Comments
+- Use `/* */` only, never `//`
+- Comment fallthrough cases in switch statements
+
+### Variables
+- All declarations at the top of the block
+- Pointer `*` adjacent to variable name: `char *p`, not `char* p`
+- No C99 `bool`; use `int` (0/1)
+- Global/static variables not used outside TU must be `static`
+
+### Functions
+- Return type on its own line
+- Function name at column 0 on next line (enables `grep ^funcname`)
+- Opening `{` on its own line for functions
+- Functions not used outside their file: `static`
+
+```c
+static void
+usage(void)
+{
+	fprintf(stderr, "usage: sframe [-t thresh] [-f fmt] video\n");
+	exit(1);
+}
+```
+
+### Braces
+- Opening `{` on same line for control flow (if, for, while, switch)
+- Closing `}` on its own line unless continuing (else, do-while)
+- Use braces even for single statements when sibling branches use them
+
+### Naming
+- lowercase_with_underscores for functions and variables
+- UPPERCASE for macros and constants
+- CamelCase for typedef'd struct types
+- No `_t` suffix (reserved by POSIX)
+- Prefix module functions: `dec_open()`, `diff_phash()`; the shared
+  helpers in util.c (`die()`, `warn()`, `ecalloc()`) stay unprefixed
+
+### Control Flow
+- Space after `if`, `for`, `while`, `switch`
+- No space after `(` or before `)`
+- Use `goto` for cleanup/unwind, not nested ifs
+- Return/exit early on failure
+- Test against 0, not -1: `if (func() < 0)`
+
+### Error Handling
+- All allocation checked; goto cleanup on failure
+- `die()` for fatal errors (prints message, exits)
+- `warn()` for recoverable errors (prints, continues)
+
+### File Organization Order
+1. License header
+2. System includes (alphabetical)
+3. Local includes
+4. Macros
+5. Type definitions
+6. Function declarations
+7. Global variables
+8. Function definitions (same order as declarations)
+
+### Headers
+- System headers first, alphabetical
+- Local headers after blank line
+- No cyclic dependencies
+- Include only what is needed
+
+## Architecture Rules
+
+- **No global mutable state.** Pass context structs explicitly.
+- **No `system()` calls.** Use `fork()/execvp()` or `popen()`.
+- **No hardcoded paths** except sensible defaults (`~/frames`).
+- **Separate compilation.** Every .c file compiles independently.
+
+## Module Prefixes
+
+| Module | Prefix | File |
+|--------|--------|------|
+| Main/CLI | — | main.c |
+| Decode | `dec_` | decode.c |
+| Diff | `diff_` | diff.c |
+| Utilities | `die()`, `warn()`, `ecalloc()` | util.c |
+
+## Build
+
+```sh
+make # build sframe binary
+make clean # remove build artifacts
+make install PREFIX=/usr/local # install
+```
+
+## Dependencies
+
+- FFmpeg libraries: libavformat, libavcodec, libavutil, libswscale
+
+## Git Conventions
+
+- No `Co-Authored-By: Claude` lines
+- Commit messages: imperative, <72 chars, no period
+- One logical change per commit
+
+## CRITICAL: No Building on Moirai
+
+NEVER run `make`, `sudo make install`, or any build command unless
+Kris explicitly says "build" in his message.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,3 @@
+sframe
+src/*.o
+*.o
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 Kris Yotam
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/Makefile b/Makefile
@@ -0,0 +1,58 @@
+# sframe — suckless-style Makefile
+#
+# Targets: all (default), clean, install, uninstall.
+# Tunables live in config.mk; PREFIX, MANPREFIX and DESTDIR may be
+# overridden on the command line.
+
+include config.mk
+
+VERSION = 0.1
+
+PREFIX = /usr/local
+MANPREFIX = $(PREFIX)/share/man
+
+CC = cc
+CFLAGS = -std=c99 -D_POSIX_C_SOURCE=200809L -Wall -Wextra -pedantic \
+	-DVERSION=\"$(VERSION)\" \
+	-DDEFAULT_THRESH=$(DEFAULT_THRESH) \
+	-Isrc
+LDFLAGS =
+
+# FFmpeg
+CFLAGS += $(shell pkg-config --cflags libavformat libavcodec \
+	libavutil libswscale 2>/dev/null)
+LDFLAGS += $(shell pkg-config --libs libavformat libavcodec \
+	libavutil libswscale 2>/dev/null)
+
+# libpng (for PNG output)
+CFLAGS += $(shell pkg-config --cflags libpng 2>/dev/null)
+LDFLAGS += $(shell pkg-config --libs libpng 2>/dev/null)
+
+# libjpeg (for JPEG output)
+LDFLAGS += -ljpeg
+
+LDFLAGS += -lm
+
+SRC = src/main.c src/decode.c src/diff.c src/util.c
+HDR = src/decode.h src/diff.h src/util.h
+OBJ = $(SRC:.c=.o)
+
+all: sframe
+
+sframe: $(OBJ)
+	$(CC) -o $@ $(OBJ) $(LDFLAGS)
+
+# Rebuild every object when a header or a build-time default changes;
+# the recipe still comes from the .c.o suffix rule below.
+$(OBJ): $(HDR) config.mk
+
+.c.o:
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+clean:
+	rm -f sframe src/*.o
+
+# DESTDIR is empty by default; packagers set it to stage the install.
+install: sframe
+	mkdir -p $(DESTDIR)$(PREFIX)/bin
+	cp sframe $(DESTDIR)$(PREFIX)/bin/sframe
+	chmod 755 $(DESTDIR)$(PREFIX)/bin/sframe
+	mkdir -p $(DESTDIR)$(MANPREFIX)/man1
+	cp sframe.1 $(DESTDIR)$(MANPREFIX)/man1/sframe.1
+	chmod 644 $(DESTDIR)$(MANPREFIX)/man1/sframe.1
+
+uninstall:
+	rm -f $(DESTDIR)$(PREFIX)/bin/sframe
+	rm -f $(DESTDIR)$(MANPREFIX)/man1/sframe.1
+
+.PHONY: all clean install uninstall
diff --git a/config.mk b/config.mk
@@ -0,0 +1,10 @@
+# sframe — config.mk
+
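+# Output image format: png or jpg
+# (not read by the Makefile or the sources in this commit; the format
+# is chosen at run time with -f)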
+OUTPUT_FMT = png
+
+# Default perceptual hash threshold (0-64, lower = stricter)
+# 0 = only exact duplicates removed
+# 8 = very similar frames removed (recommended)
+# 16 = moderately similar frames removed
+DEFAULT_THRESH = 8
diff --git a/sframe.1 b/sframe.1
@@ -0,0 +1,77 @@
+.TH SFRAME 1 "2026-02-20" "sframe 0.1"
+.SH NAME
+sframe \- extract unique frames from video files
+.SH SYNOPSIS
+.B sframe
+.RB [ \-v ]
+.RB [ \-t
+.IR threshold ]
+.RB [ \-f
+.IR png | jpg ]
+.RB [ \-o
+.IR outdir ]
+.IR video
+.RI [ video... ]
+.SH DESCRIPTION
+.B sframe
+(Simple Frame) extracts visually unique frames from video files for
+research purposes. It uses perceptual hashing to detect and skip
+duplicate or near-duplicate frames, saving only frames that differ
+significantly from the previously saved frame.
+.PP
+Output is organized into subdirectories under the output directory,
+one per video file. Frames are named with the video name and
+timestamp:
+.IR video-name-HH-MM-SS.mmm.png .
+.SH OPTIONS
+.TP
+.BI \-t " threshold"
+Perceptual hash threshold (0\(en64). Lower values are stricter,
+meaning more frames are considered unique.
+A value of 0 only removes exact duplicates.
+Default: 8.
+.TP
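+.BI \-f " format"
+Output image format:
+.B png
+(default) or
+.BR jpg .
+.B jpeg
+is accepted as another spelling of
+.BR jpg ;
+the file extension follows the spelling given.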
+.TP
+.BI \-o " outdir"
+Output directory. Default:
+.IR ~/frames .
+.TP
+.B \-v
+Print version and exit.
+.SH EXAMPLES
+Extract unique frames from a lecture recording:
+.PP
+.RS
+sframe lecture.mp4
+.RE
+.PP
+Use a stricter threshold and JPEG output:
+.PP
+.RS
+sframe -t 4 -f jpg documentary.mkv
+.RE
+.PP
+Process multiple videos into a custom directory:
+.PP
+.RS
+sframe -o /tmp/research clip1.mp4 clip2.webm
+.RE
+.SH FILES
+.TP
+.I ~/frames/
+Default output directory.
+.SH EXIT STATUS
+.B sframe
+exits 0 on success, 1 if any video could not be processed.
+.SH DEPENDENCIES
+FFmpeg libraries (libavformat, libavcodec, libavutil, libswscale),
+libpng, libjpeg.
+.SH AUTHORS
+Kris Yotam.
+.SH LICENSE
+MIT License. See LICENSE file.
diff --git a/src/decode.c b/src/decode.c
@@ -0,0 +1,180 @@
+/* See LICENSE file for copyright and license details. */
+
+#include <string.h>
+
+#include <libavformat/avformat.h>
+#include <libavcodec/avcodec.h>
+#include <libavutil/avutil.h>
+#include <libavutil/imgutils.h>
+#include <libswscale/swscale.h>
+
+#include "decode.h"
+#include "util.h"
+
+/*
+ * Open the video at path and fill in d: demuxer, decoder for the
+ * first video stream, and two scalers (8x8 gray for hashing,
+ * full-size RGB24 for saving).
+ * Returns 0 on success. On failure a warning is printed, anything
+ * already set up is released through dec_close() and -1 is returned.
+ */
+int
+dec_open(Decoder *d, const char *path)
+{
+	const AVCodec *decoder;
+	AVCodecParameters *par;
+	unsigned int n;
+	int err;
+
+	memset(d, 0, sizeof(*d));
+	d->stream_idx = -1;
+
+	err = avformat_open_input(&d->fmt_ctx, path, NULL, NULL);
+	if (err < 0) {
+		warn("cannot open '%s': %s", path, av_err2str(err));
+		return -1;
+	}
+
+	if (avformat_find_stream_info(d->fmt_ctx, NULL) < 0) {
+		warn("no stream info in '%s'", path);
+		goto fail;
+	}
+
+	/* take the lowest-numbered video stream */
+	for (n = 0; n < d->fmt_ctx->nb_streams; n++) {
+		par = d->fmt_ctx->streams[n]->codecpar;
+		if (par->codec_type != AVMEDIA_TYPE_VIDEO)
+			continue;
+		d->stream_idx = (int)n;
+		break;
+	}
+	if (d->stream_idx < 0) {
+		warn("no video stream in '%s'", path);
+		goto fail;
+	}
+
+	par = d->fmt_ctx->streams[d->stream_idx]->codecpar;
+	decoder = avcodec_find_decoder(par->codec_id);
+	if (decoder == NULL) {
+		warn("unsupported codec in '%s'", path);
+		goto fail;
+	}
+
+	d->codec_ctx = avcodec_alloc_context3(decoder);
+	if (d->codec_ctx == NULL) {
+		warn("cannot alloc codec context");
+		goto fail;
+	}
+
+	if (avcodec_parameters_to_context(d->codec_ctx, par) < 0) {
+		warn("cannot copy codec params");
+		goto fail;
+	}
+
+	err = avcodec_open2(d->codec_ctx, decoder, NULL);
+	if (err < 0) {
+		warn("cannot open codec: %s", av_err2str(err));
+		goto fail;
+	}
+
+	d->width = d->codec_ctx->width;
+	d->height = d->codec_ctx->height;
+
+	/* source -> 8x8 gray, input of the perceptual hash */
+	d->sws_gray = sws_getContext(d->width, d->height,
+	                             d->codec_ctx->pix_fmt,
+	                             8, 8, AV_PIX_FMT_GRAY8,
+	                             SWS_BILINEAR, NULL, NULL, NULL);
+	if (d->sws_gray == NULL) {
+		warn("cannot create grayscale scaler");
+		goto fail;
+	}
+
+	/* source -> same-size RGB24, input of the image writers */
+	d->sws_rgb = sws_getContext(d->width, d->height,
+	                            d->codec_ctx->pix_fmt,
+	                            d->width, d->height,
+	                            AV_PIX_FMT_RGB24,
+	                            SWS_BILINEAR, NULL, NULL, NULL);
+	if (d->sws_rgb == NULL) {
+		warn("cannot create RGB scaler");
+		goto fail;
+	}
+
+	return 0;
+
+fail:
+	dec_close(d);
+	return -1;
+}
+
+/*
+ * Decode the next video frame into frame.
+ *
+ * Frames still queued inside the decoder are handed out before any
+ * new packet is read: one packet may produce several frames, and a
+ * decoder with pending output can refuse further input.
+ * When the input is exhausted the decoder is switched to drain mode
+ * and its remaining frames are returned one per call.
+ *
+ * Returns 0 when frame holds a new picture, -1 at end of stream or
+ * on any error.
+ */
+int
+dec_read_frame(Decoder *d, AVFrame *frame)
+{
+	AVPacket *pkt;
+	int ret;
+
+	/* leftover output from earlier packets comes first */
+	ret = avcodec_receive_frame(d->codec_ctx, frame);
+	if (ret == 0)
+		return 0;
+	if (ret != AVERROR(EAGAIN))
+		return -1; /* fully drained, or decoder error */
+
+	pkt = av_packet_alloc();
+	if (!pkt)
+		return -1;
+
+	for (;;) {
+		ret = av_read_frame(d->fmt_ctx, pkt);
+		if (ret < 0) {
+			/* no more packets: flush the decoder */
+			avcodec_send_packet(d->codec_ctx, NULL);
+			ret = avcodec_receive_frame(d->codec_ctx,
+			                            frame);
+			break;
+		}
+
+		if (pkt->stream_index != d->stream_idx) {
+			av_packet_unref(pkt);
+			continue;
+		}
+
+		ret = avcodec_send_packet(d->codec_ctx, pkt);
+		av_packet_unref(pkt);
+		if (ret < 0)
+			break;
+
+		/* EAGAIN: decoder wants more input before output */
+		ret = avcodec_receive_frame(d->codec_ctx, frame);
+		if (ret != AVERROR(EAGAIN))
+			break;
+	}
+
+	av_packet_free(&pkt);
+	return (ret == 0) ? 0 : -1;
+}
+
+/*
+ * Presentation time of frame in seconds, in the time base of the
+ * selected video stream. Uses pts, falls back to pkt_dts, and
+ * yields 0.0 when neither is set.
+ */
+double
+dec_frame_time(Decoder *d, AVFrame *frame)
+{
+	int64_t ticks;
+
+	ticks = frame->pts;
+	if (ticks == AV_NOPTS_VALUE)
+		ticks = frame->pkt_dts;
+	if (ticks == AV_NOPTS_VALUE)
+		return 0.0;
+
+	return ticks
+	       * av_q2d(d->fmt_ctx->streams[d->stream_idx]->time_base);
+}
+
+/*
+ * Release everything dec_open() set up and zero the struct, so the
+ * call is safe on a partially opened or already closed Decoder.
+ */
+void
+dec_close(Decoder *d)
+{
+	/* scalers, then the decoder, then the demuxer */
+	if (d->sws_rgb != NULL)
+		sws_freeContext(d->sws_rgb);
+	if (d->sws_gray != NULL)
+		sws_freeContext(d->sws_gray);
+	if (d->codec_ctx != NULL)
+		avcodec_free_context(&d->codec_ctx);
+	if (d->fmt_ctx != NULL)
+		avformat_close_input(&d->fmt_ctx);
+
+	memset(d, 0, sizeof(*d));
+}
diff --git a/src/decode.h b/src/decode.h
@@ -0,0 +1,26 @@
+/* See LICENSE file for copyright and license details. */
+
+#ifndef DECODE_H
+#define DECODE_H
+
+#include <libavformat/avformat.h>
+#include <libavcodec/avcodec.h>
+#include <libavutil/avutil.h>
+#include <libswscale/swscale.h>
+
+typedef struct { /* decoding state for one open video, see dec_open() */
+ AVFormatContext *fmt_ctx; /* demuxer context of the input file */
+ AVCodecContext *codec_ctx; /* decoder for the selected stream */
+ struct SwsContext *sws_gray; /* source -> 8x8 GRAY8 (hashing) */
+ struct SwsContext *sws_rgb; /* source -> same-size RGB24 (saving) */
+ int stream_idx; /* index of the first video stream, -1 if none */
+ int width; /* frame width copied from codec_ctx at open */
+ int height; /* frame height copied from codec_ctx at open */
+} Decoder;
+
+int dec_open(Decoder *d, const char *path);
+int dec_read_frame(Decoder *d, AVFrame *frame);
+double dec_frame_time(Decoder *d, AVFrame *frame);
+void dec_close(Decoder *d);
+
+#endif /* DECODE_H */
diff --git a/src/diff.c b/src/diff.c
@@ -0,0 +1,212 @@
+/* See LICENSE file for copyright and license details. */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <libavutil/frame.h>
+#include <libavutil/imgutils.h>
+#include <libswscale/swscale.h>
+#include <png.h>
+#include <jpeglib.h>
+
+#include "diff.h"
+#include "util.h"
+
+/*
+ * Compute a 64-bit average hash of frame.
+ *
+ * 1. Scale the frame to 8x8 grayscale with sws.
+ * 2. Compute the mean pixel value.
+ * 3. Bit i is 1 if pixel i is above the mean, 0 otherwise.
+ *
+ * sws must already be configured for the frame's geometry; src_h
+ * is the number of source rows handed to the scaler. src_w is not
+ * needed here and only kept so the signature stays unchanged.
+ *
+ * Returns the hash, or 0 if the scaler produced no output.
+ */
+uint64_t
+diff_phash(AVFrame *frame, struct SwsContext *sws,
+	int src_w, int src_h)
+{
+	uint8_t gray[64];
+	uint8_t *dst_data[1];
+	int dst_linesize[1];
+	uint64_t hash;
+	unsigned sum;
+	uint8_t mean;
+	int i;
+
+	(void)src_w;
+
+	/* never hash stack garbage, even if scaling fails */
+	memset(gray, 0, sizeof(gray));
+	dst_data[0] = gray;
+	dst_linesize[0] = 8;
+
+	if (sws_scale(sws,
+	              (const uint8_t *const *)frame->data,
+	              frame->linesize,
+	              0, src_h,
+	              dst_data, dst_linesize) <= 0)
+		return 0;
+
+	/* compute mean */
+	sum = 0;
+	for (i = 0; i < 64; i++)
+		sum += gray[i];
+	mean = (uint8_t)(sum / 64);
+
+	/* build hash */
+	hash = 0;
+	for (i = 0; i < 64; i++) {
+		if (gray[i] > mean)
+			hash |= ((uint64_t)1 << i);
+	}
+
+	return hash;
+}
+
+/*
+ * Hamming distance between two 64-bit hashes: the number of bit
+ * positions in which a and b differ (0..64).
+ */
+int
+diff_hamming(uint64_t a, uint64_t b)
+{
+	uint64_t delta;
+	int bits;
+
+	bits = 0;
+	for (delta = a ^ b; delta != 0; delta >>= 1)
+		bits += (int)(delta & 1);
+
+	return bits;
+}
+
+/*
+ * Write a packed 8-bit RGB image to path as a non-interlaced PNG.
+ *
+ * rgb points at the first row, stride is the distance in bytes
+ * between rows, w and h are the image size in pixels.
+ * Returns 0 on success, -1 if the file cannot be opened, libpng
+ * reports an error, or the final flush to disk fails.
+ */
+static int
+save_png(const uint8_t *rgb, int w, int h, int stride,
+	const char *path)
+{
+	FILE *fp;
+	png_structp png;
+	png_infop info;
+	int y;
+
+	fp = fopen(path, "wb");
+	if (!fp) {
+		warn("cannot open '%s' for writing:", path);
+		return -1;
+	}
+
+	png = png_create_write_struct(PNG_LIBPNG_VER_STRING,
+	                              NULL, NULL, NULL);
+	if (!png) {
+		fclose(fp);
+		return -1;
+	}
+
+	info = png_create_info_struct(png);
+	if (!info) {
+		png_destroy_write_struct(&png, NULL);
+		fclose(fp);
+		return -1;
+	}
+
+	/* libpng reports errors by jumping back here */
+	if (setjmp(png_jmpbuf(png))) {
+		png_destroy_write_struct(&png, &info);
+		fclose(fp);
+		return -1;
+	}
+
+	png_init_io(png, fp);
+	png_set_IHDR(png, info, w, h, 8,
+	             PNG_COLOR_TYPE_RGB,
+	             PNG_INTERLACE_NONE,
+	             PNG_COMPRESSION_TYPE_DEFAULT,
+	             PNG_FILTER_TYPE_DEFAULT);
+	png_write_info(png, info);
+
+	/* offset in size_t: y * stride can exceed int for big images */
+	for (y = 0; y < h; y++)
+		png_write_row(png, rgb + (size_t)y * (size_t)stride);
+
+	png_write_end(png, NULL);
+	png_destroy_write_struct(&png, &info);
+
+	/* buffered data reaches the disk here, so a failure counts */
+	if (fclose(fp) != 0) {
+		warn("cannot write '%s':", path);
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Write a packed 8-bit RGB image to path as a JPEG (quality 95).
+ *
+ * rgb points at the first row, stride is the distance in bytes
+ * between rows, w and h are the image size in pixels.
+ * Returns 0 on success, -1 if the file cannot be opened or the
+ * final flush to disk fails.
+ *
+ * NOTE(review): errors inside libjpeg go through the handler set
+ * up by jpeg_std_error(); confirm what it does on a fatal error
+ * and whether a custom error_exit is wanted here.
+ */
+static int
+save_jpg(const uint8_t *rgb, int w, int h, int stride,
+	const char *path)
+{
+	FILE *fp;
+	struct jpeg_compress_struct cinfo;
+	struct jpeg_error_mgr jerr;
+	JSAMPROW row;
+	int y;
+
+	fp = fopen(path, "wb");
+	if (!fp) {
+		warn("cannot open '%s' for writing:", path);
+		return -1;
+	}
+
+	cinfo.err = jpeg_std_error(&jerr);
+	jpeg_create_compress(&cinfo);
+	jpeg_stdio_dest(&cinfo, fp);
+
+	cinfo.image_width = w;
+	cinfo.image_height = h;
+	cinfo.input_components = 3;
+	cinfo.in_color_space = JCS_RGB;
+	jpeg_set_defaults(&cinfo);
+	jpeg_set_quality(&cinfo, 95, 1);
+	jpeg_start_compress(&cinfo, 1);
+
+	/* offset in size_t: y * stride can exceed int for big images */
+	for (y = 0; y < h; y++) {
+		row = (JSAMPROW)(rgb + (size_t)y * (size_t)stride);
+		jpeg_write_scanlines(&cinfo, &row, 1);
+	}
+
+	jpeg_finish_compress(&cinfo);
+	jpeg_destroy_compress(&cinfo);
+
+	/* buffered data reaches the disk here, so a failure counts */
+	if (fclose(fp) != 0) {
+		warn("cannot write '%s':", path);
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Convert frame to RGB24 with sws_rgb and write it to path.
+ *
+ * src_w and src_h are the picture size the scaler was set up for.
+ * fmt selects the encoder: "jpg" or "jpeg" writes a JPEG, anything
+ * else writes a PNG.
+ * Returns 0 on success, -1 if the pixel buffer cannot be allocated,
+ * the conversion yields no rows, or the image writer fails.
+ */
+int
+diff_save_frame(AVFrame *frame, struct SwsContext *sws_rgb,
+	int src_w, int src_h,
+	const char *path, const char *fmt)
+{
+	uint8_t *data[4];
+	int linesize[4];
+	int ret;
+
+	ret = av_image_alloc(data, linesize, src_w, src_h,
+	                     AV_PIX_FMT_RGB24, 32);
+	if (ret < 0)
+		return -1;
+
+	/* do not write an image from an unconverted buffer */
+	ret = sws_scale(sws_rgb,
+	                (const uint8_t *const *)frame->data,
+	                frame->linesize,
+	                0, src_h,
+	                data, linesize);
+	if (ret <= 0) {
+		ret = -1;
+		goto done;
+	}
+
+	if (strcmp(fmt, "jpg") == 0 || strcmp(fmt, "jpeg") == 0)
+		ret = save_jpg(data[0], src_w, src_h,
+		               linesize[0], path);
+	else
+		ret = save_png(data[0], src_w, src_h,
+		               linesize[0], path);
+
+done:
+	av_freep(&data[0]);
+	return ret;
+}
diff --git a/src/diff.h b/src/diff.h
@@ -0,0 +1,28 @@
+/* See LICENSE file for copyright and license details. */
+
+#ifndef DIFF_H
+#define DIFF_H
+
+#include <stdint.h>
+
+#include <libavutil/frame.h>
+#include <libswscale/swscale.h>
+
+/*
+ * 64-bit perceptual hash (8x8 average hash).
+ * Two frames with hamming distance <= threshold are
+ * considered duplicates.
+ */
+uint64_t diff_phash(AVFrame *frame, struct SwsContext *sws,
+ int src_w, int src_h);
+int diff_hamming(uint64_t a, uint64_t b);
+
+/*
+ * Save an RGB frame to disk as PNG or JPEG.
+ * fmt: "png" or "jpg"
+ */
+int diff_save_frame(AVFrame *frame, struct SwsContext *sws_rgb,
+ int src_w, int src_h,
+ const char *path, const char *fmt);
+
+#endif /* DIFF_H */
diff --git a/src/main.c b/src/main.c
@@ -0,0 +1,248 @@
+/* See LICENSE file for copyright and license details. */
+
+#define _POSIX_C_SOURCE 200809L
+
+#include <libgen.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "decode.h"
+#include "diff.h"
+#include "util.h"
+
+#ifndef VERSION
+#define VERSION "0.1"
+#endif
+
+#ifndef DEFAULT_THRESH
+#define DEFAULT_THRESH 8
+#endif
+
+/* Print the command-line synopsis to stderr and exit with status 1. */
+static void
+usage(void)
+{
+	fputs("usage: sframe [-v] [-t threshold] [-f png|jpg]"
+	      " [-o outdir] video [...]\n"
+	      "\n"
+	      "options:\n",
+	      stderr);
+	fprintf(stderr,
+	        " -t N hash threshold 0-64"
+	        " (default: %d, lower = stricter)\n",
+	        DEFAULT_THRESH);
+	fputs(" -f fmt output format: png or jpg"
+	      " (default: png)\n"
+	      " -o dir output directory"
+	      " (default: ~/frames)\n"
+	      " -v print version\n",
+	      stderr);
+	exit(1);
+}
+
+/*
+ * Derive a directory/file-name prefix from a video path.
+ *
+ * Takes the last path component, drops its final extension and
+ * replaces spaces and slashes with underscores. A dot in first
+ * position is kept, so ".mp4" stays ".mp4" instead of becoming an
+ * empty name.
+ * Returns a newly allocated string the caller must free().
+ */
+static char *
+video_name(const char *path)
+{
+	char *copy, *base, *dot, *name, *p;
+
+	/* basename() may modify its argument: work on a copy */
+	copy = estrdup(path);
+	base = basename(copy);
+
+	dot = strrchr(base, '.');
+	if (dot && dot != base)
+		*dot = '\0';
+
+	name = estrdup(base);
+	free(copy);
+
+	/* sanitize: replace spaces and slashes */
+	for (p = name; *p; p++) {
+		if (*p == ' ' || *p == '/')
+			*p = '_';
+	}
+
+	return name;
+}
+
+/*
+ * Format a time in seconds as "HH-MM-SS.mmm" into buf (len bytes).
+ *
+ * The value is rounded to the nearest millisecond once and then
+ * split with integer arithmetic; truncating the fractional part
+ * instead prints 2.3 s as "02.299". Negative and NaN inputs are
+ * treated as 0.
+ */
+static void
+ts_string(double secs, char *buf, size_t len)
+{
+	long long total_ms;
+	int h, m, s, ms;
+
+	/* written this way so NaN, which fails every test, is caught */
+	if (!(secs >= 0.0))
+		secs = 0.0;
+
+	total_ms = (long long)(secs * 1000.0 + 0.5);
+
+	ms = (int)(total_ms % 1000);
+	total_ms /= 1000;
+	s = (int)(total_ms % 60);
+	total_ms /= 60;
+	m = (int)(total_ms % 60);
+	h = (int)(total_ms / 60);
+
+	snprintf(buf, len, "%02d-%02d-%02d.%03d", h, m, s, ms);
+}
+
+/*
+ * Extract the visually distinct frames of one video.
+ *
+ * Every decoded frame is hashed; a frame is written to
+ * outdir/<name>/<name>-<timestamp>.<fmt> when it is the first one
+ * or its hash differs from the previously kept frame by more than
+ * threshold bits.
+ *
+ * The input is opened before the output directory is created, so
+ * an unreadable file leaves nothing behind. Frames that cannot be
+ * saved are reported and skipped.
+ *
+ * Returns 0 when the video was processed, -1 if it could not be
+ * opened or an output path does not fit. Exits through die() when
+ * the directory or the frame buffer cannot be created.
+ */
+static int
+process_video(const char *path, const char *outdir,
+	int threshold, const char *fmt)
+{
+	Decoder dec;
+	AVFrame *frame;
+	char *name;
+	char dirpath[4096];
+	char filepath[4096];
+	char tsbuf[32];
+	uint64_t prev_hash;
+	uint64_t cur_hash;
+	double ts;
+	int n;
+	int ret;
+	int saved;
+	int total;
+	int first;
+
+	if (dec_open(&dec, path) < 0)
+		return -1;
+
+	name = video_name(path);
+
+	n = snprintf(dirpath, sizeof(dirpath), "%s/%s", outdir, name);
+	if (n < 0 || (size_t)n >= sizeof(dirpath)) {
+		warn("output path too long for '%s'", path);
+		dec_close(&dec);
+		free(name);
+		return -1;
+	}
+	if (mkdirp(dirpath) < 0)
+		die("cannot create directory '%s':", dirpath);
+
+	frame = av_frame_alloc();
+	if (!frame)
+		die("cannot allocate frame");
+
+	fprintf(stderr, "processing: %s (%dx%d)\n",
+	        path, dec.width, dec.height);
+	fprintf(stderr, "output: %s/\n", dirpath);
+	fprintf(stderr, "threshold: %d\n", threshold);
+
+	ret = 0;
+	saved = 0;
+	total = 0;
+	first = 1;
+	prev_hash = 0;
+
+	while (dec_read_frame(&dec, frame) == 0) {
+		total++;
+
+		cur_hash = diff_phash(frame, dec.sws_gray,
+		                      dec.width, dec.height);
+
+		/* near-duplicate of the last kept frame: skip */
+		if (!first
+		    && diff_hamming(prev_hash, cur_hash) <= threshold) {
+			av_frame_unref(frame);
+			continue;
+		}
+
+		ts = dec_frame_time(&dec, frame);
+		ts_string(ts, tsbuf, sizeof(tsbuf));
+
+		n = snprintf(filepath, sizeof(filepath),
+		             "%s/%s-%s.%s",
+		             dirpath, name, tsbuf, fmt);
+		if (n < 0 || (size_t)n >= sizeof(filepath)) {
+			/* would fail the same way for every frame */
+			warn("output path too long for '%s'", path);
+			av_frame_unref(frame);
+			ret = -1;
+			break;
+		}
+
+		if (diff_save_frame(frame, dec.sws_rgb,
+		                    dec.width, dec.height,
+		                    filepath, fmt) == 0) {
+			saved++;
+			if (saved % 50 == 0) {
+				fprintf(stderr,
+				        "\r saved %d unique frames"
+				        " (%d scanned)...",
+				        saved, total);
+			}
+		} else {
+			warn("cannot save '%s'", filepath);
+		}
+
+		prev_hash = cur_hash;
+		first = 0;
+		av_frame_unref(frame);
+	}
+
+	fprintf(stderr,
+	        "\r done: %d unique frames from %d total\n",
+	        saved, total);
+
+	av_frame_free(&frame);
+	dec_close(&dec);
+	free(name);
+	return ret;
+}
+
+/*
+ * Entry point: parse options, pick the output directory and run
+ * process_video() on every remaining argument.
+ *
+ * -t N    hash threshold, a whole number from 0 to 64
+ * -f fmt  png, jpg or jpeg
+ * -o dir  output directory (default: $HOME/frames)
+ * -v      print the version and exit
+ *
+ * Returns 0 if every video was processed, 1 otherwise.
+ */
+int
+main(int argc, char *argv[])
+{
+	const char *outdir;
+	const char *fmt;
+	const char *home;
+	char default_outdir[4096];
+	char *end;
+	long val;
+	int threshold;
+	int opt;
+	int i;
+	int n;
+	int ret;
+
+	threshold = DEFAULT_THRESH;
+	fmt = "png";
+	outdir = NULL;
+
+	while ((opt = getopt(argc, argv, "t:f:o:v")) != -1) {
+		switch (opt) {
+		case 't':
+			/* strtol, not atoi: "abc" must not become 0 */
+			val = strtol(optarg, &end, 10);
+			if (end == optarg || *end != '\0'
+			    || val < 0 || val > 64)
+				die("threshold must be 0-64");
+			threshold = (int)val;
+			break;
+		case 'f':
+			if (strcmp(optarg, "png") != 0
+			    && strcmp(optarg, "jpg") != 0
+			    && strcmp(optarg, "jpeg") != 0)
+				die("format must be png or jpg");
+			fmt = optarg;
+			break;
+		case 'o':
+			outdir = optarg;
+			break;
+		case 'v':
+			fprintf(stdout, "sframe %s\n", VERSION);
+			return 0;
+		default:
+			usage();
+		}
+	}
+
+	if (optind >= argc)
+		usage();
+
+	/* default output: ~/frames */
+	if (!outdir) {
+		home = getenv("HOME");
+		if (!home)
+			die("HOME not set");
+		n = snprintf(default_outdir, sizeof(default_outdir),
+		             "%s/frames", home);
+		if (n < 0 || (size_t)n >= sizeof(default_outdir))
+			die("HOME is too long");
+		outdir = default_outdir;
+	}
+
+	if (mkdirp(outdir) < 0)
+		die("cannot create output directory '%s':", outdir);
+
+	ret = 0;
+	for (i = optind; i < argc; i++) {
+		if (process_video(argv[i], outdir,
+		                  threshold, fmt) < 0)
+			ret = 1;
+	}
+
+	return ret;
+}
diff --git a/src/util.c b/src/util.c
@@ -0,0 +1,102 @@
+/* See LICENSE file for copyright and license details. */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#include "util.h"
+
+/*
+ * Print "sframe: <message>" to stderr and exit with status 1.
+ *
+ * fmt and the following arguments are printf-style. If fmt ends
+ * in ':', a space and strerror() of the errno value seen on entry
+ * are appended.
+ */
+void
+die(const char *fmt, ...)
+{
+	va_list ap;
+	int saved_errno;
+
+	/* the stdio calls below may overwrite errno */
+	saved_errno = errno;
+
+	fprintf(stderr, "sframe: ");
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	if (fmt[0] && fmt[strlen(fmt) - 1] == ':')
+		fprintf(stderr, " %s", strerror(saved_errno));
+	fprintf(stderr, "\n");
+	exit(1);
+}
+
+/*
+ * Print "sframe: <message>" to stderr and return.
+ *
+ * fmt and the following arguments are printf-style. If fmt ends
+ * in ':', a space and strerror() of the errno value seen on entry
+ * are appended.
+ */
+void
+warn(const char *fmt, ...)
+{
+	va_list ap;
+	int saved_errno;
+
+	/* the stdio calls below may overwrite errno */
+	saved_errno = errno;
+
+	fprintf(stderr, "sframe: ");
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	if (fmt[0] && fmt[strlen(fmt) - 1] == ':')
+		fprintf(stderr, " %s", strerror(saved_errno));
+	fprintf(stderr, "\n");
+}
+
+/* calloc() that never returns NULL: on failure it exits via die(). */
+void *
+ecalloc(size_t nmemb, size_t size)
+{
+	void *mem = calloc(nmemb, size);
+
+	if (mem == NULL)
+		die("calloc:");
+	return mem;
+}
+
+/* malloc() that never returns NULL: on failure it exits via die(). */
+void *
+emalloc(size_t size)
+{
+	void *mem = malloc(size);
+
+	if (mem == NULL)
+		die("malloc:");
+	return mem;
+}
+
+/*
+ * strdup() that never returns NULL: on failure it exits via die().
+ * The caller owns the copy and must free() it.
+ */
+char *
+estrdup(const char *s)
+{
+	char *dup = strdup(s);
+
+	if (dup == NULL)
+		die("strdup:");
+	return dup;
+}
+
+/*
+ * mkdir -p: create path and every missing parent with mode 0755.
+ * Components that already exist are not an error.
+ * Returns 0 on success, -1 if path is empty, does not fit the
+ * internal 4096-byte buffer, or a mkdir() call fails.
+ */
+int
+mkdirp(const char *path)
+{
+	char work[4096];
+	size_t n, i;
+
+	n = strlen(path);
+	if (n == 0 || n >= sizeof(work))
+		return -1;
+	memcpy(work, path, n + 1);
+
+	/* index 0 is skipped: a leading '/' is not a component */
+	for (i = 1; i < n; i++) {
+		if (work[i] != '/')
+			continue;
+		work[i] = '\0';
+		if (mkdir(work, 0755) < 0 && errno != EEXIST)
+			return -1;
+		work[i] = '/';
+	}
+
+	/* finally the full path itself */
+	if (mkdir(work, 0755) < 0 && errno != EEXIST)
+		return -1;
+	return 0;
+}
diff --git a/src/util.h b/src/util.h
@@ -0,0 +1,15 @@
+/* See LICENSE file for copyright and license details. */
+
+#ifndef UTIL_H
+#define UTIL_H
+
+#include <stddef.h>
+
+void die(const char *fmt, ...);
+void warn(const char *fmt, ...);
+void *ecalloc(size_t nmemb, size_t size);
+void *emalloc(size_t size);
+char *estrdup(const char *s);
+int mkdirp(const char *path);
+
+#endif /* UTIL_H */