readr

Minimal Terminal RSS Reader
Log | Files | Refs | README | LICENSE

utils_derive.c (3457B)



// C11
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include "utils.h"

// Case-insensitive comparison of up to n characters of a and b.
// Returns 1 when every compared position matches after tolower(), or when
// both strings end (NUL in a matched by NUL in b) before n characters have
// been examined. Returns 0 at the first position that differs.
static int
ieq_n(const char* a, const char* b, size_t n)
{
	const char* lhs = a;
	const char* rhs = b;

	for (size_t left = n; left > 0; left--, lhs++, rhs++) {
		// Cast through unsigned char: <ctype.h> functions require it
		const int folded_l = tolower((unsigned char)*lhs);
		const int folded_r = tolower((unsigned char)*rhs);

		if (folded_l != folded_r) {
			return 0;
		}
		// Positions matched and a has ended: nothing further to compare
		if (*lhs == '\0') {
			return 1;
		}
	}
	return 1;
}

// Case-insensitive substring search.
// Returns 1 if needle occurs somewhere in haystack (ignoring case) or if
// needle is empty; returns 0 otherwise. Both arguments must be non-NULL,
// NUL-terminated strings.
int
contains(const char* haystack, const char* needle)
{
	const size_t needle_len = strlen(needle);
	if (needle_len == 0) {
		return 1;
	}

	// Folded first character of the needle, used as a cheap pre-filter
	// before running the full comparison at each position.
	const int first = tolower((unsigned char)needle[0]);

	const char* cur = haystack;
	while (*cur != '\0') {
		if (tolower((unsigned char)*cur) == first
		    && ieq_n(cur, needle, needle_len)) {
			return 1;
		}
		cur++;
	}
	return 0;
}

// Map a filename extension (no leading dot) to its canonical type label.
// The match is exact and case-sensitive. Aliases collapse onto one label
// (e.g. "jpeg" -> "jpg"). Returns a pointer to a static string, or NULL
// when the extension is not in the table.
static const char*
ext_to_type(const char* ext)
{
	static const struct {
		const char* suffix;
		const char* label;
	} known[] = {
		// Documents
		{ "pdf", "pdf" },
		{ "doc", "doc" }, { "docx", "doc" },
		{ "ppt", "ppt" }, { "pptx", "ppt" },
		{ "xls", "xls" }, { "xlsx", "xls" },
		{ "txt", "txt" },
		{ "rtf", "rtf" },
		{ "csv", "csv" },

		// Images
		{ "jpg", "jpg" }, { "jpeg", "jpg" },
		{ "png", "png" },
		{ "gif", "gif" },
		{ "webp", "webp" },
		{ "heic", "heic" }, { "heif", "heic" },
		{ "bmp", "bmp" },
		{ "tif", "tif" }, { "tiff", "tif" },
		{ "svg", "svg" },

		// Video (common on social platforms)
		{ "mp4", "mp4" },
		{ "mov", "mov" },
		{ "m4v", "m4v" },
		{ "webm", "webm" },
		{ "mkv", "mkv" },
		{ "avi", "avi" },

		// Audio
		{ "mp3", "mp3" },
		{ "m4a", "m4a" },
		{ "aac", "aac" },
		{ "wav", "wav" },
		{ "ogg", "ogg" },
		{ "flac", "flac" },

		// Archives (often shared)
		{ "zip", "zip" },
		{ "rar", "rar" },
		{ "7z", "7z" },
	};

	for (size_t i = 0; i < sizeof known / sizeof known[0]; i++) {
		if (strcmp(ext, known[i].suffix) == 0) {
			return known[i].label;
		}
	}
	return NULL;
}

// Derive a short file-type label from a URL.
//
// Returns "video" when the URL mentions youtube.com, youtu.be or vimeo.com.
// This is a case-insensitive substring match over the whole URL, so it also
// fires when one of those names appears in a path or query string.
// Otherwise the extension of the last path segment (ignoring any ?query or
// #fragment) is lowercased and mapped through ext_to_type().
//
// Returns NULL when url is NULL, when the URL has no path or no extension,
// when the extension is longer than 16 characters, or when it is not a
// recognised type. The result points to static storage; do not free it.
const char*
filetype_from_url(const char* url)
{
	enum { EXT_MAX = 16 };

	if (!url) { return NULL; }

	// Well-known video hosts win over any extension in the URL
	if (contains(url, "youtube.com")) return "video";
	if (contains(url, "youtu.be")) return "video";
	if (contains(url, "vimeo.com")) return "video";

	// The path ends at the first '?' or '#', whichever comes first
	const char* q = strchr(url, '?');
	const char* h = strchr(url, '#');
	const char* end = url + strlen(url);
	if (q && q < end) end = q;
	if (h && h < end) end = h;

	// Locate the start of the path. After a "scheme://" prefix (or a
	// protocol-relative "//" prefix) the authority comes first and must be
	// skipped; otherwise a bare host such as "https://example.zip" would be
	// read as a file named "example.zip".
	const char* path = url;
	const char* authority = NULL;
	const char* sep = strstr(url, "://");
	if (sep && sep < end && !memchr(url, '/', (size_t)(sep - url))) {
		// "://" before any '/', '?' or '#': treat what precedes it as a scheme
		authority = sep + 3;
	} else if (url[0] == '/' && url[1] == '/') {
		authority = url + 2;
	}
	if (authority) {
		// authority <= end holds here: end points at '?', '#' or NUL, and
		// none of the prefix characters just skipped is one of those.
		path = memchr(authority, '/', (size_t)(end - authority));
		if (!path) return NULL; // host only, no path component
	}

	// Find the last '.' in the last path segment: each '/' starts a new
	// segment and discards any dot seen so far. A path ending in '/' thus
	// leaves dot == NULL.
	const char* dot = NULL;
	for (const char* p = path; p < end; p++) {
		if (*p == '/') {
			dot = NULL;
		} else if (*p == '.') {
			dot = p;
		}
	}
	if (!dot || dot + 1 >= end) return NULL;

	// Extract extension, lowercase it
	size_t ext_len = (size_t)(end - (dot + 1));
	if (ext_len == 0 || ext_len > EXT_MAX) return NULL;

	char ext[EXT_MAX + 1];
	for (size_t i = 0; i < ext_len; i++) {
		ext[i] = (char)tolower((unsigned char)dot[1 + i]);
	}
	ext[ext_len] = '\0';

	return ext_to_type(ext);
}