readr

Minimal Terminal RSS Reader
Log | Files | Refs | README | LICENSE

utils_derive.c (3457B)


      1 // C11
      2 #include <ctype.h>
      3 #include <stdlib.h>
      4 #include <string.h>
      5 #include "utils.h"
      6 
      7 static int
      8 ieq_n(const char* a, const char* b, size_t n)
      9 {
     10 	for (size_t i = 0; i < n; i++) {
     11 		unsigned char ca = (unsigned char)a[i];
     12 		unsigned char cb = (unsigned char)b[i];
     13 		if (tolower(ca) != tolower(cb)) return 0;
     14 		if (ca == '\0') return 1;
     15 	}
     16 	return 1;
     17 }
     18 
     19 int
     20 contains(const char* haystack, const char* needle)
     21 {
     22 	size_t nlen = strlen(needle);
     23 	if (nlen == 0) return 1;
     24 	for (const char* p = haystack; *p; p++) {
     25 		if (tolower((unsigned char)*p) == tolower((unsigned char)needle[0])) {
     26 			if (ieq_n(p, needle, nlen)) return 1;
     27 		}
     28 	}
     29 	return 0;
     30 }
     31 
     32 static const char*
     33 ext_to_type(const char* ext)
     34 {
     35 	// Documents
     36 	if (!strcmp(ext, "pdf")) return "pdf";
     37 	if (!strcmp(ext, "doc") || !strcmp(ext, "docx")) return "doc";
     38 	if (!strcmp(ext, "ppt") || !strcmp(ext, "pptx")) return "ppt";
     39 	if (!strcmp(ext, "xls") || !strcmp(ext, "xlsx")) return "xls";
     40 	if (!strcmp(ext, "txt")) return "txt";
     41 	if (!strcmp(ext, "rtf")) return "rtf";
     42 	if (!strcmp(ext, "csv")) return "csv";
     43 
     44 	// Images
     45 	if (!strcmp(ext, "jpg") || !strcmp(ext, "jpeg")) return "jpg";
     46 	if (!strcmp(ext, "png")) return "png";
     47 	if (!strcmp(ext, "gif")) return "gif";
     48 	if (!strcmp(ext, "webp")) return "webp";
     49 	if (!strcmp(ext, "heic") || !strcmp(ext, "heif")) return "heic";
     50 	if (!strcmp(ext, "bmp")) return "bmp";
     51 	if (!strcmp(ext, "tif") || !strcmp(ext, "tiff")) return "tif";
     52 	if (!strcmp(ext, "svg")) return "svg";
     53 
     54 	// Video (common on social platforms)
     55 	if (!strcmp(ext, "mp4")) return "mp4";
     56 	if (!strcmp(ext, "mov")) return "mov";
     57 	if (!strcmp(ext, "m4v")) return "m4v";
     58 	if (!strcmp(ext, "webm")) return "webm";
     59 	if (!strcmp(ext, "mkv")) return "mkv";
     60 	if (!strcmp(ext, "avi")) return "avi";
     61 
     62 	// Audio
     63 	if (!strcmp(ext, "mp3")) return "mp3";
     64 	if (!strcmp(ext, "m4a")) return "m4a";
     65 	if (!strcmp(ext, "aac")) return "aac";
     66 	if (!strcmp(ext, "wav")) return "wav";
     67 	if (!strcmp(ext, "ogg")) return "ogg";
     68 	if (!strcmp(ext, "flac")) return "flac";
     69 
     70 	// Archives (often shared)
     71 	if (!strcmp(ext, "zip")) return "zip";
     72 	if (!strcmp(ext, "rar")) return "rar";
     73 	if (!strcmp(ext, "7z")) return "7z";
     74 
     75 	return NULL;
     76 }
     77 
     78 const char*
     79 filetype_from_url(const char* url)
     80 {
     81 	if (!url) { return NULL; }
     82 
     83 	// Rule: any youtube.com in domain => "video"
     84 	// (Simple heuristic: look for "://...youtube.com" or "youtube.com" early-ish)
     85 	if (contains(url, "youtube.com")) return "video";
     86 	if (contains(url, "youtu.be")) return "video";
     87 	if (contains(url, "vimeo.com")) return "video";
     88 
     89 	// Find end of path segment (before ? or #)
     90 	const char* q = strchr(url, '?');
     91 	const char* h = strchr(url, '#');
     92 	const char* end = url + strlen(url);
     93 	if (q && q < end) end = q;
     94 	if (h && h < end) end = h;
     95 
     96 	// If URL ends with '/', there's no filename extension
     97 	if (end > url && end[-1] == '/') return NULL;
     98 
     99 	// Find last '.' in the last path segment
    100 	const char* slash = url;
    101 	for (const char* p = url; p < end; p++) {
    102 		if (*p == '/') slash = p + 1;
    103 	}
    104 	const char* dot = NULL;
    105 	for (const char* p = slash; p < end; p++) {
    106 		if (*p == '.') dot = p;
    107 	}
    108 	if (!dot || dot + 1 >= end) return NULL;
    109 
    110 	// Extract extension, lowercase it
    111 	size_t ext_len = (size_t)(end - (dot + 1));
    112 	if (ext_len == 0 || ext_len > 16) return NULL;
    113 
    114 	char ext[17];
    115 	for (size_t i = 0; i < ext_len; i++) {
    116 		ext[i] = (char)tolower((unsigned char)dot[1 + i]);
    117 	}
    118 	ext[ext_len] = '\0';
    119 
    120 	const char* type = ext_to_type(ext);
    121 	return type ? type : NULL;
    122 }