detect.h (1116B)
/* See LICENSE file for copyright and license details. */

#ifndef DETECT_H
#define DETECT_H

/*
 * Kinds of site/CMS that detection can report.
 * The numeric values are written out; they are the same ones the
 * compiler would assign implicitly (0 upward, in declaration order).
 */
typedef enum {
	SITE_UNKNOWN   = 0,
	SITE_WORDPRESS = 1,
	SITE_BLOGGER   = 2,
	SITE_HUGO      = 3,
	SITE_JEKYLL    = 4,
	SITE_GHOST     = 5,
	SITE_DRUPAL    = 6,
	SITE_MEDIAWIKI = 7
} SiteType;

/* Outcome of a detection run, carrying hints for archiving. */
typedef struct {
	SiteType type;
	const char *name;	/* CMS name in human-readable form */
	char *feed_url;		/* URL of the RSS/Atom feed, if one was found */
	char *api_url;		/* base of the REST API, if one was found */
	char *sitemap_url;	/* URL of sitemap.xml, if one was found */
	int has_json_api;	/* whether the site has a JSON API */
} SiteInfo;

/* Work out the CMS type from a page's HTML and its URL. */
SiteInfo *detect_site(const char *html, const char *url);

/* Release a detection result. */
void siteinfo_free(SiteInfo *info);

/*
 * Sitemap URLs for a detected site.
 * NOTE(review): count is presumably an out-parameter receiving the number
 * of entries in the returned array -- confirm against the implementation.
 */
char **detect_sitemap_urls(SiteInfo *info, const char *domain, int *count);

/*
 * Additional seed URLs, chosen according to the CMS type.
 * NOTE(review): count is presumably an out-parameter receiving the number
 * of entries in the returned array -- confirm against the implementation.
 */
char **detect_seed_urls(SiteInfo *info, const char *domain, int *count);

#endif /* DETECT_H */