sbot

Simple web archiver — self-contained GWTAR archives
git clone https://git.krisyotam.com/krisyotam/sbot.git
Log | Files | Refs | README | LICENSE

robots.h (754B)


      1 /* See LICENSE file for copyright and license details. */
      2 
      3 #ifndef ROBOTS_H
      4 #define ROBOTS_H
      5 
      6 /* Maximum allow/disallow rules retained per parsed robots.txt */
      7 #define MAX_RULES 512
      8 
      9 /* A single allow/disallow rule parsed from a robots.txt file */
     10 typedef struct {
     11 	char *path; /* rule's URL path pattern; presumably heap-owned and released by robots_free() — confirm in robots.c */
     12 	int allow; /* 1 = allow, 0 = disallow */
     13 } RobotsRule;
     14 
     15 /* Parsed robots.txt for a single domain */
     16 typedef struct {
     17 	char *domain; /* domain this robots.txt was fetched for */
     18 	RobotsRule rules[MAX_RULES]; /* fixed-capacity rule table */
     19 	int nrules; /* number of valid entries in rules[] */
     20 	int crawl_delay; /* seconds, 0 = none specified */
     21 } Robots;
     22 
     23 /* Fetch and parse robots.txt for a domain.
        * Caller owns the returned Robots and must release it with robots_free().
        * NOTE(review): presumably returns NULL on fetch/parse failure — confirm in robots.c. */
     24 Robots *robots_fetch(const char *domain);
     25 
     26 /* Check if a path is allowed under r's rules.
        * NOTE(review): return convention presumably 1 = allowed, 0 = disallowed,
        * mirroring RobotsRule.allow — confirm in robots.c. */
     27 int robots_allowed(Robots *r, const char *path);
     28 
     29 /* Get crawl delay in seconds (0 = none specified in robots.txt) */
     30 int robots_delay(Robots *r);
     31 
     32 /* Release a Robots previously returned by robots_fetch() */
     33 void robots_free(Robots *r);
     34 
     35 #endif /* ROBOTS_H */