1 /*
   2  * Copyright (c) 2009-2020 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a copy
   5  * of this software and associated documentation files (the "Software"), to deal
   6  * in the Software without restriction, including without limitation the rights
   7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   8  * copies of the Software, and to permit persons to whom the Software is
   9  * furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20  * SOFTWARE.
  21  */
  22 
#include <stdlib.h>
#include <stddef.h>
#include <stdio.h>
#include <errno.h>
#include <limits.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <time.h>
#include <dirent.h>
  35 
  36 #include <jeffpc/taskq.h>
  37 #include <jeffpc/error.h>
  38 #include <jeffpc/io.h>
  39 #include <jeffpc/mem.h>
  40 #include <jeffpc/file-cache.h>
  41 
  42 #include "post.h"
  43 #include "vars.h"
  44 #include "req.h"
  45 #include "parse.h"
  46 #include "utils.h"
  47 #include "debug.h"
  48 
  49 static struct mem_cache *post_cache;
  50 static struct mem_cache *comment_cache;
  51 
  52 static LOCK_CLASS(post_lc);
  53 
  54 static void post_remove_all_tags(struct rb_tree *taglist);
  55 static void post_remove_all_comments(struct post *post);
  56 
  57 static int tag_cmp(const void *va, const void *vb)
  58 {
  59         const struct post_tag *a = va;
  60         const struct post_tag *b = vb;
  61         int ret;
  62 
  63         ret = strcasecmp(str_cstr(a->tag), str_cstr(b->tag));
  64 
  65         if (ret < 0)
  66                 return -1;
  67         if (ret > 0)
  68                 return 1;
  69         return 0;
  70 }
  71 
  72 void init_post_subsys(void)
  73 {
  74         post_cache = mem_cache_create("post-cache", sizeof(struct post), 0);
  75         ASSERT(!IS_ERR(post_cache));
  76 
  77         comment_cache = mem_cache_create("comment-cache",
  78                                          sizeof(struct comment), 0);
  79         ASSERT(!IS_ERR(comment_cache));
  80 
  81         init_post_index();
  82 }
  83 
  84 struct str *post_get_cached_file(struct post *post, const char *path)
  85 {
  86         struct str *out;
  87         uint64_t rev;
  88         int err;
  89 
  90         out = file_cache_get(path, &rev);
  91         if (IS_ERR(out))
  92                 return out;
  93 
  94         err = nvl_set_int(post->files, path, rev);
  95         if (err) {
  96                 str_putref(out);
  97                 out = ERR_PTR(err);
  98         }
  99 
 100         return out;
 101 }
 102 
 103 static void post_remove_all_filenames(struct post *post)
 104 {
 105         const struct nvpair *pair;
 106 
 107         while ((pair = nvl_iter_start(post->files)) != NULL) {
 108                 struct str *name = nvpair_name_str(pair);
 109 
 110                 VERIFY0(nvl_unset(post->files, str_cstr(name)));
 111 
 112                 str_putref(name);
 113         }
 114 }
 115 
 116 /* consumes the struct val reference */
 117 static void post_add_tags(struct rb_tree *taglist, struct val *list)
 118 {
 119         struct val *tagval;
 120         struct val *tmp;
 121 
 122         sexpr_for_each_noref(tagval, tmp, list) {
 123                 struct post_tag *tag;
 124 
 125                 /* sanity check */
 126                 ASSERT3U(tagval->type, ==, VT_STR);
 127 
 128                 tag = malloc(sizeof(struct post_tag));
 129                 ASSERT(tag);
 130 
 131                 tag->tag = val_getref_str(tagval);
 132 
 133                 if (rb_insert(taglist, tag)) {
 134                         /* found a duplicate */
 135                         str_putref(tag->tag);
 136                         free(tag);
 137                 }
 138         }
 139 
 140         val_putref(list);
 141 }
 142 
 143 static void post_remove_all_comments(struct post *post)
 144 {
 145         struct comment *com;
 146 
 147         while ((com = list_remove_head(&post->comments))) {
 148                 str_putref(com->author);
 149                 str_putref(com->email);
 150                 str_putref(com->ip);
 151                 str_putref(com->url);
 152                 str_putref(com->body);
 153                 mem_cache_free(comment_cache, com);
 154         }
 155 
 156         post->numcom = 0;
 157 }
 158 
 159 static struct str *load_comment(struct post *post, int commid)
 160 {
 161         char path[FILENAME_MAX];
 162         struct str *out;
 163 
 164         snprintf(path, FILENAME_MAX, "%s/posts/%d/comments/%d/text.txt",
 165                  str_cstr(config.data_dir), post->id, commid);
 166 
 167         out = post_get_cached_file(post, path);
 168         if (IS_ERR(out))
 169                 out = STATIC_STR("Error: could not load comment text.");
 170 
 171         return out;
 172 }
 173 
 174 static void post_add_comment(struct post *post, int commid)
 175 {
 176         char path[FILENAME_MAX];
 177         struct comment *comm;
 178         struct str *meta;
 179         struct val *lv;
 180         struct val *v;
 181 
 182         snprintf(path, FILENAME_MAX, "%s/posts/%d/comments/%d/meta.lisp",
 183                  str_cstr(config.data_dir), post->id, commid);
 184 
 185         meta = post_get_cached_file(post, path);
 186         ASSERT(!IS_ERR(meta));
 187 
 188         lv = sexpr_parse_str(meta);
 189         ASSERT(!IS_ERR(lv));
 190 
 191         v = sexpr_cdr(sexpr_assoc(lv, "moderated"));
 192         if (!v || (v->type != VT_BOOL) || !v->b)
 193                 goto done;
 194 
 195         comm = mem_cache_alloc(comment_cache);
 196         ASSERT(comm);
 197 
 198         comm->id     = commid;
 199         comm->author = sexpr_alist_lookup_str(lv, "author");
 200         comm->email  = sexpr_alist_lookup_str(lv, "email");
 201         comm->time   = parse_time_str(sexpr_alist_lookup_str(lv, "time"));
 202         comm->ip     = sexpr_alist_lookup_str(lv, "ip");
 203         comm->url    = sexpr_alist_lookup_str(lv, "url");
 204         comm->body   = load_comment(post, comm->id);
 205 
 206         if (!comm->author)
 207                 comm->author = STATIC_STR("[unknown]");
 208 
 209         list_insert_tail(&post->comments, comm);
 210 
 211         post->numcom++;
 212 
 213 done:
 214         val_putref(v);
 215         val_putref(lv);
 216         str_putref(meta);
 217 }
 218 
 219 /* consumes the struct val reference */
 220 static void post_add_comments(struct post *post, struct val *list)
 221 {
 222         struct val *val;
 223         struct val *tmp;
 224 
 225         sexpr_for_each_noref(val, tmp, list) {
 226                 /* sanity check */
 227                 ASSERT3U(val->type, ==, VT_INT);
 228 
 229                 /* add the comment */
 230                 post_add_comment(post, val->i);
 231         }
 232 
 233         val_putref(list);
 234 }
 235 
 236 static int __do_load_post_body_fmt2(struct post *post, struct str *html)
 237 {
 238         str_putref(post->body); /* free the previous */
 239         post->body = str_getref(html);
 240         ASSERT(post->body);
 241 
 242         return 0;
 243 }
 244 
 245 static int __do_load_post_body_fmt3(struct post *post, const struct str *input)
 246 {
 247         struct parser_output x;
 248         int ret;
 249 
 250         x.req            = NULL;
 251         x.post           = post;
 252         x.input          = str_cstr(input);
 253         x.len            = str_len(input);
 254         x.pos            = 0;
 255         x.lineno         = 0;
 256         x.table_nesting  = 0;
 257         x.texttt_nesting = 0;
 258         x.sc_title       = NULL;
 259         x.sc_pub         = NULL;
 260         x.sc_tags        = NULL;
 261         x.sc_twitter_img = NULL;
 262 
 263         fmt3_lex_init(&x.scanner);
 264         fmt3_set_extra(&x, x.scanner);
 265 
 266         ret = fmt3_parse(&x);
 267         if (ret)
 268                 panic("failed to parse post id %u", post->id);
 269 
 270         fmt3_lex_destroy(x.scanner);
 271 
 272         /*
 273          * Now update struct post based on what we got from the .tex file.
 274          * The struct is already populated by data from the metadata file.
 275          * For the simple string values, we merely override whatever was
 276          * there.  For tags we use the union.
 277          */
 278 
 279         if (x.sc_title) {
 280                 str_putref(post->title);
 281                 post->title = str_getref(x.sc_title);
 282         }
 283 
 284         if (x.sc_pub)
 285                 post->time = parse_time_str(str_getref(x.sc_pub));
 286 
 287         if (x.sc_twitter_img) {
 288                 str_putref(post->twitter_img);
 289                 post->twitter_img = str_getref(x.sc_twitter_img);
 290         }
 291 
 292         post_add_tags(&post->tags, x.sc_tags);
 293 
 294         str_putref(x.sc_title);
 295         str_putref(x.sc_pub);
 296         str_putref(x.sc_twitter_img);
 297 
 298         str_putref(post->body); /* free the previous */
 299         post->body = x.stroutput;
 300         ASSERT(post->body);
 301 
 302         return 0;
 303 }
 304 
 305 static int __load_post_body(struct post *post)
 306 {
 307         static const char *exts[4] = {
 308                 [2] = "html",
 309                 [3] = "tex",
 310         };
 311 
 312         char path[FILENAME_MAX];
 313         struct str *raw;
 314         int ret;
 315 
 316         ASSERT3U(post->fmt, >=, 2);
 317         ASSERT3U(post->fmt, <=, 3);
 318 
 319         snprintf(path, FILENAME_MAX, "%s/posts/%d/post.%s",
 320                  str_cstr(config.data_dir), post->id, exts[post->fmt]);
 321 
 322         raw = post_get_cached_file(post, path);
 323         if (IS_ERR(raw))
 324                 return PTR_ERR(raw);
 325 
 326         switch (post->fmt) {
 327                 case 2:
 328                         ret = __do_load_post_body_fmt2(post, raw);
 329                         break;
 330                 case 3:
 331                         ret = __do_load_post_body_fmt3(post, raw);
 332                         break;
 333         }
 334 
 335         str_putref(raw);
 336 
 337         return ret;
 338 }
 339 
 340 static void __refresh_published_prop(struct post *post, struct val *lv)
 341 {
 342         /* update the time */
 343         post->time = parse_time_str(sexpr_alist_lookup_str(lv, "time"));
 344 
 345         /* update the title */
 346         post->title = sexpr_alist_lookup_str(lv, "title");
 347 
 348         /* update the format */
 349         post->fmt = sexpr_alist_lookup_int(lv, "fmt", NULL);
 350 
 351         /* update the listed bool */
 352         post->listed = sexpr_alist_lookup_bool(lv, "listed", true, NULL);
 353 }
 354 
 355 static int __refresh_published(struct post *post)
 356 {
 357         char path[FILENAME_MAX];
 358         struct str *meta;
 359         struct val *lv;
 360 
 361         snprintf(path, FILENAME_MAX, "%s/posts/%d/post.lisp",
 362                  str_cstr(config.data_dir), post->id);
 363 
 364         meta = post_get_cached_file(post, path);
 365         if (IS_ERR(meta))
 366                 return PTR_ERR(meta);
 367 
 368         lv = sexpr_parse_str(meta);
 369         if (IS_ERR(lv)) {
 370                 str_putref(meta);
 371                 return PTR_ERR(lv);
 372         }
 373 
 374         __refresh_published_prop(post, lv);
 375 
 376         /* empty out the tags/comments lists */
 377         post_remove_all_tags(&post->tags);
 378         post_remove_all_comments(post);
 379 
 380         /* populate the tags/comments lists */
 381         post_add_tags(&post->tags, sexpr_alist_lookup_list(lv, "tags"));
 382         post_add_comments(post, sexpr_alist_lookup_list(lv, "comments"));
 383 
 384         val_putref(lv);
 385         str_putref(meta);
 386 
 387         return 0;
 388 }
 389 
 390 static bool must_refresh(struct post *post)
 391 {
 392         const struct nvpair *pair;
 393 
 394         if (post->preview)
 395                 return true; /* always refresh previews */
 396 
 397         if (nvl_iter_start(post->files) == NULL)
 398                 return true; /* no files means we have no idea what is needed */
 399 
 400         nvl_for_each(pair, post->files) {
 401                 struct str *name = nvpair_name_str(pair);
 402                 uint64_t file_rev;
 403 
 404                 ASSERT0(nvpair_value_int(pair, &file_rev));
 405 
 406                 if (!file_cache_has_newer(str_cstr(name), file_rev)) {
 407                         str_putref(name);
 408                         continue;
 409                 }
 410 
 411                 cmn_err(CE_DEBUG, "post %u needs a refresh "
 412                         "('%s' changed, old rev %"PRIu64")", post->id,
 413                         str_cstr(name), file_rev);
 414 
 415                 str_putref(name);
 416 
 417                 return true; /* no need to check oher files, we are refreshing */
 418         }
 419 
 420         return false;
 421 }
 422 
 423 int post_refresh(struct post *post)
 424 {
 425         int ret;
 426 
 427         if (!must_refresh(post))
 428                 return 0;
 429 
 430         post_remove_all_filenames(post);
 431 
 432         str_putref(post->title);
 433         post->title = NULL;
 434 
 435         if (post->preview) {
 436                 post->title = STATIC_STR("PREVIEW");
 437                 post->time  = time(NULL);
 438                 post->fmt   = 3;
 439         } else {
 440                 ret = __refresh_published(post);
 441                 if (ret)
 442                         return ret;
 443         }
 444 
 445         if ((ret = __load_post_body(post)))
 446                 return ret;
 447 
 448         /* No title set at all?  Set it to something non-NULL. */
 449         if (!post->title)
 450                 post->title = STATIC_STR("Untitled");
 451 
 452         return 0;
 453 }
 454 
 455 struct post *load_post(int postid, bool preview)
 456 {
 457         struct post *post;
 458         int err;
 459 
 460         /*
 461          * If it is *not* a preview, try to get it from the cache.
 462          */
 463         if (!preview) {
 464                 post = index_lookup_post(postid);
 465                 if (post)
 466                         return post;
 467         }
 468 
 469         post = mem_cache_alloc(post_cache);
 470         if (!post) {
 471                 err = -ENOMEM;
 472                 goto err;
 473         }
 474 
 475         memset(post, 0, sizeof(struct post));
 476 
 477         post->id = postid;
 478         post->title = NULL;
 479         post->body = NULL;
 480         post->numcom = 0;
 481         post->preview = preview;
 482 
 483         rb_create(&post->tags, tag_cmp, sizeof(struct post_tag),
 484                   offsetof(struct post_tag, node));
 485         list_create(&post->comments, sizeof(struct comment),
 486                     offsetof(struct comment, list));
 487         refcnt_init(&post->refcnt, 1);
 488         MXINIT(&post->lock, &post_lc);
 489 
 490         post->files = nvl_alloc();
 491         if (IS_ERR(post->files)) {
 492                 err = PTR_ERR(post->files);
 493                 post->files = NULL;
 494                 goto err_free;
 495         }
 496 
 497         if ((err = post_refresh(post)))
 498                 goto err_free;
 499 
 500         if (!post->preview)
 501                 ASSERT0(index_insert_post(post));
 502 
 503         return post;
 504 
 505 err_free:
 506         post_destroy(post);
 507 
 508 err:
 509         cmn_err(CE_ERROR, "Failed to load post id %u: %s", postid,
 510                 xstrerror(err));
 511         return NULL;
 512 }
 513 
 514 static void post_remove_all_tags(struct rb_tree *taglist)
 515 {
 516         struct post_tag *tag;
 517         struct rb_cookie cookie;
 518 
 519         memset(&cookie, 0, sizeof(cookie));
 520         while ((tag = rb_destroy_nodes(taglist, &cookie))) {
 521                 str_putref(tag->tag);
 522                 free(tag);
 523         }
 524 
 525         rb_create(taglist, tag_cmp, sizeof(struct post_tag),
 526                   offsetof(struct post_tag, node));
 527 }
 528 
 529 void post_destroy(struct post *post)
 530 {
 531         post_remove_all_tags(&post->tags);
 532         post_remove_all_comments(post);
 533 
 534         nvl_putref(post->files);
 535 
 536         str_putref(post->title);
 537         str_putref(post->body);
 538 
 539         MXDESTROY(&post->lock);
 540 
 541         mem_cache_free(post_cache, post);
 542 }
 543 
 544 static void __tq_load_post(void *arg)
 545 {
 546         int postid = (uintptr_t) arg;
 547 
 548         /* load the post, but then free it since we don't need it */
 549         post_putref(load_post(postid, false));
 550 }
 551 
 552 int load_all_posts(void)
 553 {
 554         const char *data_dir = str_cstr(config.data_dir);
 555         char path[FILENAME_MAX];
 556         struct stat statbuf;
 557         struct dirent *de;
 558         uint32_t postid;
 559         uint64_t start_ts, end_ts;
 560         unsigned nposts;
 561         struct taskq *tq;
 562         DIR *dir;
 563         int ret;
 564 
 565         snprintf(path, sizeof(path), "%s/posts", data_dir);
 566         dir = opendir(path);
 567         if (!dir)
 568                 return -errno;
 569 
 570         tq = taskq_create_fixed("load-all-posts", -1);
 571         if (IS_ERR(tq)) {
 572                 closedir(dir);
 573                 return PTR_ERR(tq);
 574         }
 575 
 576         nposts = 0;
 577         start_ts = gettime();
 578 
 579         while ((de = readdir(dir))) {
 580                 if (!strcmp(de->d_name, ".") ||
 581                     !strcmp(de->d_name, ".."))
 582                         continue;
 583 
 584                 ret = str2u32(de->d_name, &postid);
 585                 if (ret) {
 586                         cmn_err(CE_INFO, "skipping '%s/%s' - not a number",
 587                                 data_dir, de->d_name);
 588                         continue;
 589                 }
 590 
 591                 snprintf(path, FILENAME_MAX, "%s/posts/%u", data_dir, postid);
 592 
 593                 /* check that it is a directory */
 594                 ret = xlstat(path, &statbuf);
 595                 if (ret) {
 596                         cmn_err(CE_INFO, "skipping '%s' - failed to xlstat: %s",
 597                                 path, xstrerror(ret));
 598                         continue;
 599                 }
 600 
 601                 if (!S_ISDIR(statbuf.st_mode)) {
 602                         cmn_err(CE_INFO, "skipping '%s' - not a directory; "
 603                                 "mode = %o", path,
 604                                 (unsigned int) statbuf.st_mode);
 605                         continue;
 606                 }
 607 
 608                 /* load the post asynchronously */
 609                 if (taskq_dispatch(tq, __tq_load_post, (void *)(uintptr_t) postid))
 610                         __tq_load_post((void *)(uintptr_t) postid);
 611 
 612                 nposts++;
 613         }
 614 
 615         taskq_wait(tq);
 616         taskq_destroy(tq);
 617 
 618         end_ts = gettime();
 619 
 620         cmn_err(CE_INFO, "Loaded %u posts in %"PRIu64".%09"PRIu64" seconds",
 621                 nposts,
 622                 (end_ts - start_ts) / 1000000000UL,
 623                 (end_ts - start_ts) % 1000000000UL);
 624 
 625         closedir(dir);
 626 
 627         return 0;
 628 }