1 /*
   2  * Copyright (c) 2009-2020 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
   3  * Copyright 2020, Kebe Software & Services
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a copy
   6  * of this software and associated documentation files (the "Software"), to deal
   7  * in the Software without restriction, including without limitation the rights
   8  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   9  * copies of the Software, and to permit persons to whom the Software is
  10  * furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be included in
  13  * all copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  */
  23 
  24 #include <stdlib.h>
  25 #include <stddef.h>
  26 #include <stdio.h>
  27 #include <limits.h>
  28 #include <string.h>
  29 #include <sys/mman.h>
  30 #include <sys/types.h>
  31 #include <sys/stat.h>
  32 #include <unistd.h>
  33 #include <fcntl.h>
  34 #include <time.h>
  35 #include <dirent.h>
  36 
  37 #include <jeffpc/taskq.h>
  38 #include <jeffpc/error.h>
  39 #include <jeffpc/io.h>
  40 #include <jeffpc/mem.h>
  41 #include <jeffpc/file-cache.h>
  42 
  43 #include "post.h"
  44 #include "vars.h"
  45 #include "req.h"
  46 #include "parse.h"
  47 #include "utils.h"
  48 #include "debug.h"
  49 
/*
 * Allocation caches for struct post and struct comment.  Both are created
 * in init_post_subsys().
 */
static struct mem_cache *post_cache;
static struct mem_cache *comment_cache;

/* lock class passed to MXINIT() when a post's lock is initialized */
static LOCK_CLASS(post_lc);

/* forward declarations of helpers defined further down in this file */
static void post_remove_all_tags(struct rb_tree *taglist);
static void post_remove_all_comments(struct post *post);
  57 
  58 static int tag_cmp(const void *va, const void *vb)
  59 {
  60         const struct post_tag *a = va;
  61         const struct post_tag *b = vb;
  62         int ret;
  63 
  64         ret = strcasecmp(str_cstr(a->tag), str_cstr(b->tag));
  65 
  66         if (ret < 0)
  67                 return -1;
  68         if (ret > 0)
  69                 return 1;
  70         return 0;
  71 }
  72 
  73 void init_post_subsys(void)
  74 {
  75         post_cache = mem_cache_create("post-cache", sizeof(struct post), 0);
  76         ASSERT(!IS_ERR(post_cache));
  77 
  78         comment_cache = mem_cache_create("comment-cache",
  79                                          sizeof(struct comment), 0);
  80         ASSERT(!IS_ERR(comment_cache));
  81 
  82         init_post_index();
  83 }
  84 
  85 struct str *post_get_cached_file(struct post *post, const char *path)
  86 {
  87         struct str *out;
  88         uint64_t rev;
  89         int err;
  90 
  91         out = file_cache_get(path, &rev);
  92         if (IS_ERR(out))
  93                 return out;
  94 
  95         err = nvl_set_int(post->files, path, rev);
  96         if (err) {
  97                 str_putref(out);
  98                 out = ERR_PTR(err);
  99         }
 100 
 101         return out;
 102 }
 103 
 104 static void post_remove_all_filenames(struct post *post)
 105 {
 106         const struct nvpair *pair;
 107 
 108         while ((pair = nvl_iter_start(post->files)) != NULL) {
 109                 struct str *name = nvpair_name_str(pair);
 110 
 111                 VERIFY0(nvl_unset(post->files, str_cstr(name)));
 112 
 113                 str_putref(name);
 114         }
 115 }
 116 
 117 /* consumes the struct val reference */
 118 static void post_add_tags(struct rb_tree *taglist, struct val *list)
 119 {
 120         struct val *tagval;
 121         struct val *tmp;
 122 
 123         sexpr_for_each_noref(tagval, tmp, list) {
 124                 struct post_tag *tag;
 125 
 126                 /* sanity check */
 127                 ASSERT3U(tagval->type, ==, VT_STR);
 128 
 129                 tag = malloc(sizeof(struct post_tag));
 130                 ASSERT(tag);
 131 
 132                 tag->tag = val_getref_str(tagval);
 133 
 134                 if (rb_insert(taglist, tag)) {
 135                         /* found a duplicate */
 136                         str_putref(tag->tag);
 137                         free(tag);
 138                 }
 139         }
 140 
 141         val_putref(list);
 142 }
 143 
 144 static void post_remove_all_comments(struct post *post)
 145 {
 146         struct comment *com;
 147 
 148         while ((com = list_remove_head(&post->comments))) {
 149                 str_putref(com->author);
 150                 str_putref(com->email);
 151                 str_putref(com->ip);
 152                 str_putref(com->url);
 153                 str_putref(com->body);
 154                 mem_cache_free(comment_cache, com);
 155         }
 156 
 157         post->numcom = 0;
 158 }
 159 
 160 static struct str *load_comment(struct post *post, int commid)
 161 {
 162         char path[FILENAME_MAX];
 163         struct str *out;
 164 
 165         snprintf(path, FILENAME_MAX, "%s/posts/%d/comments/%d/text.txt",
 166                  str_cstr(config.data_dir), post->id, commid);
 167 
 168         out = post_get_cached_file(post, path);
 169         if (IS_ERR(out))
 170                 out = STATIC_STR("Error: could not load comment text.");
 171 
 172         return out;
 173 }
 174 
 175 static void post_add_comment(struct post *post, int commid)
 176 {
 177         char path[FILENAME_MAX];
 178         struct comment *comm;
 179         struct str *meta;
 180         struct val *lv;
 181         struct val *v;
 182 
 183         snprintf(path, FILENAME_MAX, "%s/posts/%d/comments/%d/meta.lisp",
 184                  str_cstr(config.data_dir), post->id, commid);
 185 
 186         meta = post_get_cached_file(post, path);
 187         ASSERT(!IS_ERR(meta));
 188 
 189         lv = sexpr_parse_str(meta);
 190         ASSERT(!IS_ERR(lv));
 191 
 192         v = sexpr_cdr(sexpr_assoc(lv, "moderated"));
 193         if (!v || (v->type != VT_BOOL) || !v->b)
 194                 goto done;
 195 
 196         comm = mem_cache_alloc(comment_cache);
 197         ASSERT(comm);
 198 
 199         comm->id     = commid;
 200         comm->author = sexpr_alist_lookup_str(lv, "author");
 201         comm->email  = sexpr_alist_lookup_str(lv, "email");
 202         comm->time   = parse_time_str(sexpr_alist_lookup_str(lv, "time"));
 203         comm->ip     = sexpr_alist_lookup_str(lv, "ip");
 204         comm->url    = sexpr_alist_lookup_str(lv, "url");
 205         comm->body   = load_comment(post, comm->id);
 206 
 207         if (!comm->author)
 208                 comm->author = STATIC_STR("[unknown]");
 209 
 210         list_insert_tail(&post->comments, comm);
 211 
 212         post->numcom++;
 213 
 214 done:
 215         val_putref(v);
 216         val_putref(lv);
 217         str_putref(meta);
 218 }
 219 
 220 /* consumes the struct val reference */
 221 static void post_add_comments(struct post *post, struct val *list)
 222 {
 223         struct val *val;
 224         struct val *tmp;
 225 
 226         sexpr_for_each_noref(val, tmp, list) {
 227                 /* sanity check */
 228                 ASSERT3U(val->type, ==, VT_INT);
 229 
 230                 /* add the comment */
 231                 post_add_comment(post, val->i);
 232         }
 233 
 234         val_putref(list);
 235 }
 236 
 237 static int __do_load_post_body_fmt2(struct post *post, struct str *html)
 238 {
 239         str_putref(post->body); /* free the previous */
 240         post->body = str_getref(html);
 241         ASSERT(post->body);
 242 
 243         return 0;
 244 }
 245 
 246 static int
 247 __do_load_post_body_fmt4(struct post *post, const struct str *md)
 248 {
 249         /*
 250          * Like HTML, assume the .lisp file contains all of the other post
 251          * metadata.  If that assumption changes, update here.
 252          *
 253          * Unlike HTML, we plan to provide a clean error string if MD
 254          * parsing fails.
 255          */
 256         str_putref(post->body);      /* Free the previous text. */
 257         post->body = fmt4_md_to_html(md);
 258         if (post->body == NULL) {
 259                 /*
 260                  * XXX KEBE SAYS do something clever here with a small HTML
 261                  * string.  For now, panic.
 262                  */
 263                 ASSERT(post->body != NULL);
 264         }
 265 
 266         return (0);
 267 }
 268 
 269 static int __do_load_post_body_fmt3(struct post *post, const struct str *input)
 270 {
 271         struct parser_output x;
 272         int ret;
 273 
 274         x.req            = NULL;
 275         x.post           = post;
 276         x.input          = str_cstr(input);
 277         x.len            = str_len(input);
 278         x.pos            = 0;
 279         x.lineno         = 0;
 280         x.table_nesting  = 0;
 281         x.texttt_nesting = 0;
 282         x.sc_title       = NULL;
 283         x.sc_pub         = NULL;
 284         x.sc_tags        = NULL;
 285         x.sc_twitter_img = NULL;
 286 
 287         fmt3_lex_init(&x.scanner);
 288         fmt3_set_extra(&x, x.scanner);
 289 
 290         ret = fmt3_parse(&x);
 291         if (ret)
 292                 panic("failed to parse post id %u", post->id);
 293 
 294         fmt3_lex_destroy(x.scanner);
 295 
 296         /*
 297          * Now update struct post based on what we got from the .tex file.
 298          * The struct is already populated by data from the metadata file.
 299          * For the simple string values, we merely override whatever was
 300          * there.  For tags we use the union.
 301          */
 302 
 303         if (x.sc_title) {
 304                 str_putref(post->title);
 305                 post->title = str_getref(x.sc_title);
 306         }
 307 
 308         if (x.sc_pub)
 309                 post->time = parse_time_str(str_getref(x.sc_pub));
 310 
 311         if (x.sc_twitter_img) {
 312                 str_putref(post->twitter_img);
 313                 post->twitter_img = str_getref(x.sc_twitter_img);
 314         }
 315 
 316         post_add_tags(&post->tags, x.sc_tags);
 317 
 318         str_putref(x.sc_title);
 319         str_putref(x.sc_pub);
 320         str_putref(x.sc_twitter_img);
 321 
 322         str_putref(post->body); /* free the previous */
 323         post->body = x.stroutput;
 324         ASSERT(post->body);
 325 
 326         return 0;
 327 }
 328 
 329 static int __load_post_body(struct post *post)
 330 {
 331         static const char *exts[5] = {
 332                 [2] = "html",
 333                 [3] = "tex",
 334                 [4] = "md",
 335         };
 336 
 337         char path[FILENAME_MAX];
 338         struct str *raw;
 339         int ret;
 340 
 341         ASSERT3U(post->fmt, >=, 2);
 342         ASSERT3U(post->fmt, <=, 4);
 343 
 344         snprintf(path, FILENAME_MAX, "%s/posts/%d/post.%s",
 345                  str_cstr(config.data_dir), post->id, exts[post->fmt]);
 346 
 347         raw = post_get_cached_file(post, path);
 348         if (IS_ERR(raw))
 349                 return PTR_ERR(raw);
 350 
 351         switch (post->fmt) {
 352                 case 2:
 353                         ret = __do_load_post_body_fmt2(post, raw);
 354                         break;
 355                 case 3:
 356                         ret = __do_load_post_body_fmt3(post, raw);
 357                         break;
 358                 case 4:
 359                         ret = __do_load_post_body_fmt4(post, raw);
 360                         break;
 361         }
 362 
 363         str_putref(raw);
 364 
 365         return ret;
 366 }
 367 
 368 static void __refresh_published_prop(struct post *post, struct val *lv)
 369 {
 370         /* update the time */
 371         post->time = parse_time_str(sexpr_alist_lookup_str(lv, "time"));
 372 
 373         /* update the title */
 374         post->title = sexpr_alist_lookup_str(lv, "title");
 375 
 376         /* update the format */
 377         post->fmt = sexpr_alist_lookup_int(lv, "fmt", NULL);
 378 
 379         /* update the listed bool */
 380         post->listed = sexpr_alist_lookup_bool(lv, "listed", true, NULL);
 381 }
 382 
 383 static int __refresh_published(struct post *post)
 384 {
 385         char path[FILENAME_MAX];
 386         struct str *meta;
 387         struct val *lv;
 388 
 389         snprintf(path, FILENAME_MAX, "%s/posts/%d/post.lisp",
 390                  str_cstr(config.data_dir), post->id);
 391 
 392         meta = post_get_cached_file(post, path);
 393         if (IS_ERR(meta))
 394                 return PTR_ERR(meta);
 395 
 396         lv = sexpr_parse_str(meta);
 397         if (IS_ERR(lv)) {
 398                 str_putref(meta);
 399                 return PTR_ERR(lv);
 400         }
 401 
 402         __refresh_published_prop(post, lv);
 403 
 404         /* empty out the tags/comments lists */
 405         post_remove_all_tags(&post->tags);
 406         post_remove_all_comments(post);
 407 
 408         /* populate the tags/comments lists */
 409         post_add_tags(&post->tags, sexpr_alist_lookup_list(lv, "tags"));
 410         post_add_comments(post, sexpr_alist_lookup_list(lv, "comments"));
 411 
 412         val_putref(lv);
 413         str_putref(meta);
 414 
 415         return 0;
 416 }
 417 
 418 static bool must_refresh(struct post *post)
 419 {
 420         const struct nvpair *pair;
 421 
 422         if (post->preview)
 423                 return true; /* always refresh previews */
 424 
 425         if (nvl_iter_start(post->files) == NULL)
 426                 return true; /* no files means we have no idea what is needed */
 427 
 428         nvl_for_each(pair, post->files) {
 429                 struct str *name = nvpair_name_str(pair);
 430                 uint64_t file_rev;
 431 
 432                 ASSERT0(nvpair_value_int(pair, &file_rev));
 433 
 434                 if (!file_cache_has_newer(str_cstr(name), file_rev)) {
 435                         str_putref(name);
 436                         continue;
 437                 }
 438 
 439                 cmn_err(CE_DEBUG, "post %u needs a refresh "
 440                         "('%s' changed, old rev %"PRIu64")", post->id,
 441                         str_cstr(name), file_rev);
 442 
 443                 str_putref(name);
 444 
 445                 return true; /* no need to check oher files, we are refreshing */
 446         }
 447 
 448         return false;
 449 }
 450 
 451 int post_refresh(struct post *post)
 452 {
 453         int ret;
 454 
 455         if (!must_refresh(post))
 456                 return 0;
 457 
 458         post_remove_all_filenames(post);
 459 
 460         str_putref(post->title);
 461         post->title = NULL;
 462 
 463         if (post->preview) {
 464                 post->title = STATIC_STR("PREVIEW");
 465                 post->time  = time(NULL);
 466                 post->fmt   = 3;
 467         } else {
 468                 ret = __refresh_published(post);
 469                 if (ret)
 470                         return ret;
 471         }
 472 
 473         if ((ret = __load_post_body(post)))
 474                 return ret;
 475 
 476         /* No title set at all?  Set it to something non-NULL. */
 477         if (!post->title)
 478                 post->title = STATIC_STR("Untitled");
 479 
 480         return 0;
 481 }
 482 
 483 struct post *load_post(int postid, bool preview)
 484 {
 485         struct post *post;
 486         int err;
 487 
 488         /*
 489          * If it is *not* a preview, try to get it from the cache.
 490          */
 491         if (!preview) {
 492                 post = index_lookup_post(postid);
 493                 if (post)
 494                         return post;
 495         }
 496 
 497         post = mem_cache_alloc(post_cache);
 498         if (!post) {
 499                 err = -ENOMEM;
 500                 goto err;
 501         }
 502 
 503         memset(post, 0, sizeof(struct post));
 504 
 505         post->id = postid;
 506         post->title = NULL;
 507         post->body = NULL;
 508         post->numcom = 0;
 509         post->preview = preview;
 510 
 511         rb_create(&post->tags, tag_cmp, sizeof(struct post_tag),
 512                   offsetof(struct post_tag, node));
 513         list_create(&post->comments, sizeof(struct comment),
 514                     offsetof(struct comment, list));
 515         refcnt_init(&post->refcnt, 1);
 516         MXINIT(&post->lock, &post_lc);
 517 
 518         post->files = nvl_alloc();
 519         if (IS_ERR(post->files)) {
 520                 err = PTR_ERR(post->files);
 521                 post->files = NULL;
 522                 goto err_free;
 523         }
 524 
 525         if ((err = post_refresh(post)))
 526                 goto err_free;
 527 
 528         if (!post->preview)
 529                 ASSERT0(index_insert_post(post));
 530 
 531         return post;
 532 
 533 err_free:
 534         post_destroy(post);
 535 
 536 err:
 537         cmn_err(CE_ERROR, "Failed to load post id %u: %s", postid,
 538                 xstrerror(err));
 539         return NULL;
 540 }
 541 
 542 static void post_remove_all_tags(struct rb_tree *taglist)
 543 {
 544         struct post_tag *tag;
 545         struct rb_cookie cookie;
 546 
 547         memset(&cookie, 0, sizeof(cookie));
 548         while ((tag = rb_destroy_nodes(taglist, &cookie))) {
 549                 str_putref(tag->tag);
 550                 free(tag);
 551         }
 552 
 553         rb_create(taglist, tag_cmp, sizeof(struct post_tag),
 554                   offsetof(struct post_tag, node));
 555 }
 556 
 557 void post_destroy(struct post *post)
 558 {
 559         post_remove_all_tags(&post->tags);
 560         post_remove_all_comments(post);
 561 
 562         nvl_putref(post->files);
 563 
 564         str_putref(post->title);
 565         str_putref(post->body);
 566 
 567         MXDESTROY(&post->lock);
 568 
 569         mem_cache_free(post_cache, post);
 570 }
 571 
 572 static void __tq_load_post(void *arg)
 573 {
 574         int postid = (uintptr_t) arg;
 575 
 576         /* load the post, but then free it since we don't need it */
 577         post_putref(load_post(postid, false));
 578 }
 579 
 580 int load_all_posts(void)
 581 {
 582         const char *data_dir = str_cstr(config.data_dir);
 583         char path[FILENAME_MAX];
 584         struct stat statbuf;
 585         struct dirent *de;
 586         uint32_t postid;
 587         uint64_t start_ts, end_ts;
 588         unsigned nposts;
 589         struct taskq *tq;
 590         DIR *dir;
 591         int ret;
 592 
 593         snprintf(path, sizeof(path), "%s/posts", data_dir);
 594         dir = opendir(path);
 595         if (!dir)
 596                 return -errno;
 597 
 598         tq = taskq_create_fixed("load-all-posts", -1);
 599         if (IS_ERR(tq)) {
 600                 closedir(dir);
 601                 return PTR_ERR(tq);
 602         }
 603 
 604         nposts = 0;
 605         start_ts = gettime();
 606 
 607         while ((de = readdir(dir))) {
 608                 if (!strcmp(de->d_name, ".") ||
 609                     !strcmp(de->d_name, ".."))
 610                         continue;
 611 
 612                 ret = str2u32(de->d_name, &postid);
 613                 if (ret) {
 614                         cmn_err(CE_INFO, "skipping '%s/%s' - not a number",
 615                                 data_dir, de->d_name);
 616                         continue;
 617                 }
 618 
 619                 snprintf(path, FILENAME_MAX, "%s/posts/%u", data_dir, postid);
 620 
 621                 /* check that it is a directory */
 622                 ret = xlstat(path, &statbuf);
 623                 if (ret) {
 624                         cmn_err(CE_INFO, "skipping '%s' - failed to xlstat: %s",
 625                                 path, xstrerror(ret));
 626                         continue;
 627                 }
 628 
 629                 if (!S_ISDIR(statbuf.st_mode)) {
 630                         cmn_err(CE_INFO, "skipping '%s' - not a directory; "
 631                                 "mode = %o", path,
 632                                 (unsigned int) statbuf.st_mode);
 633                         continue;
 634                 }
 635 
 636                 /* load the post asynchronously */
 637                 if (taskq_dispatch(tq, __tq_load_post, (void *)(uintptr_t) postid))
 638                         __tq_load_post((void *)(uintptr_t) postid);
 639 
 640                 nposts++;
 641         }
 642 
 643         taskq_wait(tq);
 644         taskq_destroy(tq);
 645 
 646         end_ts = gettime();
 647 
 648         cmn_err(CE_INFO, "Loaded %u posts in %"PRIu64".%09"PRIu64" seconds",
 649                 nposts,
 650                 (end_ts - start_ts) / 1000000000UL,
 651                 (end_ts - start_ts) % 1000000000UL);
 652 
 653         closedir(dir);
 654 
 655         return 0;
 656 }