1 /*
2 * Copyright (c) 2009-2020 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
3 * Copyright 2020, Kebe Software & Services
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a copy
6 * of this software and associated documentation files (the "Software"), to deal
7 * in the Software without restriction, including without limitation the rights
8 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 * copies of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <stdlib.h>
25 #include <stddef.h>
26 #include <stdio.h>
27 #include <limits.h>
28 #include <string.h>
29 #include <sys/mman.h>
30 #include <sys/types.h>
31 #include <sys/stat.h>
32 #include <unistd.h>
33 #include <fcntl.h>
34 #include <time.h>
35 #include <dirent.h>
36
37 #include <jeffpc/taskq.h>
38 #include <jeffpc/error.h>
39 #include <jeffpc/io.h>
40 #include <jeffpc/mem.h>
41 #include <jeffpc/file-cache.h>
42
43 #include "post.h"
44 #include "vars.h"
45 #include "req.h"
46 #include "parse.h"
47 #include "utils.h"
48 #include "debug.h"
49
/*
 * Allocation caches for struct post and struct comment.  Both are created
 * by init_post_subsys().
 */
static struct mem_cache *post_cache;
static struct mem_cache *comment_cache;

/* lock class passed to MXINIT() when a post's lock is initialized */
static LOCK_CLASS(post_lc);

/* forward declarations for helpers defined further down in this file */
static void post_remove_all_tags(struct rb_tree *taglist);
static void post_remove_all_comments(struct post *post);
57
58 static int tag_cmp(const void *va, const void *vb)
59 {
60 const struct post_tag *a = va;
61 const struct post_tag *b = vb;
62 int ret;
63
64 ret = strcasecmp(str_cstr(a->tag), str_cstr(b->tag));
65
66 if (ret < 0)
67 return -1;
68 if (ret > 0)
69 return 1;
70 return 0;
71 }
72
73 void init_post_subsys(void)
74 {
75 post_cache = mem_cache_create("post-cache", sizeof(struct post), 0);
76 ASSERT(!IS_ERR(post_cache));
77
78 comment_cache = mem_cache_create("comment-cache",
79 sizeof(struct comment), 0);
80 ASSERT(!IS_ERR(comment_cache));
81
82 init_post_index();
83 }
84
85 struct str *post_get_cached_file(struct post *post, const char *path)
86 {
87 struct str *out;
88 uint64_t rev;
89 int err;
90
91 out = file_cache_get(path, &rev);
92 if (IS_ERR(out))
93 return out;
94
95 err = nvl_set_int(post->files, path, rev);
96 if (err) {
97 str_putref(out);
98 out = ERR_PTR(err);
99 }
100
101 return out;
102 }
103
104 static void post_remove_all_filenames(struct post *post)
105 {
106 const struct nvpair *pair;
107
108 while ((pair = nvl_iter_start(post->files)) != NULL) {
109 struct str *name = nvpair_name_str(pair);
110
111 VERIFY0(nvl_unset(post->files, str_cstr(name)));
112
113 str_putref(name);
114 }
115 }
116
117 /* consumes the struct val reference */
118 static void post_add_tags(struct rb_tree *taglist, struct val *list)
119 {
120 struct val *tagval;
121 struct val *tmp;
122
123 sexpr_for_each_noref(tagval, tmp, list) {
124 struct post_tag *tag;
125
126 /* sanity check */
127 ASSERT3U(tagval->type, ==, VT_STR);
128
129 tag = malloc(sizeof(struct post_tag));
130 ASSERT(tag);
131
132 tag->tag = val_getref_str(tagval);
133
134 if (rb_insert(taglist, tag)) {
135 /* found a duplicate */
136 str_putref(tag->tag);
137 free(tag);
138 }
139 }
140
141 val_putref(list);
142 }
143
144 static void post_remove_all_comments(struct post *post)
145 {
146 struct comment *com;
147
148 while ((com = list_remove_head(&post->comments))) {
149 str_putref(com->author);
150 str_putref(com->email);
151 str_putref(com->ip);
152 str_putref(com->url);
153 str_putref(com->body);
154 mem_cache_free(comment_cache, com);
155 }
156
157 post->numcom = 0;
158 }
159
160 static struct str *load_comment(struct post *post, int commid)
161 {
162 char path[FILENAME_MAX];
163 struct str *out;
164
165 snprintf(path, FILENAME_MAX, "%s/posts/%d/comments/%d/text.txt",
166 str_cstr(config.data_dir), post->id, commid);
167
168 out = post_get_cached_file(post, path);
169 if (IS_ERR(out))
170 out = STATIC_STR("Error: could not load comment text.");
171
172 return out;
173 }
174
175 static void post_add_comment(struct post *post, int commid)
176 {
177 char path[FILENAME_MAX];
178 struct comment *comm;
179 struct str *meta;
180 struct val *lv;
181 struct val *v;
182
183 snprintf(path, FILENAME_MAX, "%s/posts/%d/comments/%d/meta.lisp",
184 str_cstr(config.data_dir), post->id, commid);
185
186 meta = post_get_cached_file(post, path);
187 ASSERT(!IS_ERR(meta));
188
189 lv = sexpr_parse_str(meta);
190 ASSERT(!IS_ERR(lv));
191
192 v = sexpr_cdr(sexpr_assoc(lv, "moderated"));
193 if (!v || (v->type != VT_BOOL) || !v->b)
194 goto done;
195
196 comm = mem_cache_alloc(comment_cache);
197 ASSERT(comm);
198
199 comm->id = commid;
200 comm->author = sexpr_alist_lookup_str(lv, "author");
201 comm->email = sexpr_alist_lookup_str(lv, "email");
202 comm->time = parse_time_str(sexpr_alist_lookup_str(lv, "time"));
203 comm->ip = sexpr_alist_lookup_str(lv, "ip");
204 comm->url = sexpr_alist_lookup_str(lv, "url");
205 comm->body = load_comment(post, comm->id);
206
207 if (!comm->author)
208 comm->author = STATIC_STR("[unknown]");
209
210 list_insert_tail(&post->comments, comm);
211
212 post->numcom++;
213
214 done:
215 val_putref(v);
216 val_putref(lv);
217 str_putref(meta);
218 }
219
220 /* consumes the struct val reference */
221 static void post_add_comments(struct post *post, struct val *list)
222 {
223 struct val *val;
224 struct val *tmp;
225
226 sexpr_for_each_noref(val, tmp, list) {
227 /* sanity check */
228 ASSERT3U(val->type, ==, VT_INT);
229
230 /* add the comment */
231 post_add_comment(post, val->i);
232 }
233
234 val_putref(list);
235 }
236
237 static int __do_load_post_body_fmt2(struct post *post, struct str *html)
238 {
239 str_putref(post->body); /* free the previous */
240 post->body = str_getref(html);
241 ASSERT(post->body);
242
243 return 0;
244 }
245
246 static int
247 __do_load_post_body_fmt4(struct post *post, const struct str *md)
248 {
249 /*
250 * Like HTML, assume the .lisp file contains all of the other post
251 * metadata. If that assumption changes, update here.
252 *
253 * Unlike HTML, we plan to provide a clean error string if MD
254 * parsing fails.
255 */
256 str_putref(post->body); /* Free the previous text. */
257 post->body = fmt4_md_to_html(md);
258 if (post->body == NULL) {
259 /*
260 * XXX KEBE SAYS do something clever here with a small HTML
261 * string. For now, panic.
262 */
263 ASSERT(post->body != NULL);
264 }
265
266 return (0);
267 }
268
269 static int __do_load_post_body_fmt3(struct post *post, const struct str *input)
270 {
271 struct parser_output x;
272 int ret;
273
274 x.req = NULL;
275 x.post = post;
276 x.input = str_cstr(input);
277 x.len = str_len(input);
278 x.pos = 0;
279 x.lineno = 0;
280 x.table_nesting = 0;
281 x.texttt_nesting = 0;
282 x.sc_title = NULL;
283 x.sc_pub = NULL;
284 x.sc_tags = NULL;
285 x.sc_twitter_img = NULL;
286
287 fmt3_lex_init(&x.scanner);
288 fmt3_set_extra(&x, x.scanner);
289
290 ret = fmt3_parse(&x);
291 if (ret)
292 panic("failed to parse post id %u", post->id);
293
294 fmt3_lex_destroy(x.scanner);
295
296 /*
297 * Now update struct post based on what we got from the .tex file.
298 * The struct is already populated by data from the metadata file.
299 * For the simple string values, we merely override whatever was
300 * there. For tags we use the union.
301 */
302
303 if (x.sc_title) {
304 str_putref(post->title);
305 post->title = str_getref(x.sc_title);
306 }
307
308 if (x.sc_pub)
309 post->time = parse_time_str(str_getref(x.sc_pub));
310
311 if (x.sc_twitter_img) {
312 str_putref(post->twitter_img);
313 post->twitter_img = str_getref(x.sc_twitter_img);
314 }
315
316 post_add_tags(&post->tags, x.sc_tags);
317
318 str_putref(x.sc_title);
319 str_putref(x.sc_pub);
320 str_putref(x.sc_twitter_img);
321
322 str_putref(post->body); /* free the previous */
323 post->body = x.stroutput;
324 ASSERT(post->body);
325
326 return 0;
327 }
328
329 static int __load_post_body(struct post *post)
330 {
331 static const char *exts[5] = {
332 [2] = "html",
333 [3] = "tex",
334 [4] = "md",
335 };
336
337 char path[FILENAME_MAX];
338 struct str *raw;
339 int ret;
340
341 ASSERT3U(post->fmt, >=, 2);
342 ASSERT3U(post->fmt, <=, 4);
343
344 snprintf(path, FILENAME_MAX, "%s/posts/%d/post.%s",
345 str_cstr(config.data_dir), post->id, exts[post->fmt]);
346
347 raw = post_get_cached_file(post, path);
348 if (IS_ERR(raw))
349 return PTR_ERR(raw);
350
351 switch (post->fmt) {
352 case 2:
353 ret = __do_load_post_body_fmt2(post, raw);
354 break;
355 case 3:
356 ret = __do_load_post_body_fmt3(post, raw);
357 break;
358 case 4:
359 ret = __do_load_post_body_fmt4(post, raw);
360 break;
361 }
362
363 str_putref(raw);
364
365 return ret;
366 }
367
368 static void __refresh_published_prop(struct post *post, struct val *lv)
369 {
370 /* update the time */
371 post->time = parse_time_str(sexpr_alist_lookup_str(lv, "time"));
372
373 /* update the title */
374 post->title = sexpr_alist_lookup_str(lv, "title");
375
376 /* update the format */
377 post->fmt = sexpr_alist_lookup_int(lv, "fmt", NULL);
378
379 /* update the listed bool */
380 post->listed = sexpr_alist_lookup_bool(lv, "listed", true, NULL);
381 }
382
383 static int __refresh_published(struct post *post)
384 {
385 char path[FILENAME_MAX];
386 struct str *meta;
387 struct val *lv;
388
389 snprintf(path, FILENAME_MAX, "%s/posts/%d/post.lisp",
390 str_cstr(config.data_dir), post->id);
391
392 meta = post_get_cached_file(post, path);
393 if (IS_ERR(meta))
394 return PTR_ERR(meta);
395
396 lv = sexpr_parse_str(meta);
397 if (IS_ERR(lv)) {
398 str_putref(meta);
399 return PTR_ERR(lv);
400 }
401
402 __refresh_published_prop(post, lv);
403
404 /* empty out the tags/comments lists */
405 post_remove_all_tags(&post->tags);
406 post_remove_all_comments(post);
407
408 /* populate the tags/comments lists */
409 post_add_tags(&post->tags, sexpr_alist_lookup_list(lv, "tags"));
410 post_add_comments(post, sexpr_alist_lookup_list(lv, "comments"));
411
412 val_putref(lv);
413 str_putref(meta);
414
415 return 0;
416 }
417
418 static bool must_refresh(struct post *post)
419 {
420 const struct nvpair *pair;
421
422 if (post->preview)
423 return true; /* always refresh previews */
424
425 if (nvl_iter_start(post->files) == NULL)
426 return true; /* no files means we have no idea what is needed */
427
428 nvl_for_each(pair, post->files) {
429 struct str *name = nvpair_name_str(pair);
430 uint64_t file_rev;
431
432 ASSERT0(nvpair_value_int(pair, &file_rev));
433
434 if (!file_cache_has_newer(str_cstr(name), file_rev)) {
435 str_putref(name);
436 continue;
437 }
438
439 cmn_err(CE_DEBUG, "post %u needs a refresh "
440 "('%s' changed, old rev %"PRIu64")", post->id,
441 str_cstr(name), file_rev);
442
443 str_putref(name);
444
445 return true; /* no need to check oher files, we are refreshing */
446 }
447
448 return false;
449 }
450
451 int post_refresh(struct post *post)
452 {
453 int ret;
454
455 if (!must_refresh(post))
456 return 0;
457
458 post_remove_all_filenames(post);
459
460 str_putref(post->title);
461 post->title = NULL;
462
463 if (post->preview) {
464 post->title = STATIC_STR("PREVIEW");
465 post->time = time(NULL);
466 post->fmt = 3;
467 } else {
468 ret = __refresh_published(post);
469 if (ret)
470 return ret;
471 }
472
473 if ((ret = __load_post_body(post)))
474 return ret;
475
476 /* No title set at all? Set it to something non-NULL. */
477 if (!post->title)
478 post->title = STATIC_STR("Untitled");
479
480 return 0;
481 }
482
483 struct post *load_post(int postid, bool preview)
484 {
485 struct post *post;
486 int err;
487
488 /*
489 * If it is *not* a preview, try to get it from the cache.
490 */
491 if (!preview) {
492 post = index_lookup_post(postid);
493 if (post)
494 return post;
495 }
496
497 post = mem_cache_alloc(post_cache);
498 if (!post) {
499 err = -ENOMEM;
500 goto err;
501 }
502
503 memset(post, 0, sizeof(struct post));
504
505 post->id = postid;
506 post->title = NULL;
507 post->body = NULL;
508 post->numcom = 0;
509 post->preview = preview;
510
511 rb_create(&post->tags, tag_cmp, sizeof(struct post_tag),
512 offsetof(struct post_tag, node));
513 list_create(&post->comments, sizeof(struct comment),
514 offsetof(struct comment, list));
515 refcnt_init(&post->refcnt, 1);
516 MXINIT(&post->lock, &post_lc);
517
518 post->files = nvl_alloc();
519 if (IS_ERR(post->files)) {
520 err = PTR_ERR(post->files);
521 post->files = NULL;
522 goto err_free;
523 }
524
525 if ((err = post_refresh(post)))
526 goto err_free;
527
528 if (!post->preview)
529 ASSERT0(index_insert_post(post));
530
531 return post;
532
533 err_free:
534 post_destroy(post);
535
536 err:
537 cmn_err(CE_ERROR, "Failed to load post id %u: %s", postid,
538 xstrerror(err));
539 return NULL;
540 }
541
542 static void post_remove_all_tags(struct rb_tree *taglist)
543 {
544 struct post_tag *tag;
545 struct rb_cookie cookie;
546
547 memset(&cookie, 0, sizeof(cookie));
548 while ((tag = rb_destroy_nodes(taglist, &cookie))) {
549 str_putref(tag->tag);
550 free(tag);
551 }
552
553 rb_create(taglist, tag_cmp, sizeof(struct post_tag),
554 offsetof(struct post_tag, node));
555 }
556
557 void post_destroy(struct post *post)
558 {
559 post_remove_all_tags(&post->tags);
560 post_remove_all_comments(post);
561
562 nvl_putref(post->files);
563
564 str_putref(post->title);
565 str_putref(post->body);
566
567 MXDESTROY(&post->lock);
568
569 mem_cache_free(post_cache, post);
570 }
571
/*
 * Task queue callback: `arg` carries a post id encoded as a pointer-sized
 * integer.  The post is loaded (as a non-preview) and the resulting
 * reference is dropped right away since the caller has no use for it.
 */
static void __tq_load_post(void *arg)
{
	int postid = (uintptr_t) arg;
	struct post *post;

	post = load_post(postid, false);

	post_putref(post);
}
579
580 int load_all_posts(void)
581 {
582 const char *data_dir = str_cstr(config.data_dir);
583 char path[FILENAME_MAX];
584 struct stat statbuf;
585 struct dirent *de;
586 uint32_t postid;
587 uint64_t start_ts, end_ts;
588 unsigned nposts;
589 struct taskq *tq;
590 DIR *dir;
591 int ret;
592
593 snprintf(path, sizeof(path), "%s/posts", data_dir);
594 dir = opendir(path);
595 if (!dir)
596 return -errno;
597
598 tq = taskq_create_fixed("load-all-posts", -1);
599 if (IS_ERR(tq)) {
600 closedir(dir);
601 return PTR_ERR(tq);
602 }
603
604 nposts = 0;
605 start_ts = gettime();
606
607 while ((de = readdir(dir))) {
608 if (!strcmp(de->d_name, ".") ||
609 !strcmp(de->d_name, ".."))
610 continue;
611
612 ret = str2u32(de->d_name, &postid);
613 if (ret) {
614 cmn_err(CE_INFO, "skipping '%s/%s' - not a number",
615 data_dir, de->d_name);
616 continue;
617 }
618
619 snprintf(path, FILENAME_MAX, "%s/posts/%u", data_dir, postid);
620
621 /* check that it is a directory */
622 ret = xlstat(path, &statbuf);
623 if (ret) {
624 cmn_err(CE_INFO, "skipping '%s' - failed to xlstat: %s",
625 path, xstrerror(ret));
626 continue;
627 }
628
629 if (!S_ISDIR(statbuf.st_mode)) {
630 cmn_err(CE_INFO, "skipping '%s' - not a directory; "
631 "mode = %o", path,
632 (unsigned int) statbuf.st_mode);
633 continue;
634 }
635
636 /* load the post asynchronously */
637 if (taskq_dispatch(tq, __tq_load_post, (void *)(uintptr_t) postid))
638 __tq_load_post((void *)(uintptr_t) postid);
639
640 nposts++;
641 }
642
643 taskq_wait(tq);
644 taskq_destroy(tq);
645
646 end_ts = gettime();
647
648 cmn_err(CE_INFO, "Loaded %u posts in %"PRIu64".%09"PRIu64" seconds",
649 nposts,
650 (end_ts - start_ts) / 1000000000UL,
651 (end_ts - start_ts) % 1000000000UL);
652
653 closedir(dir);
654
655 return 0;
656 }