1 /*
2 * Copyright (c) 2009-2020 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include <stdlib.h>
24 #include <stddef.h>
25 #include <stdio.h>
26 #include <limits.h>
27 #include <string.h>
28 #include <sys/mman.h>
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <unistd.h>
32 #include <fcntl.h>
33 #include <time.h>
34 #include <dirent.h>
35
36 #include <jeffpc/taskq.h>
37 #include <jeffpc/error.h>
38 #include <jeffpc/io.h>
39 #include <jeffpc/mem.h>
40 #include <jeffpc/file-cache.h>
41
42 #include "post.h"
43 #include "vars.h"
44 #include "req.h"
45 #include "parse.h"
46 #include "utils.h"
47 #include "debug.h"
48
/* Allocation caches for struct post and struct comment (see init_post_subsys) */
static struct mem_cache *post_cache;
static struct mem_cache *comment_cache;

/* Lock class passed to MXINIT() when a post's lock is initialized */
static LOCK_CLASS(post_lc);

/* Helpers that are used before their definitions appear in this file */
static void post_remove_all_comments(struct post *post);
static void post_remove_all_tags(struct rb_tree *taglist);
56
57 static int tag_cmp(const void *va, const void *vb)
58 {
59 const struct post_tag *a = va;
60 const struct post_tag *b = vb;
61 int ret;
62
63 ret = strcasecmp(str_cstr(a->tag), str_cstr(b->tag));
64
65 if (ret < 0)
66 return -1;
67 if (ret > 0)
68 return 1;
69 return 0;
70 }
71
72 void init_post_subsys(void)
73 {
74 post_cache = mem_cache_create("post-cache", sizeof(struct post), 0);
75 ASSERT(!IS_ERR(post_cache));
76
77 comment_cache = mem_cache_create("comment-cache",
78 sizeof(struct comment), 0);
79 ASSERT(!IS_ERR(comment_cache));
80
81 init_post_index();
82 }
83
84 struct str *post_get_cached_file(struct post *post, const char *path)
85 {
86 struct str *out;
87 uint64_t rev;
88 int err;
89
90 out = file_cache_get(path, &rev);
91 if (IS_ERR(out))
92 return out;
93
94 err = nvl_set_int(post->files, path, rev);
95 if (err) {
96 str_putref(out);
97 out = ERR_PTR(err);
98 }
99
100 return out;
101 }
102
103 static void post_remove_all_filenames(struct post *post)
104 {
105 const struct nvpair *pair;
106
107 while ((pair = nvl_iter_start(post->files)) != NULL) {
108 struct str *name = nvpair_name_str(pair);
109
110 VERIFY0(nvl_unset(post->files, str_cstr(name)));
111
112 str_putref(name);
113 }
114 }
115
116 /* consumes the struct val reference */
117 static void post_add_tags(struct rb_tree *taglist, struct val *list)
118 {
119 struct val *tagval;
120 struct val *tmp;
121
122 sexpr_for_each_noref(tagval, tmp, list) {
123 struct post_tag *tag;
124
125 /* sanity check */
126 ASSERT3U(tagval->type, ==, VT_STR);
127
128 tag = malloc(sizeof(struct post_tag));
129 ASSERT(tag);
130
131 tag->tag = val_getref_str(tagval);
132
133 if (rb_insert(taglist, tag)) {
134 /* found a duplicate */
135 str_putref(tag->tag);
136 free(tag);
137 }
138 }
139
140 val_putref(list);
141 }
142
143 static void post_remove_all_comments(struct post *post)
144 {
145 struct comment *com;
146
147 while ((com = list_remove_head(&post->comments))) {
148 str_putref(com->author);
149 str_putref(com->email);
150 str_putref(com->ip);
151 str_putref(com->url);
152 str_putref(com->body);
153 mem_cache_free(comment_cache, com);
154 }
155
156 post->numcom = 0;
157 }
158
159 static struct str *load_comment(struct post *post, int commid)
160 {
161 char path[FILENAME_MAX];
162 struct str *out;
163
164 snprintf(path, FILENAME_MAX, "%s/posts/%d/comments/%d/text.txt",
165 str_cstr(config.data_dir), post->id, commid);
166
167 out = post_get_cached_file(post, path);
168 if (IS_ERR(out))
169 out = STATIC_STR("Error: could not load comment text.");
170
171 return out;
172 }
173
174 static void post_add_comment(struct post *post, int commid)
175 {
176 char path[FILENAME_MAX];
177 struct comment *comm;
178 struct str *meta;
179 struct val *lv;
180 struct val *v;
181
182 snprintf(path, FILENAME_MAX, "%s/posts/%d/comments/%d/meta.lisp",
183 str_cstr(config.data_dir), post->id, commid);
184
185 meta = post_get_cached_file(post, path);
186 ASSERT(!IS_ERR(meta));
187
188 lv = sexpr_parse_str(meta);
189 ASSERT(!IS_ERR(lv));
190
191 v = sexpr_cdr(sexpr_assoc(lv, "moderated"));
192 if (!v || (v->type != VT_BOOL) || !v->b)
193 goto done;
194
195 comm = mem_cache_alloc(comment_cache);
196 ASSERT(comm);
197
198 comm->id = commid;
199 comm->author = sexpr_alist_lookup_str(lv, "author");
200 comm->email = sexpr_alist_lookup_str(lv, "email");
201 comm->time = parse_time_str(sexpr_alist_lookup_str(lv, "time"));
202 comm->ip = sexpr_alist_lookup_str(lv, "ip");
203 comm->url = sexpr_alist_lookup_str(lv, "url");
204 comm->body = load_comment(post, comm->id);
205
206 if (!comm->author)
207 comm->author = STATIC_STR("[unknown]");
208
209 list_insert_tail(&post->comments, comm);
210
211 post->numcom++;
212
213 done:
214 val_putref(v);
215 val_putref(lv);
216 str_putref(meta);
217 }
218
219 /* consumes the struct val reference */
220 static void post_add_comments(struct post *post, struct val *list)
221 {
222 struct val *val;
223 struct val *tmp;
224
225 sexpr_for_each_noref(val, tmp, list) {
226 /* sanity check */
227 ASSERT3U(val->type, ==, VT_INT);
228
229 /* add the comment */
230 post_add_comment(post, val->i);
231 }
232
233 val_putref(list);
234 }
235
236 static int __do_load_post_body_fmt2(struct post *post, struct str *html)
237 {
238 str_putref(post->body); /* free the previous */
239 post->body = str_getref(html);
240 ASSERT(post->body);
241
242 return 0;
243 }
244
245 static int __do_load_post_body_fmt3(struct post *post, const struct str *input)
246 {
247 struct parser_output x;
248 int ret;
249
250 x.req = NULL;
251 x.post = post;
252 x.input = str_cstr(input);
253 x.len = str_len(input);
254 x.pos = 0;
255 x.lineno = 0;
256 x.table_nesting = 0;
257 x.texttt_nesting = 0;
258 x.sc_title = NULL;
259 x.sc_pub = NULL;
260 x.sc_tags = NULL;
261 x.sc_twitter_img = NULL;
262
263 fmt3_lex_init(&x.scanner);
264 fmt3_set_extra(&x, x.scanner);
265
266 ret = fmt3_parse(&x);
267 if (ret)
268 panic("failed to parse post id %u", post->id);
269
270 fmt3_lex_destroy(x.scanner);
271
272 /*
273 * Now update struct post based on what we got from the .tex file.
274 * The struct is already populated by data from the metadata file.
275 * For the simple string values, we merely override whatever was
276 * there. For tags we use the union.
277 */
278
279 if (x.sc_title) {
280 str_putref(post->title);
281 post->title = str_getref(x.sc_title);
282 }
283
284 if (x.sc_pub)
285 post->time = parse_time_str(str_getref(x.sc_pub));
286
287 if (x.sc_twitter_img) {
288 str_putref(post->twitter_img);
289 post->twitter_img = str_getref(x.sc_twitter_img);
290 }
291
292 post_add_tags(&post->tags, x.sc_tags);
293
294 str_putref(x.sc_title);
295 str_putref(x.sc_pub);
296 str_putref(x.sc_twitter_img);
297
298 str_putref(post->body); /* free the previous */
299 post->body = x.stroutput;
300 ASSERT(post->body);
301
302 return 0;
303 }
304
305 static int __load_post_body(struct post *post)
306 {
307 static const char *exts[4] = {
308 [2] = "html",
309 [3] = "tex",
310 };
311
312 char path[FILENAME_MAX];
313 struct str *raw;
314 int ret;
315
316 ASSERT3U(post->fmt, >=, 2);
317 ASSERT3U(post->fmt, <=, 3);
318
319 snprintf(path, FILENAME_MAX, "%s/posts/%d/post.%s",
320 str_cstr(config.data_dir), post->id, exts[post->fmt]);
321
322 raw = post_get_cached_file(post, path);
323 if (IS_ERR(raw))
324 return PTR_ERR(raw);
325
326 switch (post->fmt) {
327 case 2:
328 ret = __do_load_post_body_fmt2(post, raw);
329 break;
330 case 3:
331 ret = __do_load_post_body_fmt3(post, raw);
332 break;
333 }
334
335 str_putref(raw);
336
337 return ret;
338 }
339
340 static void __refresh_published_prop(struct post *post, struct val *lv)
341 {
342 /* update the time */
343 post->time = parse_time_str(sexpr_alist_lookup_str(lv, "time"));
344
345 /* update the title */
346 post->title = sexpr_alist_lookup_str(lv, "title");
347
348 /* update the format */
349 post->fmt = sexpr_alist_lookup_int(lv, "fmt", NULL);
350
351 /* update the listed bool */
352 post->listed = sexpr_alist_lookup_bool(lv, "listed", true, NULL);
353 }
354
355 static int __refresh_published(struct post *post)
356 {
357 char path[FILENAME_MAX];
358 struct str *meta;
359 struct val *lv;
360
361 snprintf(path, FILENAME_MAX, "%s/posts/%d/post.lisp",
362 str_cstr(config.data_dir), post->id);
363
364 meta = post_get_cached_file(post, path);
365 if (IS_ERR(meta))
366 return PTR_ERR(meta);
367
368 lv = sexpr_parse_str(meta);
369 if (IS_ERR(lv)) {
370 str_putref(meta);
371 return PTR_ERR(lv);
372 }
373
374 __refresh_published_prop(post, lv);
375
376 /* empty out the tags/comments lists */
377 post_remove_all_tags(&post->tags);
378 post_remove_all_comments(post);
379
380 /* populate the tags/comments lists */
381 post_add_tags(&post->tags, sexpr_alist_lookup_list(lv, "tags"));
382 post_add_comments(post, sexpr_alist_lookup_list(lv, "comments"));
383
384 val_putref(lv);
385 str_putref(meta);
386
387 return 0;
388 }
389
390 static bool must_refresh(struct post *post)
391 {
392 const struct nvpair *pair;
393
394 if (post->preview)
395 return true; /* always refresh previews */
396
397 if (nvl_iter_start(post->files) == NULL)
398 return true; /* no files means we have no idea what is needed */
399
400 nvl_for_each(pair, post->files) {
401 struct str *name = nvpair_name_str(pair);
402 uint64_t file_rev;
403
404 ASSERT0(nvpair_value_int(pair, &file_rev));
405
406 if (!file_cache_has_newer(str_cstr(name), file_rev)) {
407 str_putref(name);
408 continue;
409 }
410
411 cmn_err(CE_DEBUG, "post %u needs a refresh "
412 "('%s' changed, old rev %"PRIu64")", post->id,
413 str_cstr(name), file_rev);
414
415 str_putref(name);
416
417 return true; /* no need to check oher files, we are refreshing */
418 }
419
420 return false;
421 }
422
423 int post_refresh(struct post *post)
424 {
425 int ret;
426
427 if (!must_refresh(post))
428 return 0;
429
430 post_remove_all_filenames(post);
431
432 str_putref(post->title);
433 post->title = NULL;
434
435 if (post->preview) {
436 post->title = STATIC_STR("PREVIEW");
437 post->time = time(NULL);
438 post->fmt = 3;
439 } else {
440 ret = __refresh_published(post);
441 if (ret)
442 return ret;
443 }
444
445 if ((ret = __load_post_body(post)))
446 return ret;
447
448 /* No title set at all? Set it to something non-NULL. */
449 if (!post->title)
450 post->title = STATIC_STR("Untitled");
451
452 return 0;
453 }
454
455 struct post *load_post(int postid, bool preview)
456 {
457 struct post *post;
458 int err;
459
460 /*
461 * If it is *not* a preview, try to get it from the cache.
462 */
463 if (!preview) {
464 post = index_lookup_post(postid);
465 if (post)
466 return post;
467 }
468
469 post = mem_cache_alloc(post_cache);
470 if (!post) {
471 err = -ENOMEM;
472 goto err;
473 }
474
475 memset(post, 0, sizeof(struct post));
476
477 post->id = postid;
478 post->title = NULL;
479 post->body = NULL;
480 post->numcom = 0;
481 post->preview = preview;
482
483 rb_create(&post->tags, tag_cmp, sizeof(struct post_tag),
484 offsetof(struct post_tag, node));
485 list_create(&post->comments, sizeof(struct comment),
486 offsetof(struct comment, list));
487 refcnt_init(&post->refcnt, 1);
488 MXINIT(&post->lock, &post_lc);
489
490 post->files = nvl_alloc();
491 if (IS_ERR(post->files)) {
492 err = PTR_ERR(post->files);
493 post->files = NULL;
494 goto err_free;
495 }
496
497 if ((err = post_refresh(post)))
498 goto err_free;
499
500 if (!post->preview)
501 ASSERT0(index_insert_post(post));
502
503 return post;
504
505 err_free:
506 post_destroy(post);
507
508 err:
509 cmn_err(CE_ERROR, "Failed to load post id %u: %s", postid,
510 xstrerror(err));
511 return NULL;
512 }
513
514 static void post_remove_all_tags(struct rb_tree *taglist)
515 {
516 struct post_tag *tag;
517 struct rb_cookie cookie;
518
519 memset(&cookie, 0, sizeof(cookie));
520 while ((tag = rb_destroy_nodes(taglist, &cookie))) {
521 str_putref(tag->tag);
522 free(tag);
523 }
524
525 rb_create(taglist, tag_cmp, sizeof(struct post_tag),
526 offsetof(struct post_tag, node));
527 }
528
529 void post_destroy(struct post *post)
530 {
531 post_remove_all_tags(&post->tags);
532 post_remove_all_comments(post);
533
534 nvl_putref(post->files);
535
536 str_putref(post->title);
537 str_putref(post->body);
538
539 MXDESTROY(&post->lock);
540
541 mem_cache_free(post_cache, post);
542 }
543
/*
 * Task queue callback used by load_all_posts().  @arg is the post id
 * smuggled through the void pointer.
 */
static void __tq_load_post(void *arg)
{
	int postid = (uintptr_t) arg;
	struct post *post;

	/* we only care about the side-effect of loading the post */
	post = load_post(postid, false);

	/* ...so give the reference right back */
	post_putref(post);
}
551
552 int load_all_posts(void)
553 {
554 const char *data_dir = str_cstr(config.data_dir);
555 char path[FILENAME_MAX];
556 struct stat statbuf;
557 struct dirent *de;
558 uint32_t postid;
559 uint64_t start_ts, end_ts;
560 unsigned nposts;
561 struct taskq *tq;
562 DIR *dir;
563 int ret;
564
565 snprintf(path, sizeof(path), "%s/posts", data_dir);
566 dir = opendir(path);
567 if (!dir)
568 return -errno;
569
570 tq = taskq_create_fixed("load-all-posts", -1);
571 if (IS_ERR(tq)) {
572 closedir(dir);
573 return PTR_ERR(tq);
574 }
575
576 nposts = 0;
577 start_ts = gettime();
578
579 while ((de = readdir(dir))) {
580 if (!strcmp(de->d_name, ".") ||
581 !strcmp(de->d_name, ".."))
582 continue;
583
584 ret = str2u32(de->d_name, &postid);
585 if (ret) {
586 cmn_err(CE_INFO, "skipping '%s/%s' - not a number",
587 data_dir, de->d_name);
588 continue;
589 }
590
591 snprintf(path, FILENAME_MAX, "%s/posts/%u", data_dir, postid);
592
593 /* check that it is a directory */
594 ret = xlstat(path, &statbuf);
595 if (ret) {
596 cmn_err(CE_INFO, "skipping '%s' - failed to xlstat: %s",
597 path, xstrerror(ret));
598 continue;
599 }
600
601 if (!S_ISDIR(statbuf.st_mode)) {
602 cmn_err(CE_INFO, "skipping '%s' - not a directory; "
603 "mode = %o", path,
604 (unsigned int) statbuf.st_mode);
605 continue;
606 }
607
608 /* load the post asynchronously */
609 if (taskq_dispatch(tq, __tq_load_post, (void *)(uintptr_t) postid))
610 __tq_load_post((void *)(uintptr_t) postid);
611
612 nposts++;
613 }
614
615 taskq_wait(tq);
616 taskq_destroy(tq);
617
618 end_ts = gettime();
619
620 cmn_err(CE_INFO, "Loaded %u posts in %"PRIu64".%09"PRIu64" seconds",
621 nposts,
622 (end_ts - start_ts) / 1000000000UL,
623 (end_ts - start_ts) % 1000000000UL);
624
625 closedir(dir);
626
627 return 0;
628 }