128995c151b7267459f320b0b158bd060cb20121
[~bandali/bndl.org] / txt2html
1 #!/usr/bin/env perl
2 # txt2html --- simple script to convert my site's txt files to html
3
4 # Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
5 # Copyright (c) 2021 Amin Bandali <bandali@gnu.org>
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU Affero General Public License as
9 # published by the Free Software Foundation, either version 3 of the
10 # License, or (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Affero General Public License for more details.
16 #
17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program. If not, see <https://www.gnu.org/licenses/>.
19
20 # This simple script borrows from the wonderful `txt2pre' from
21 # public-inbox.git, under AGPLv3+, with a few additions of my own.
22
23
24 use strict;
25 use warnings 'all';
26
# Matches a URL (captured in $2) together with an optional opening
# delimiter -- "(", "'" or "!" -- found immediately before it (captured
# in $1).  Under /x the layout whitespace below is not part of the pattern.
#
# This script runs linkify() on text that has already been through
# html_esc(), so a literal "<" or ">" next to a URL arrives here as
# "&lt;" / "&gt;".  Every character of those entities is otherwise a
# legal URL character, so the path, query and fragment parts each carry a
# negative lookahead that refuses to step onto one.  Without that guard,
# "<https://example.org/x>" would be linked as "https://example.org/x&gt".
# An escaped ampersand ("&amp;") is still accepted inside a URL.
my $link_re =
    qr{([\('!])?\b((?:ftps?|https?|nntps?|imaps?|s?news|gopher)://
        [\@:\w\.-]+(?:/
        (?:(?:(?!&[lg]t;)[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%])*)
        (?:\?(?:(?!&[lg]t;)[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%])+)?
        (?:\#(?:(?!&[lg]t;)[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?])+)?
        )?
    )}xi;
35
# Closing-delimiter patterns, keyed by the opening character captured just
# before a URL.  Each pattern matches the closer, optionally followed by
# one of . , ; + at the very end of the URL text, and captures the whole
# trailing run in $1.
my %pairs;
$pairs{'('} = qr/(\)[\.,;\+]?)\z/;    # Markdown (,), Ruby (+) (, for arrays)
$pairs{"'"} = qr/('[\.,;\+]?)\z/;     # Perl / Ruby
$pairs{'!'} = qr/(![\.,;\+]?)\z/;     # Perl / Ruby
41
# Replacement text for each character html_esc() rewrites.  Quote
# characters are passed through unchanged; their mappings are kept
# here, disabled:
#   '"' => '&quot;',
#   "'" => '&#39;',
my %html_map = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;');

# html_esc($s): return a copy of $s in which every "&", "<" and ">" has
# been replaced by the corresponding HTML entity.  The argument itself is
# not modified.
sub html_esc {
    my ($s) = @_;
    (my $escaped = $s) =~ s/([&<>])/$html_map{$1}/g;
    $escaped;
}
55
# _split_url_tail($opener, $url): separate trailing punctuation that is
# most likely sentence or markup punctuation from the URL proper.
# $opener is the delimiter found just before the URL ('' when there was
# none).  Returns the list ($url, $tail).
sub _split_url_tail {
    my ($opener, $url) = @_;

    # The URL was introduced by "(", "'" or "!": only the matching closer
    # (plus an optional . , ; +) may be peeled off the end.
    my $closer_re = $pairs{$opener};
    if (defined $closer_re) {
        my $tail = $url =~ s/$closer_re// ? $1 : '';
        return ($url, $tail);
    }

    # it's fairly common to end URLs in messages with
    # '.', ',' or ';' to denote the end of a statement;
    # assume the intent was to end the statement/sentence
    # in English
    if ($url =~ s/(\))?([\.,;])\z//) {
        my ($paren, $punct) = ($1, $2);
        if (defined $paren) {
            # require ')' to be paired with '('
            if (index($url, '(') >= 0) {
                $url .= ')';
            } else {
                $punct = ")$punct";
            }
        }
        return ($url, $punct);
    }

    # A lone trailing ')' with no '(' anywhere in the URL is not part of it.
    if ($url !~ /\(/ && $url =~ s/\)\z//) {
        return ($url, ')');
    }

    return ($url, '');
}

# linkify($s): return a copy of $s in which every URL matched by $link_re
# is wrapped in <a href="...">...</a>.  An opening delimiter captured
# before the URL and any punctuation split off its end stay outside the
# anchor.
sub linkify {
    my ($s) = @_;
    $s =~ s{$link_re}{
        my $beg = $1 || '';
        my ($url, $end) = _split_url_tail($beg, $2);
        $beg . "<a href=\"$url\">$url</a>" . $end;
    }geo;
    $s;
}
89
90
# Main program: read a whole text document from STDIN and write it to
# STDOUT as a single HTML page, with the escaped and linkified text inside
# a <pre> element.  A first command-line argument of 'fa' selects the
# right-to-left variant: lang="fa" dir="rtl" on <html>, an extra <style>
# block, and the Persian site name in the title.
my $is_fa = (defined $ARGV[0] and $ARGV[0] eq 'fa');

my $txt = do { local $/; <STDIN> };
$txt = '' unless defined $txt;

# The title is the first line of the input.  With empty input or an empty
# first line the match yields nothing, so fall back to '' rather than
# handing html_esc() an undefined value.
my ($first_line) = $txt =~ /\A([^\n]+)/;
my $title = html_esc(defined $first_line ? $first_line : '');
$title =~ s/^\s+|\s+$//g;

# Append the site name unless the title already mentions it; an empty
# title becomes the site name alone instead of starting with a dash.
my $site_name = $is_fa ? 'بندعلی' : 'bandali';
if ($title eq '') {
    $title = $site_name;
} elsif (index($title, $site_name) < 0) {
    $title .= " &mdash; $site_name";
}

$txt = linkify(html_esc($txt));

# The line breaks inside the multi-line literals below are written to the
# output exactly as they appear here.
print('<!doctype html>',
    $is_fa
        ? '<html lang="fa" dir="rtl">'
        : '<html>',
    '<head>',
    '<meta
http-equiv="Content-Type"
content="text/html; charset=utf-8"
/>',
    "<title>$title</title>",
    $is_fa
        ? "\n<style>\@font-face{font-family:sahel;font-weight:normal;
src:local('Sahel WOL'),local('Sahel'),
url('sahel.woff2')format('woff2');}pre{font-family:sahel}</style>\n"
        : '',
    '</head><body><pre>', $txt, '</pre></body></html>');
STDOUT->flush;