Commit | Line | Data |
---|---|---|
d3adcff4 AB |
1 | #!/usr/bin/env perl |
2 | # txt2html --- simple script to convert my site's txt files to html | |
3 | ||
4 | # Copyright (C) 2014-2021 all contributors <meta@public-inbox.org> | |
5 | # Copyright (c) 2021 Amin Bandali <bandali@gnu.org> | |
6 | # | |
7 | # This program is free software: you can redistribute it and/or modify | |
8 | # it under the terms of the GNU Affero General Public License as | |
9 | # published by the Free Software Foundation, either version 3 of the | |
10 | # License, or (at your option) any later version. | |
11 | # | |
12 | # This program is distributed in the hope that it will be useful, | |
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | # GNU Affero General Public License for more details. | |
16 | # | |
17 | # You should have received a copy of the GNU Affero General Public License | |
18 | # along with this program. If not, see <https://www.gnu.org/licenses/>. | |
19 | ||
20 | # This simple script borrows from the wonderful `txt2pre' from | |
21 | # public-inbox.git, under AGPLv3+, with a few additions of my own. | |
22 | ||
23 | ||
24 | use strict; | |
25 | use warnings 'all'; | |
e02deb23 AB |
26 | use Getopt::Long; |
27 | ||
# Language of the generated page.  Defaults to English; override on the
# command line with --lang=CODE (the main program special-cases 'fa').
my $opt_lang = 'en';

unless (GetOptions('lang=s' => \$opt_lang)) {
    die("bad command line arguments\n");
}
d3adcff4 AB |
31 | |
# Matches one URL of a known scheme.
#   $1 - optional opening delimiter immediately before the URL: ( ' or !
#   $2 - the URL itself
#
# The text handed to linkify() has already been through html_esc(), so a
# literal '<' or '>' next to a URL shows up as "&lt;" / "&gt;".  The
# path, query and fragment character classes accept '&', ';' and letters,
# which would let such an entity be swallowed into the URL (for example
# "<https://example.org/a>" would yield the href "https://example.org/a&gt").
# The (?!\&[gl]t;) lookahead stops the URL right before either entity;
# "&amp;" is still accepted, so escaped query separators keep working.
my $link_re =
    qr{([\('!])?\b((?:ftps?|https?|nntps?|imaps?|s?news|gopher)://
        [\@:\w\.-]+(?:/
        (?:(?!\&[gl]t;)[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%])*
        (?:\?(?:(?!\&[gl]t;)[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%])+)?
        (?:\#(?:(?!\&[gl]t;)[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?])+)?
        )?
    )}xi;

# For each opening delimiter captured as $1 by $link_re, the pattern that
# recognises the matching closing delimiter (plus one optional trailing
# punctuation character) at the very end of the matched URL.
my %pairs = (
    "(" => qr/(\)[\.,;\+]?)\z/,    # Markdown (,), Ruby (+) (, for arrays)
    "'" => qr/('[\.,;\+]?)\z/,     # Perl / Ruby
    "!" => qr/(![\.,;\+]?)\z/,     # Perl / Ruby
);
46 | ||
# Characters that must never appear literally in HTML text, mapped to the
# character references that replace them.
my %html_map = (
    '&' => '&amp;',
    '<' => '&lt;',
    '>' => '&gt;',
    # '"' => '&quot;',
    # "'" => '&#39;',
);

# html_esc($s) -- return a copy of $s with '&', '<' and '>' replaced by
# their HTML character references.  Quotes are left untouched.  An
# undefined argument yields the empty string.
sub html_esc {
    my ($s) = @_;
    return '' unless defined $s;
    $s =~ s/([&<>])/$html_map{$1}/g;
    return $s;
}
60 | ||
# _markup_link($opener, $url) -- build the replacement text for one match
# of $link_re: the opening delimiter (if any), the anchor element, and
# whatever trailing characters were peeled off the end of the URL.
sub _markup_link {
    # Copy the arguments at once; the regex operations below reset $1/$2.
    my ($opener, $href) = @_;
    $opener = '' unless $opener;
    my $trailer = '';

    # It is fairly common to end URLs in messages with '.', ',' or ';'
    # to denote the end of a statement; assume the intent was to end the
    # statement/sentence in English.
    my $closer_re = $pairs{$opener};
    if (defined $closer_re) {
        # The URL was opened by a paired delimiter: strip its partner.
        $trailer = $1 if $href =~ s/$closer_re//;
    }
    elsif ($href =~ s/(\))?([\.,;])\z//) {
        $trailer = $2;
        # Require ')' to be paired with '('.
        if (defined $1) {
            if (index($href, '(') >= 0) {
                $href .= ')';
            }
            else {
                $trailer = ")$trailer";
            }
        }
    }
    elsif ($href !~ /\(/ && $href =~ s/\)\z//) {
        $trailer = ')';
    }

    return $opener . "<a href=\"$href\">$href</a>" . $trailer;
}

# linkify($s) -- return a copy of $s in which every URL matched by
# $link_re is wrapped in an <a href="..."> element.
sub linkify {
    my ($s) = @_;
    $s =~ s{$link_re}{ _markup_link($1, $2) }ge;
    return $s;
}
94 | ||
95 | ||
# Main program: read the whole document from STDIN, take its first line as
# the page title, and print one HTML page that wraps the escaped and
# linkified text in a <pre> element.

# Slurp all of STDIN in a single read (an undefined $/ disables line mode).
my $txt = do { local $/; <STDIN> };
$txt = '' unless defined $txt;

# The title is the first line of the input.  The match returns an empty
# list when the input is empty or starts with a newline, so capture it
# explicitly and fall back to an empty title instead of passing an
# undefined value on to html_esc().
my ($first_line) = $txt =~ /\A([^\n]+)/;
my $title = html_esc(defined $first_line ? $first_line : '');
$title =~ s/^\s+|\s+$//g;

# Append the site name unless the title already mentions it.
my $is_fa = $opt_lang eq 'fa';
if ($is_fa) {
    $title .= ' — بندعلی' if $title !~ /بندعلی/;
}
else {
    $title .= ' — bandali' if $title !~ /bandali/;
}

$txt = linkify(html_esc($txt));

# Persian pages are rendered right-to-left and with the Sahel web font.
my $dir_attr = $is_fa ? ' dir="rtl"' : '';
my $font_css = $is_fa
    ? "\n<style>\@font-face{font-family:sahel;font-weight:normal;\n"
        . "src:local('Sahel WOL'),local('Sahel'),\n"
        . "url('sahel.woff2')format('woff2');}pre{font-family:sahel}</style>\n"
    : '';

print('<!doctype html>',
      qq(<html lang="$opt_lang"), $dir_attr, '>',
      qq(<head><meta\nhttp-equiv="Content-Type"\n),
      qq(content="text/html; charset=utf-8"\n/>),
      "<title>$title</title>",
      $font_css,
      '</head><body><pre>', $txt, '</pre></body></html>');
STDOUT->flush;