Commit | Line | Data |
---|---|---|
d3adcff4 AB |
1 | #!/usr/bin/env perl |
2 | # txt2html --- simple script to convert my site's txt files to html | |
3 | ||
4 | # Copyright (C) 2014-2021 all contributors <meta@public-inbox.org> | |
5 | # Copyright (c) 2021 Amin Bandali <bandali@gnu.org> | |
6 | # | |
7 | # This program is free software: you can redistribute it and/or modify | |
8 | # it under the terms of the GNU Affero General Public License as | |
9 | # published by the Free Software Foundation, either version 3 of the | |
10 | # License, or (at your option) any later version. | |
11 | # | |
12 | # This program is distributed in the hope that it will be useful, | |
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | # GNU Affero General Public License for more details. | |
16 | # | |
17 | # You should have received a copy of the GNU Affero General Public License | |
18 | # along with this program. If not, see <https://www.gnu.org/licenses/>. | |
19 | ||
20 | # This simple script borrows from the wonderful `txt2pre' from | |
21 | # public-inbox.git, under AGPLv3+, with a few additions of my own. | |
22 | ||
23 | ||
24 | use strict; | |
25 | use warnings 'all'; | |
e02deb23 AB |
26 | use Getopt::Long; |
27 | ||
# Command-line switches:
#   --lang=CODE  language code written into <html lang="...">; defaults
#                to 'en'.  The value 'fa' additionally selects the
#                Persian variants of the output produced further down.
#   --index      also emit the alternate-language <link> in the head.
my $opt_index;
my $opt_lang = 'en';
my %opt_spec = (
    'lang=s' => \$opt_lang,
    'index'  => \$opt_index,
);
GetOptions(%opt_spec)
    or die("bad command line arguments\n");
d3adcff4 AB |
33 | |
# Matches one URL in running text.
#   $1  optional opening delimiter -- '(', "'" or '!' -- found directly
#       before the URL
#   $2  the URL itself: one of the listed schemes, "://", a host part and
#       an optional "/path?query#fragment"
# The character classes accept punctuation such as '.', ',', ';' and ')',
# so $2 may end in characters that really belong to the surrounding
# sentence; linkify() trims those afterwards.
my $link_re =
    qr{([\('!])?\b((?:ftps?|https?|nntps?|imaps?|s?news|gopher)://
        [\@:\w\.-]+(?:/
        (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*)
        (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)?
        (?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)?
        )?
    )}xi;

# Keyed by the opening delimiter captured in $1 above.  Each value
# matches the corresponding closing delimiter, optionally followed by one
# of . , ; +, at the very end of the URL so that it can be moved back out
# of the link.
my %pairs = (
    "(" => qr/(\)[\.,;\+]?)\z/,  # Markdown (,), Ruby (+) (, for arrays)
    "'" => qr/('[\.,;\+]?)\z/,   # Perl / Ruby
    "!" => qr/(![\.,;\+]?)\z/,   # Perl / Ruby
);
48 | ||
# Characters that must not appear literally in HTML text, and the entity
# each one is replaced with.  Quotes are deliberately left alone (see the
# commented-out entries).
my %html_map = (
    '&' => '&amp;',
    '<' => '&lt;',
    '>' => '&gt;',
    # '"' => '&quot;',
    # "'" => '&#39;',
);

# html_esc($s) -- return a copy of $s with '&', '<' and '>' replaced by
# their HTML entities so the text can be placed inside element content.
# Quote characters are not escaped.
sub html_esc {
    my ($s) = @_;
    $s =~ s/([&<>])/$html_map{$1}/g;
    return $s;
}
62 | ||
# _link_html($open, $url) -- build the replacement text for one match of
# $link_re: the opening delimiter (if any), the <a> element, and whatever
# trailing punctuation was trimmed off the end of the URL.
sub _link_html {
    my ($open, $url) = @_;
    my $beg = defined $open ? $open : '';
    my $end = '';

    # it's fairly common to end URLs in messages with
    # '.', ',' or ';' to denote the end of a statement;
    # assume the intent was to end the statement/sentence
    # in English
    if (defined(my $re = $pairs{$beg})) {
        # URL was opened with '(', "'" or '!': give the matching closer
        # (plus optional punctuation) back to the surrounding text
        if ($url =~ s/$re//) {
            $end = $1;
        }
    } elsif ($url =~ s/(\))?([\.,;])\z//) {
        $end = $2;
        # require ')' to be paired with '('
        if (defined $1) { # ')'
            if (index($url, '(') < 0) {
                $end = ")$end";
            } else {
                $url .= ')';
            }
        }
    } elsif ($url !~ /\(/ && $url =~ s/\)\z//) {
        $end = ')';
    }

    return $beg . "<a href=\"$url\">$url</a>" . $end;
}

# linkify($s) -- return $s with every URL matched by $link_re wrapped in
# an <a href="..."> element.
#
# The script calls this on the output of html_esc(), so the URL text is
# copied into the markup without further escaping.
sub linkify {
    my ($s) = @_;

    # In escaped text '<' and '>' show up as '&lt;' and '&gt;'.  Neither
    # character can be part of a URL, but '&', ';' and letters are all
    # accepted by $link_re's path pattern, so "<https://host/path>" would
    # otherwise yield a link ending in "&gt".  Cut the text at those two
    # entities (split keeps them, thanks to the capture group) and only
    # look for URLs in the pieces between them.
    my @pieces = split /(&(?:lt|gt);)/, $s;
    for my $piece (@pieces) {
        next if $piece =~ /\A&(?:lt|gt);\z/;
        $piece =~ s{$link_re}{_link_html($1, $2)}ge;
    }
    return join('', @pieces);
}
96 | ||
97 | ||
# Read the whole document from standard input in one go.
my $txt = do { local $/; <STDIN> };
$txt = '' unless defined $txt;    # e.g. STDIN closed

# The page title is the first line of the text, trimmed, with the site
# name appended unless the line already mentions it.  An empty document
# (or one that starts with a blank line) has no first line; fall back to
# an empty string instead of handing undef to html_esc().
my ($first_line) = $txt =~ /\A([^\n]+)/;
my $title = html_esc(defined $first_line ? $first_line : '');
$title =~ s/^\s+|\s+$//g;
if ($opt_lang eq 'fa') {
    $title .= ' — بندعلی' if $title !~ /بندعلی/;
} else {
    $title .= ' — bandali' if $title !~ /bandali/;
}

# A last line of the form "plain text: URL" names the plain-text version
# of the page.  The capture is non-greedy so that an optional CR/LF line
# ending is consumed by \r?\n? rather than ending up inside the URL.
my ($url) = $txt =~ /(?:plain text|متن ساده): (.*?)\r?\n?\z/;
$url = 'https://bndl.org/bandali-cv.txt'
    if (!$url and $title =~ /curriculum vitae/);
$url = html_esc($url) if $url;

# Escape first, then turn URLs in the escaped text into links.
$txt = linkify(html_esc($txt));
112 | ||
# Work out the optional pieces of the page first, then write the whole
# document in a single print.  The multi-line qq() literals are emitted
# verbatim, line breaks included.

# Persian pages get a right-to-left <html> element.
my $dir_attr = $opt_lang eq 'fa' ? ' dir="rtl"' : '';

# Link to the plain-text version, when one is known.
my $plain_link = '';
if ($url) {
    $plain_link = qq(<link rel="alternate" href="$url"
title="plain text" type="text/plain" />\n);
}

# With --index, point at the same page in the other language.
my $alt_lang_link = '';
if ($opt_index and $opt_lang eq 'en') {
    $alt_lang_link = qq(<link rel="alternate" href="https://bndl.org/fa/"
hreflang="fa" title="persian" />\n);
} elsif ($opt_index and $opt_lang eq 'fa') {
    $alt_lang_link = qq(<link rel="alternate" href="https://bndl.org/"
hreflang="en" title="english" />\n);
}

# Persian pages use the Sahel font for the <pre> body.
my $font_style = '';
if ($opt_lang eq 'fa') {
    $font_style = qq(<style>\@font-face{font-family:sahel;font-weight:normal;
src:local('Sahel WOL'),local('Sahel'),
url('sahel.woff2')format('woff2');}pre{font-family:sahel}</style>\n);
}

print("<!doctype html>",
      qq(<html lang="$opt_lang"),
      $dir_attr,
      ">",
      qq(<head>
<meta http-equiv="Content-Type"
content="text/html; charset=utf-8" />\n),
      "<title>$title</title>\n",
      $plain_link,
      $alt_lang_link,
      $font_style,
      "</head><body><pre>$txt</pre></body></html>\n");
STDOUT->flush;