Commit | Line | Data |
---|---|---|
d3adcff4 | 1 | #!/usr/bin/env perl |
4ce760d0 | 2 | # txt2pre --- convert my site's txt files to `pre'-based html |
d3adcff4 AB |
3 | |
4 | # Copyright (C) 2014-2021 all contributors <meta@public-inbox.org> | |
5 | # Copyright (c) 2021 Amin Bandali <bandali@gnu.org> | |
6 | # | |
7 | # This program is free software: you can redistribute it and/or modify | |
8 | # it under the terms of the GNU Affero General Public License as | |
9 | # published by the Free Software Foundation, either version 3 of the | |
10 | # License, or (at your option) any later version. | |
11 | # | |
12 | # This program is distributed in the hope that it will be useful, | |
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | # GNU Affero General Public License for more details. | |
16 | # | |
17 | # You should have received a copy of the GNU Affero General Public License | |
18 | # along with this program. If not, see <https://www.gnu.org/licenses/>. | |
19 | ||
4ce760d0 AB |
20 | # This simple script borrows from a script of the same name from the |
21 | # wonderful public-inbox project, under AGPLv3+, with additions of | |
22 | # my own. | |
d3adcff4 AB |
23 | |
24 | ||
25 | use strict; | |
26 | use warnings 'all'; | |
e02deb23 AB |
27 | use Getopt::Long; |
28 | ||
29 | my $opt_lang = 'en'; | |
ce2fcdbc AB |
30 | my $opt_index; |
31 | GetOptions ('lang=s' => \$opt_lang, | |
32 | 'index' => \$opt_index) | |
e02deb23 | 33 | or die("bad command line arguments\n"); |
d3adcff4 AB |
34 | |
35 | my $link_re = | |
36 | qr{([\('!])?\b((?:ftps?|https?|nntps?|imaps?|s?news|gopher):// | |
37 | [\@:\w\.-]+(?:/ | |
38 | (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*) | |
39 | (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)? | |
40 | (?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)? | |
41 | )? | |
42 | )}xi; | |
43 | ||
44 | my %pairs = ( | |
45 | "(" => qr/(\)[\.,;\+]?)\z/, # Markdown (,), Ruby (+) (, for arrays) | |
46 | "'" => qr/('[\.,;\+]?)\z/, # Perl / Ruby | |
47 | "!" => qr/(![\.,;\+]?)\z/, # Perl / Ruby | |
48 | ); | |
49 | ||
50 | my %html_map = ( | |
51 | '&' => '&', | |
52 | '<' => '<', | |
53 | '>' => '>', | |
54 | # '"' => '"', | |
55 | # "'" => ''', | |
56 | ); | |
57 | ||
58 | sub html_esc { | |
59 | my ($s) = @_; | |
60 | $s =~ s/([&<>])/$html_map{$1}/sge; | |
61 | $s; | |
62 | } | |
63 | ||
64 | sub linkify { | |
65 | my ($s) = @_; | |
66 | $s =~ s^$link_re^ | |
67 | my $beg = $1 || ''; | |
68 | my $url = $2; | |
69 | my $end = ''; | |
70 | ||
71 | # it's fairly common to end URLs in messages with | |
72 | # '.', ',' or ';' to denote the end of a statement; | |
73 | # assume the intent was to end the statement/sentence | |
74 | # in English | |
75 | if (defined(my $re = $pairs{$beg})) { | |
76 | if ($url =~ s/$re//) { | |
77 | $end = $1; | |
78 | } | |
79 | } elsif ($url =~ s/(\))?([\.,;])\z//) { | |
80 | $end = $2; | |
81 | # require ')' to be paired with '(' | |
82 | if (defined $1) { # ')' | |
83 | if (index($url, '(') < 0) { | |
84 | $end = ")$end"; | |
85 | } else { | |
86 | $url .= ')'; | |
87 | } | |
88 | } | |
89 | } elsif ($url !~ /\(/ && $url =~ s/\)\z//) { | |
90 | $end = ')'; | |
91 | } | |
92 | ||
93 | $beg . "<a href=\"$url\">$url</a>" . $end; | |
94 | ^geo; | |
95 | $s; | |
96 | } | |
97 | ||
98 | ||
99 | my $txt = do { local $/; <STDIN> }; | |
212ec2e6 | 100 | |
d3adcff4 AB |
101 | my $title = html_esc($txt =~ /\A([^\n]+)/); |
102 | $title =~ s/^\s+|\s+$//g; | |
e02deb23 | 103 | if ($opt_lang eq 'fa') { |
d3adcff4 AB |
104 | $title .= ' — بندعلی' if $title !~ /بندعلی/; |
105 | } else { | |
106 | $title .= ' — bandali' if $title !~ /bandali/; | |
107 | } | |
212ec2e6 AB |
108 | |
109 | my ($upd, $pub, $url) = $txt =~ /(.*)\r?\n(.*)\r?\n(.*)\r?\n?\z/; | |
110 | ($upd) = $upd =~ /(?:updated|ویرایش): (.*)/ if $upd; | |
111 | ($pub) = $pub =~ /(?:published|انتشار): (.*)/ if $pub; | |
112 | ($url) = $url =~ /(?:plain text|متن ساده): (.*)/ if $url; | |
ce2fcdbc AB |
113 | $url = 'https://bndl.org/bandali-cv.txt' |
114 | if (!$url and $title =~ /curriculum vitae/); | |
115 | $url = html_esc($url) if $url; | |
d3adcff4 AB |
116 | |
117 | $txt = linkify(html_esc($txt)); | |
118 | ||
ce2fcdbc | 119 | print("<!doctype html>", |
e02deb23 | 120 | qq(<html lang="$opt_lang"), |
ce2fcdbc AB |
121 | $opt_lang eq 'fa' ? ' dir="rtl"' : '', |
122 | ">", | |
123 | qq(<head> | |
124 | <meta http-equiv="Content-Type" | |
125 | content="text/html; charset=utf-8" />\n), | |
126 | "<title>$title</title>\n", | |
4236f6db | 127 | qq(<link rel="icon" href="data:,">\n), |
ce2fcdbc AB |
128 | $url ? qq(<link rel="alternate" href="$url" |
129 | title="plain text" type="text/plain" />\n) : '', | |
130 | ($opt_index and $opt_lang eq 'en') | |
131 | ? qq(<link rel="alternate" href="https://bndl.org/fa/" | |
132 | hreflang="fa" title="persian" />\n) | |
133 | : ($opt_index and $opt_lang eq 'fa') | |
134 | ? qq(<link rel="alternate" href="https://bndl.org/" | |
135 | hreflang="en" title="english" />\n) | |
136 | : '', | |
19580079 AB |
137 | qq(<style>\@media(prefers-color-scheme:dark){ |
138 | body{background:#1c1c1c;color:white;}a:link{color:#acdeff;} | |
139 | a:visited{color:#f8f;}a:active{color:#e00;}}), | |
e02deb23 | 140 | $opt_lang eq 'fa' |
19580079 | 141 | ? qq(\n\@font-face{font-family:sahel;font-weight:normal; |
d3adcff4 | 142 | src:local('Sahel WOL'),local('Sahel'), |
19580079 | 143 | url('sahel.woff2')format('woff2');}pre{font-family:sahel}) |
d3adcff4 | 144 | : '', |
19580079 | 145 | "</style>\n", |
ce2fcdbc | 146 | "</head><body><pre>$txt</pre></body></html>\n"); |
d3adcff4 | 147 | STDOUT->flush; |