txt2html: rename to txt2pre
[~bandali/bndl.org] / txt2pre
CommitLineData
d3adcff4 1#!/usr/bin/env perl
4ce760d0 2# txt2pre --- convert my site's txt files to `pre'-based html
d3adcff4
AB
3
4# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
5# Copyright (c) 2021 Amin Bandali <bandali@gnu.org>
6#
7# This program is free software: you can redistribute it and/or modify
8# it under the terms of the GNU Affero General Public License as
9# published by the Free Software Foundation, either version 3 of the
10# License, or (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15# GNU Affero General Public License for more details.
16#
17# You should have received a copy of the GNU Affero General Public License
18# along with this program. If not, see <https://www.gnu.org/licenses/>.
19
4ce760d0
AB
20# This simple script borrows from a script of the same name from the
21# wonderful public-inbox project, under AGPLv3+, with additions of
22# my own.
d3adcff4
AB
23
24
25use strict;
26use warnings 'all';
e02deb23
AB
27use Getopt::Long;
28
# Command-line options:
#   --lang=CODE  language of the page; used for the <html lang="...">
#                attribute, and `fa' additionally selects the Persian
#                title suffix, right-to-left direction and font rule
#                (default: en)
#   --index      the page is a site index: also emit a <link> to the
#                index in the other language
my ($opt_lang, $opt_index) = ('en');
my %opt_spec = (
	'lang=s' => \$opt_lang,
	'index'  => \$opt_index,
);
GetOptions(%opt_spec)
	or die("bad command line arguments\n");
d3adcff4
AB
34
# Pattern used by linkify() to find URLs in running text.  Compiled with
# /x (whitespace inside the pattern is ignored) and /i (case-insensitive).
#   $1 - optional opening delimiter directly in front of the URL:
#        one of ( ' !
#   $2 - the URL: one of the listed schemes, "://", a run of host-like
#        characters (word characters and @ : . -), then optionally a
#        "/"-introduced path, a "?query" and a "#fragment"
my $link_re =
 qr{([\('!])?\b((?:ftps?|https?|nntps?|imaps?|s?news|gopher)://
 [\@:\w\.-]+(?:/
 (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*)
 (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)?
 (?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)?
 )?
 )}xi;

# Maps an opening delimiter captured as $1 by $link_re to a pattern that
# matches its closing counterpart, optionally followed by one punctuation
# character, at the very end of the matched URL.  linkify() uses it to
# move that closing text out of the link.
my %pairs = (
 "(" => qr/(\)[\.,;\+]?)\z/, # Markdown (,), Ruby (+) (, for arrays)
 "'" => qr/('[\.,;\+]?)\z/, # Perl / Ruby
 "!" => qr/(![\.,;\+]?)\z/, # Perl / Ruby
);
49
# Characters that are special in HTML text and the entities replacing
# them.  The two quote entries are deliberately left disabled
# (presumably because linkify() runs on the escaped text and its
# patterns look for literal quote characters -- confirm before
# enabling them).
my %html_map = (
	'<' => '&lt;',
	'>' => '&gt;',
	'&' => '&amp;',
	# '"' => '&quot;',
	# "'" => '&#39;',
);

# html_esc($text): return a copy of $text in which every `&', `<' and
# `>' is replaced by its HTML entity.  Quotes are left untouched, so the
# result is safe as element content but not, by itself, inside a quoted
# attribute value.
sub html_esc {
	my $text = shift;
	$text =~ s{([&<>])}{$html_map{$1}}g;
	return $text;
}
63
# linkify($s): return a copy of $s in which every URL matched by
# $link_re is wrapped in an <a href="..."> element.
#
# $s is expected to be HTML-escaped already (the script passes the
# output of html_esc()), so `&' appears as `&amp;' inside URLs and the
# URL text can be placed into the markup as is.
sub linkify {
	my ($s) = @_;
	$s =~ s/$link_re/_link_html($1, $2)/ge;
	$s;
}

# _link_html($beg, $url): build the replacement text for one $link_re
# match.  $beg is the optional opening delimiter captured in front of
# the URL (may be undef), $url the matched URL text.  Returns $beg, the
# anchor element and whatever trailing text was split off the URL.
sub _link_html {
	my ($beg, $url) = @_;
	$beg = '' unless defined $beg;
	my $end = '';	# closing delimiter / punctuation moved out of the link
	my $tail = '';	# text from an escaped angle bracket onwards

	# A URL can't contain a raw `<' or `>', but their escaped forms
	# consist solely of characters $link_re accepts in a path, so
	# "&lt;https://example.org/&gt;" is matched up to and including
	# "&gt;".  Cut the URL at the first escaped angle bracket and
	# emit the remainder unchanged after the link.
	if ($url =~ s/(&[lg]t;.*)\z//s) {
		$tail = $1;
	}

	# it's fairly common to end URLs in messages with
	# '.', ',' or ';' to denote the end of a statement;
	# assume the intent was to end the statement/sentence
	# in English
	if (defined(my $re = $pairs{$beg})) {
		if ($url =~ s/$re//) {
			$end = $1;
		}
	# a ';' that terminates "&amp;" belongs to the entity (the URL
	# ends in `&'); stripping it would leave a broken "&amp" behind
	} elsif ($url =~ s/(\))?([\.,]|(?<!&amp);)\z//) {
		$end = $2;
		# require ')' to be paired with '('
		if (defined $1) { # ')'
			if (index($url, '(') < 0) {
				$end = ")$end";
			} else {
				$url .= ')';
			}
		}
	} elsif ($url !~ /\(/ && $url =~ s/\)\z//) {
		$end = ')';
	}

	$beg . "<a href=\"$url\">$url</a>" . $end . $tail;
}
97
98
# ---- main: read the text on STDIN, write the HTML page to STDOUT ----

# attr_esc($s): html_esc() plus `"' => `&quot;', for text that goes
# inside a double-quoted attribute value (html_esc() leaves quotes alone).
sub attr_esc {
	my ($s) = @_;
	$s = html_esc($s);
	$s =~ s/"/&quot;/g;
	$s;
}

# slurp all of the input at once
my $txt = do { local $/; <STDIN> };
$txt = '' unless defined $txt;

# The page title is the first line of the text, trimmed, with the site
# name appended unless the line already mentions it.  Empty input or a
# blank first line gives an empty title rather than undef, which would
# trigger "uninitialized" warnings below.
my ($first_line) = $txt =~ /\A([^\n]+)/;
my $title = html_esc(defined $first_line ? $first_line : '');
$title =~ s/^\s+|\s+$//g;
if ($opt_lang eq 'fa') {
	$title .= ' &mdash; بندعلی' if $title !~ /بندعلی/;
} else {
	$title .= ' &mdash; bandali' if $title !~ /bandali/;
}

# The last three lines of the text may form a footer of the form
#   updated: ... / published: ... / plain text: URL
# (or the Persian equivalents).  The captures exclude "\r" so that CRLF
# line endings don't leak a carriage return into the values.
# NOTE: $upd and $pub are parsed but not used in the markup below.
my ($upd, $pub, $url) =
	$txt =~ /([^\r\n]*)\r?\n([^\r\n]*)\r?\n([^\r\n]*)\r?\n?\z/;
($upd) = $upd =~ /(?:updated|ویرایش): (.*)/ if $upd;
($pub) = $pub =~ /(?:published|انتشار): (.*)/ if $pub;
($url) = $url =~ /(?:plain text|متن ساده): (.*)/ if $url;
$url = 'https://bndl.org/bandali-cv.txt'
	if (!$url and $title =~ /curriculum vitae/);
# both values below end up inside double-quoted attributes
$url = attr_esc($url) if $url;
my $lang = attr_esc($opt_lang);

$txt = linkify(html_esc($txt));

my $is_fa = $opt_lang eq 'fa';

# <link> to the plain-text version of the page, if one is known
my $alt_txt = $url
	? qq(<link rel="alternate" href="$url"
title="plain text" type="text/plain" />\n)
	: '';

# index pages additionally link to the index in the other language
my $alt_lang = '';
if ($opt_index and $opt_lang eq 'en') {
	$alt_lang = qq(<link rel="alternate" href="https://bndl.org/fa/"
hreflang="fa" title="persian" />\n);
} elsif ($opt_index and $is_fa) {
	$alt_lang = qq(<link rel="alternate" href="https://bndl.org/"
hreflang="en" title="english" />\n);
}

# dark-mode colours for everyone, plus the Sahel font for Persian pages
my $style = qq(<style>\@media(prefers-color-scheme:dark){
body{background:#1c1c1c;color:white;}a:link{color:#acdeff;}
a:visited{color:#f8f;}a:active{color:#e00;}});
$style .= qq(\n\@font-face{font-family:sahel;font-weight:normal;
src:local('Sahel WOL'),local('Sahel'),
url('sahel.woff2')format('woff2');}pre{font-family:sahel}) if $is_fa;
$style .= "</style>\n";

print("<!doctype html>",
	qq(<html lang="$lang"),
	$is_fa ? ' dir="rtl"' : '',
	">",
	qq(<head>
<meta http-equiv="Content-Type"
content="text/html; charset=utf-8" />\n),
	"<title>$title</title>\n",
	qq(<link rel="icon" href="data:,">\n),
	$alt_txt,
	$alt_lang,
	$style,
	"</head><body><pre>$txt</pre></body></html>\n");
STDOUT->flush;