Commit | Line | Data |
---|---|---|
d3adcff4 AB |
1 | #!/usr/bin/env perl |
2 | # txt2html --- simple script to convert my site's txt files to html | |
3 | ||
4 | # Copyright (C) 2014-2021 all contributors <meta@public-inbox.org> | |
5 | # Copyright (c) 2021 Amin Bandali <bandali@gnu.org> | |
6 | # | |
7 | # This program is free software: you can redistribute it and/or modify | |
8 | # it under the terms of the GNU Affero General Public License as | |
9 | # published by the Free Software Foundation, either version 3 of the | |
10 | # License, or (at your option) any later version. | |
11 | # | |
12 | # This program is distributed in the hope that it will be useful, | |
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | # GNU Affero General Public License for more details. | |
16 | # | |
17 | # You should have received a copy of the GNU Affero General Public License | |
18 | # along with this program. If not, see <https://www.gnu.org/licenses/>. | |
19 | ||
20 | # This simple script borrows from the wonderful `txt2pre' from | |
21 | # public-inbox.git, under AGPLv3+, with a few additions of my own. | |
22 | ||
23 | ||
24 | use strict; | |
25 | use warnings 'all'; | |
26 | ||
# Pattern for URLs that should become hyperlinks.
#   capture 1: optional opening delimiter -- '(', "'" or '!' -- found
#              immediately before the URL
#   capture 2: the URL: scheme, host part, and an optional path, query
#              string and fragment
# The path/query/fragment character classes accept '&' and ';', so an
# already-escaped "&amp;" stays inside the match.  Matching is
# case-insensitive (/i); whitespace in the pattern is ignored (/x).
my $link_re =
    qr{([\('!])?\b((?:ftps?|https?|nntps?|imaps?|s?news|gopher)://
        [\@:\w\.-]+(?:/
        (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*)
        (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)?
        (?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)?
        )?
    )}xi;

# For each opening delimiter captured by $link_re, the pattern that strips
# the matching closing delimiter (plus at most one trailing punctuation
# character) from the end of the URL.
my %pairs = (
    "(" => qr/(\)[\.,;\+]?)\z/,    # Markdown (,), Ruby (+) (, for arrays)
    "'" => qr/('[\.,;\+]?)\z/,     # Perl / Ruby
    "!" => qr/(![\.,;\+]?)\z/,     # Perl / Ruby
);
41 | ||
# Characters that are unsafe in HTML text content and the entity that
# replaces each of them.
my %html_map = (
    '&' => '&amp;',
    '<' => '&lt;',
    '>' => '&gt;',
    # Quote characters are currently left unescaped:
    # '"' => '&quot;',
    # "'" => '&#39;',
);

# html_esc($s)
# Returns a copy of $s with '&', '<' and '>' replaced by their HTML
# entities.  An undefined argument (including a call with an empty list)
# yields the empty string instead of a warning.
sub html_esc {
    my ($s) = @_;
    return '' unless defined $s;
    $s =~ s/([&<>])/$html_map{$1}/g;
    return $s;
}
55 | ||
# linkify($s)
# Takes text that has ALREADY been HTML-escaped and returns a copy in which
# every URL matched by $link_re is wrapped in <a href="...">...</a>.
# Delimiters and sentence punctuation surrounding a URL are kept outside
# the anchor.
sub linkify {
    my ($s) = @_;
    $s =~ s{$link_re}{ _link_html($1, $2) }ge;
    return $s;
}

# _link_html($beg, $url)
# Builds the replacement text for one $link_re match.  $beg is the optional
# opening delimiter (capture 1, may be undef) and $url the raw match
# (capture 2).
sub _link_html {
    # Copy the arguments first: they alias $1/$2, which the matches below
    # overwrite.
    my ($beg, $url) = @_;
    $beg = '' unless defined $beg;
    my $end  = '';
    my $tail = '';

    # The input is escaped, so a '<' or '>' that terminated the URL in the
    # original text now reads "&lt;"/"&gt;", which the URL character class
    # accepts.  Cut the URL there so that "<https://host/path>" does not
    # end up with "&gt;" inside the href.
    if ($url =~ s/(&(?:lt|gt);.*)\z//s) {
        $tail = $1;
    }

    # it's fairly common to end URLs in messages with
    # '.', ',' or ';' to denote the end of a statement;
    # assume the intent was to end the statement/sentence
    # in English
    if (defined(my $re = $pairs{$beg})) {
        $end = $1 if $url =~ s/$re//;
    }
    elsif ($url =~ s/(\))?([\.,;])\z//) {
        $end = $2;
        # require ')' to be paired with '('
        if (defined $1) {
            if (index($url, '(') < 0) {
                $end = ")$end";
            }
            else {
                $url .= ')';
            }
        }
    }
    elsif ($url !~ /\(/ && $url =~ s/\)\z//) {
        $end = ')';
    }

    return $beg . "<a href=\"$url\">$url</a>" . $end . $tail;
}
89 | ||
90 | ||
# Main program: read a plain-text document from standard input and write it
# to standard output as a single HTML page with the text inside <pre>.
# A first command-line argument of 'fa' selects the Persian variant
# (lang/dir attributes, web font, Persian site name in the title).

# Slurp all of STDIN; fall back to an empty document if nothing was read.
my $txt = do { local $/; <STDIN> };
$txt = '' unless defined $txt;

my $is_fa = (defined $ARGV[0] and $ARGV[0] eq 'fa');

# The page title is the first line of the document.  The match fails when
# the input is empty or starts with a newline, so guard against undef.
my ($first_line) = $txt =~ /\A([^\n]+)/;
my $title = html_esc(defined $first_line ? $first_line : '');
$title =~ s/^\s+|\s+$//g;

# Append the site name unless the title already mentions it; with no usable
# first line the site name alone becomes the title.
my $site = $is_fa ? 'بندعلی' : 'bandali';
if (!length $title) {
    $title = $site;
}
elsif (index($title, $site) < 0) {
    $title .= " — $site";
}

$txt = linkify(html_esc($txt));

print(
    '<!doctype html>',
    $is_fa ? '<html lang="fa" dir="rtl">' : '<html>',
    '<head>',
    '<meta
http-equiv="Content-Type"
content="text/html; charset=utf-8"
/>',
    "<title>$title</title>",
    $is_fa
        ? "\n<style>\@font-face{font-family:sahel;font-weight:normal;
src:local('Sahel WOL'),local('Sahel'),
url('sahel.woff2')format('woff2');}pre{font-family:sahel}</style>\n"
        : '',
    '</head><body><pre>', $txt, '</pre></body></html>',
);
STDOUT->flush;