2 # txt2pre --- convert my site's txt files to `pre'-based atom/rss/html
4 # Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
5 # Copyright (c) 2021 Amin Bandali <bandali@gnu.org>
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU Affero General Public License as
9 # published by the Free Software Foundation, either version 3 of the
10 # License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Affero General Public License for more details.
17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program. If not, see <https://www.gnu.org/licenses/>.
20 # This simple script borrows from a script of the same name from the
21 # wonderful public-inbox project, under AGPLv3+, with additions of
35 GetOptions
('format=s' => \
$format,
40 or die("bad command line arguments\n");
43 $lang eq 'en' ?
'bandali'
44 : $lang eq 'fa' ?
'بندعلی'
47 $lang eq 'en' ?
"${author}'s personal site"
48 : $lang eq 'fa' ?
"سایت شخصی $author"
51 $lang eq 'en' ?
"notes and blog posts by $author"
52 : $lang eq 'fa' ?
"نوشتهها و بلاگ پستهای $author"
55 ($lang eq 'en') ?
'https://bndl.org'
56 : ($lang eq 'fa') ?
'https://bndl.org/fa/'
59 ($lang eq 'en') ?
"tag:bndl.org,2020:notes.$format"
60 : ($lang eq 'fa') ?
"tag:bndl.org,2020:fa/notes.$format"
64 qr{([\
('!])?\b((?:ftps?|https?|nntps?|imaps?|s?news|gopher)://
66 (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*)
67 (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)?
68 (?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)?
73 "(" => qr/(\)[\.,;\+]?)\z/, # Markdown (,), Ruby (+) (, for arrays)
74 "'" => qr/('[\.,;\+]?)\z/, # Perl / Ruby
75 "!" => qr/(![\.,;\+]?)\z/, # Perl / Ruby
88 $s =~ s/([&<>])/$html_map{$1}/sge;
99 # it's fairly common to end URLs in messages with
100 # '.', ',' or ';' to denote the end of a statement;
101 # assume the intent was to end the statement/sentence
103 if (defined(my $re = $pairs{$beg})) {
104 if ($url =~ s/$re//) {
107 } elsif ($url =~ s/(\))?([\.,;])\z//) {
109 # require ')' to be paired with '('
110 if (defined $1) { # ')'
111 if (index($url, '(') < 0) {
117 } elsif ($url !~ /\(/ && $url =~ s/\)\z//) {
121 $beg . "<a href
=\"$url\">$url</a
>" . $end;
129 # atom/rss feed header and footer
130 if ($index and ($format eq 'atom' or $format eq 'rss')) {
132 my $now_iso8601 = `date -Iseconds -u | tr -d \\\\n`;
133 my $now_rfc5322 = `date -uR | tr -d \\\\n`;
134 my $atom_rel = $format eq 'atom' ? 'self' : 'alternate';
135 my $rss_rel = $format eq 'rss' ? 'self' : 'alternate';
136 my $link = $format eq 'atom' ? 'link' : 'atom:link';
140 <$link hreflang="fa
" href="https
://bndl
.org
/fa/notes
.atom
" rel="alternate
" type="application
/atom+xml" />
141 <$link hreflang
="fa" href
="https://bndl.org/fa/notes.rss" rel
="alternate" type
="application/rss+xml" />
142 <$link hreflang
="fa" href
="https://bndl.org/fa/bandali.fa.txt" rel
="alternate" type
="text/plain" />
143 <$link hreflang
="fa" href
="https://bndl.org/fa/" rel
="alternate" type
="text/html" />
144 <$link href
="https://bndl.org/notes.atom" rel
="$atom_rel" type
="application/atom+xml" />
145 <$link href
="https://bndl.org/notes.rss" rel
="$rss_rel" type
="application/rss+xml" />
146 <$link href
="https://bndl.org/bandali.txt" rel
="alternate" type
="text/plain" />
147 <$link href
="https://bndl.org" rel
="alternate" type
="text/html" />);
148 } elsif ($lang eq 'fa') {
150 <link hreflang
="en" href
="https://bndl.org/notes.atom" rel
="alternate" type
="application/atom+xml" />
151 <link hreflang
="en" href
="https://bndl.org/notes.rss" rel
="alternate" type
="application/rss+xml" />
152 <link hreflang
="en" href
="https://bndl.org/bandali.txt" rel
="alternate" type
="text/plain" />
153 <link hreflang
="en" href
="https://bndl.org" rel
="alternate" type
="text/html" />
154 <link href
="https://bndl.org/fa/notes.atom" rel
="$atom_rel" type
="application/atom+xml" />
155 <link href
="https://bndl.org/fa/notes.rss" rel
="$rss_rel" type
="application/rss+xml" />
156 <link href
="https://bndl.org/fa/bandali.fa.txt" rel
="alternate" type
="text/plain" />
157 <link href
="https://bndl.org/fa/" rel
="alternate" type
="text/html" />);
161 $out .= '<?xml version="1.0" encoding="UTF-8" ?>';
162 $out .= ($format eq 'atom') ?
qq(
163 <feed xml
:lang
="$lang" xmlns
="http://www.w3.org/2005/Atom">
164 <title
>$site_title</title
>
165 <subtitle
>$site_desc</subtitle
>
168 <updated
>$now_iso8601</updated
>)
169 : ($format eq 'rss') ?
qq(
171 xmlns
:atom
="http://www.w3.org/2005/Atom"
172 xmlns
:content
="http://purl.org/rss/1.0/modules/content/">
174 <title
>$site_title</title
>
175 <description
>$site_desc</description
>
176 <link>$site_url</link>
177 <language
>$lang</language
>
178 <lastBuildDate
>$now_rfc5322</lastBuildDate
>
179 <pubDate
>$now_rfc5322</pubDate
>
184 $out .= ($format eq 'atom') ?
'</feed>'
185 : ($format eq 'rss') ?
'</channel></rss>'
# Read the whole note from standard input in one go (slurp mode).
my $txt = do { local $/; <STDIN> } // '';

# The title is the first line of the note, HTML-escaped and trimmed.
# The author's name is appended unless the title already contains it.
my ($first_line) = $txt =~ /\A([^\n]+)/;
my $title = html_esc($first_line // '');
$title =~ s/^\s+|\s+$//g;
# Plain substring test: $author is text, not a pattern.
$title .= " — $author" if index($title, $author) < 0;

# The last three lines of a note may carry its metadata trailer, in
# English or Persian: "updated: …", "published: …" and "plain text: …".
# A line that does not carry the expected label leaves its variable undef.
my ($upd, $pub, $url) = $txt =~ /(.*)\r?\n(.*)\r?\n(.*)\r?\n?\z/;
($upd) = $upd =~ /(?:updated|ویرایش): (.*)/ if $upd;
($pub) = $pub =~ /(?:published|انتشار): (.*)/ if $pub;
($url) = $url =~ /(?:plain text|متن ساده): (.*)/ if $url;
# The CV has no "plain text" trailer line; fall back to its known address.
$url = 'https://bndl.org/bandali-cv.txt'
    if (!$url and $title =~ /curriculum vitae/);
$url = html_esc($url) if $url;

# Escape the body first, then turn the URLs in it into anchors.
$txt = linkify(html_esc($txt));

# Run date(1) with the given arguments and return its output with all
# newlines removed.  The list form of a pipe open bypasses the shell, so
# text taken from the note can never be interpreted as shell syntax.
# Dies if date(1) cannot be started; when date(1) itself rejects the
# input it reports on stderr and the result is the empty string.
my $fmt_date = sub {
    my (@args) = @_;
    open(my $fh, '-|', 'date', @args)
        or die("cannot run date: $!\n");
    my $stamp = do { local $/; <$fh> } // '';
    close($fh);
    $stamp =~ tr/\n//d;
    return $stamp;
};

# Timestamps for the feed entry.  Each one stays undef when the note has
# no corresponding trailer line.  Explicit ternaries are used because
# "my $x = ... if COND" has undefined behaviour according to perlsyn.
my $upd_iso8601 = $upd ? $fmt_date->('-Iseconds', '-ud', $upd) : undef;
my $pub_iso8601 = $pub ? $fmt_date->('-Iseconds', '-ud', $pub) : undef;
my $pub_rfc5322 = $pub ? $fmt_date->('-uRd', $pub) : undef;

# Derive the HTML address from the plain-text one: drop an optional
# ".$lang" infix, swap ".txt" for ".html", and strip the "bandali-" prefix
# from the file name.
my $url_html = $url
    ? ($url =~ s/(?:[.]\Q$lang\E)?[.]txt$/.html/r)
    : undef;
$url_html =~ s|/bandali-(.*)|/$1| if $url_html;

# The slug is the HTML file name without directory or extension; together
# with $feed_id it forms the entry identifier.
my $slug = $url_html ? ($url_html =~ s|.*/(.*)[.]html$|$1|r) : undef;
my $note_id = $url_html ? "$feed_id:$slug" : undef;
220 if ($format eq 'html') {
223 . qq(<html lang
="$lang") . ($lang eq 'fa'
227 <meta http
-equiv
="Content-Type"
228 content
="text/html; charset=utf-8" />\n)
229 . "<title>$title</title>\n"
230 . qq(<link rel
="icon" href
="data:,">\n)
232 ?
qq(<link rel
="alternate" href
="$url"
233 title
="plain text" type
="text/plain" />\n)
235 . (($index and $lang eq 'en')
236 ?
qq(<link rel
="alternate" href
="https://bndl.org/fa/"
237 hreflang
="fa" title
="persian" />\n)
238 : ($index and $lang eq 'fa')
239 ?
qq(<link rel
="alternate" href
="https://bndl.org/"
240 hreflang
="en" title
="english" />\n)
242 . qq(<style
>\
@media(prefers
-color
-scheme
:dark
){
243 body
{background
:#1c1c1c;color:white;}a:link{color:#acdeff;}
244 a
:visited
{color
:#f8f;}a:active{color:#e00;}})
246 ?
qq(\n\
@font-face
{font
-family
:sahel
;font
-weight
:normal
;
247 src
:local('Sahel WOL'),local('Sahel'),
248 url
('sahel.woff2')format
('woff2');}pre
{font
-family
:sahel
})
251 . '</head><body><pre>';
252 } elsif ($format eq 'atom' or $format eq 'rss') {
254 ($format eq 'atom') ?
'updated'
255 : ($format eq 'rss') ?
'atom:updated'
258 "<$atom_updated>$upd_iso8601</$atom_updated>\n" if $upd;
259 $out .= ($format eq 'atom') ?
qq(
260 <entry xml
:base
="$site_url">
261 <author
><name
>$author</name></author
>
263 <published
>$pub_iso8601</published
>\n)
264 . ($updated ?
$updated : '') .
265 qq(<link href
="$url" rel
="alternate" type
="text/plain" />
266 <link href
="$url_html" rel
="alternate" type
="text/html" />
267 <title
>$title</title
>
268 <content type
="html"><![CDATA
[<pre
>)
269 : ($format eq 'rss') ?
qq(
271 <title
>$title</title
>
272 <link>$url_html</link>
273 <guid isPermaLink
="false">$note_id</guid
>
274 <pubDate
>$pub_rfc5322</pubDate
>\n)
275 . ($updated ?
$updated : '') .
276 qq(<content
:encoded
><![CDATA
[<pre
>)
282 if ($format eq 'html') {
283 $out .= '</pre></body></html>';
284 } elsif ($format eq 'atom') {
285 $out .= "</pre>]]></content></entry>";
286 } elsif ($format eq 'rss') {
287 $out .= "</pre>]]></content:encoded></item>";