#!/usr/bin/env perl
# txt2html --- simple script to convert my site's txt files to html
# Copyright (C) 2014-2021 all contributors
# Copyright (c) 2021 Amin Bandali
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# This simple script borrows from the wonderful `txt2pre' from
# public-inbox.git, under AGPLv3+, with a few additions of my own.
use strict;
use warnings 'all';
use Getopt::Long;
# Command-line options:
#   --lang=STRING   language code, defaults to 'en'
#   --index         boolean flag, unset by default
my $opt_index;
my $opt_lang = 'en';
GetOptions(
    'index'  => \$opt_index,
    'lang=s' => \$opt_lang,
) or die "bad command line arguments\n";
# Matches a URL together with an optional opening delimiter just before it.
#   $1 - optional '(', "'" or '!' immediately preceding the URL
#   $2 - the URL itself: one of the listed schemes, "://", a host part,
#        then an optional path, query string and fragment
# Compiled with /x (layout whitespace inside the pattern is ignored) and
# /i (case-insensitive matching).
my $link_re =
qr{([\('!])?\b((?:ftps?|https?|nntps?|imaps?|s?news|gopher)://
[\@:\w\.-]+(?:/
(?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*)
(?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)?
(?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)?
)?
)}xi;
# Keyed by the delimiter character found right before a URL.  Each value
# matches the corresponding closing delimiter at the very end of a string,
# optionally followed by one of '.', ',', ';' or '+', and captures that
# tail in $1.
#   '(' closes with ')'  -- Markdown (,), Ruby (+) (, for arrays)
#   "'" closes with "'"  -- Perl / Ruby
#   '!' closes with '!'  -- Perl / Ruby
my %pairs = (
    q{!} => qr/(![\.,;\+]?)\z/,
    q{'} => qr/('[\.,;\+]?)\z/,
    q{(} => qr/(\)[\.,;\+]?)\z/,
);
# Maps each character that is special in HTML text to its entity reference.
# Quote characters are deliberately left unescaped (entries kept below for
# reference).
my %html_map = (
    '&' => '&amp;',
    '<' => '&lt;',
    '>' => '&gt;',
    # '"' => '&quot;',
    # "'" => '&#39;',
);

# html_esc($s): return a copy of $s with every '&', '<' and '>' replaced by
# its entity reference from %html_map.  An undefined argument yields the
# empty string instead of triggering an "uninitialized" warning.
sub html_esc {
    my ($s) = @_;
    return '' unless defined $s;
    $s =~ s/([&<>])/$html_map{$1}/g;
    return $s;
}
# linkify($s): return a copy of $s in which every URL matched by $link_re
# is wrapped in an HTML anchor (<a href="URL">URL</a>).
#
# The URL is copied into the markup verbatim, so $s is expected to be
# HTML-escaped already.  An opening '(', "'" or '!' captured just before
# the URL, and closing punctuation judged to belong to the surrounding
# sentence, are kept outside the anchor.
sub linkify {
    my ($s) = @_;
    # NOTE: '^' is the s///e delimiter here, so the replacement code (and
    # its comments) must not contain that character.
    $s =~ s^$link_re^
        my $beg = $1 || '';
        my $url = $2;
        my $end = '';

        # it's fairly common to end URLs in messages with
        # '.', ',' or ';' to denote the end of a statement;
        # assume the intent was to end the statement/sentence
        # in English
        if (defined(my $re = $pairs{$beg})) {
            # opening delimiter seen: strip its closing partner (plus
            # optional trailing punctuation) from the end of the URL
            if ($url =~ s/$re//) {
                $end = $1;
            }
        } elsif ($url =~ s/(\))?([\.,;])\z//) {
            $end = $2;

            # require ')' to be paired with '('
            if (defined $1) { # ')'
                if (index($url, '(') < 0) {
                    $end = ")$end";
                } else {
                    # the URL contains '(' itself, so ')' belongs to it
                    $url .= ')';
                }
            }
        } elsif ($url !~ /\(/ && $url =~ s/\)\z//) {
            # lone trailing ')' with no '(' anywhere in the URL
            $end = ')';
        }

        $beg . "<a href=\"$url\">$url</a>" . $end;
    ^geo;
    return $s;
}
# Slurp the whole document: with $/ undefined, <> returns everything from
# the files remaining in @ARGV (or from standard input) in one read.
my $txt = do { local $/; <> };
$txt = '' unless defined $txt;    # guard against no input at all

# The page title is the first line of the document, HTML-escaped and
# trimmed; fall back to an empty title when there is no first line.
my $title = html_esc($txt =~ /\A([^\n]+)/ ? $1 : '');
$title =~ s/^\s+|\s+$//g;

# Append the site name (in the page's language) unless already present.
if ($opt_lang eq 'fa') {
    $title .= ' — بندعلی' if $title !~ /بندعلی/;
} else {
    $title .= ' — bandali' if $title !~ /bandali/;
}

# A trailing "plain text: URL" line (or its Persian equivalent) names the
# plain-text version of the page; the CV page has a fixed fallback.
my ($url) = $txt =~ /(?:plain text|متن ساده): (.*)\r?\n?\z/;
$url = 'https://bndl.org/bandali-cv.txt'
    if (!$url and $title =~ /curriculum vitae/);
$url = html_esc($url) if $url;

# Escape first, then linkify, so the markup added by linkify survives.
$txt = linkify(html_esc($txt));
print("",
qq(",
qq(
\n),
"$title\n",
qq(\n),
$url ? qq(\n) : '',
($opt_index and $opt_lang eq 'en')
? qq(\n)
: ($opt_index and $opt_lang eq 'fa')
? qq(\n)
: '',
qq(\n",
"