#!/usr/bin/env perl
# txt2html --- simple script to convert my site's txt files to html

# Copyright (C) 2014-2021 all contributors
# Copyright (c) 2021 Amin Bandali
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

# This simple script borrows from the wonderful `txt2pre' from
# public-inbox.git, under AGPLv3+, with a few additions of my own.

# Usage: txt2html [--lang=CODE] [--index] <input.txt >output.html
#
#   --lang=CODE  language of the page (default "en"; "fa" selects the
#                Persian title suffix)
#   --index      the page is a site index; also emit a link to the index
#                in the other language
#
# There is no `use utf8' and no I/O layer here: the Persian literals below
# and the input text are both handled as raw bytes, so they only compare
# equal when source and input share one encoding (presumably UTF-8).

use strict;
use warnings 'all';
use Getopt::Long;
use IO::Handle;    # for STDOUT->flush on perls that do not auto-load it

# Matches one URL.  $1 is an optional opening delimiter [ ( ' ! ] directly
# in front of the URL, $2 is the URL itself (which may still carry trailing
# punctuation; linkify trims that).
my $link_re = qr{([\('!])?\b((?:ftps?|https?|nntps?|imaps?|s?news|gopher)://
    [\@:\w\.-]+(?:/
    (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*)
    (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)?
    (?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)?
    )?
    )}xi;

# Opening delimiter => pattern matching its closing counterpart (plus one
# optional punctuation character) at the very end of a URL.
my %pairs = (
    "(" => qr/(\)[\.,;\+]?)\z/,    # Markdown (,), Ruby (+) (, for arrays)
    "'" => qr/('[\.,;\+]?)\z/,     # Perl / Ruby
    "!" => qr/(![\.,;\+]?)\z/,     # Perl / Ruby
);

# Characters that must be escaped in HTML text content.
my %html_map = (
    '&' => '&amp;',
    '<' => '&lt;',
    '>' => '&gt;',
    # '"' => '&quot;',
    # "'" => '&#39;',
);

# NOTE(review): the href targets of the index-page language alternates were
# lost together with the rest of the markup; the values below are
# placeholders and must be confirmed against the real site layout.
my %index_alternate = (
    en => { hreflang => 'fa', href => '/fa/' },
    fa => { hreflang => 'en', href => '/' },
);

# html_esc($s): return $s with &, < and > replaced by their entities.
# An undefined argument yields the empty string.
sub html_esc {
    my ($s) = @_;
    return '' unless defined $s;
    $s =~ s/([&<>])/$html_map{$1}/g;
    return $s;
}

# _attr_esc($s): html_esc plus double-quote escaping, for values that are
# placed inside a double-quoted HTML attribute.
sub _attr_esc {
    my ($s) = @_;
    $s = html_esc($s);
    $s =~ s/"/&quot;/g;
    return $s;
}

# _anchor_for($beg, $url): build the replacement text for one $link_re
# match.  $beg is the optional opening delimiter ($1), $url the raw URL
# ($2).  Returns $beg, the anchor element and any trailing punctuation that
# was split off the URL.
sub _anchor_for {
    my ($beg, $url) = @_;    # copy right away: the arguments alias $1/$2
    $beg = '' unless defined $beg;
    my $end = '';

    # it's fairly common to end URLs in messages with
    # '.', ',' or ';' to denote the end of a statement;
    # assume the intent was to end the statement/sentence
    # in English
    if (defined(my $re = $pairs{$beg})) {
        $end = $1 if $url =~ s/$re//;
    }
    elsif ($url =~ s/(\))?([\.,;])\z//) {
        my ($paren, $punct) = ($1, $2);
        $end = $punct;

        # require ')' to be paired with '('
        if (defined $paren) {
            if (index($url, '(') < 0) {
                $end = ")$end";    # unpaired: the ')' is not part of the URL
            }
            else {
                $url .= ')';       # paired: put the ')' back on the URL
            }
        }
    }
    elsif ($url !~ /\(/ && $url =~ s/\)\z//) {
        $end = ')';
    }

    return $beg . qq(<a href="$url">$url</a>) . $end;
}

# linkify($s): return $s with every URL matched by $link_re wrapped in an
# <a> element.  $s is expected to be HTML-escaped already (the caller
# passes the output of html_esc).
sub linkify {
    my ($s) = @_;
    $s =~ s{$link_re}{_anchor_for($1, $2)}ge;
    return $s;
}

# main(): read the text, derive title and plain-text URL, print the page.
sub main {
    my $opt_lang = 'en';
    my $opt_index;
    GetOptions('lang=s' => \$opt_lang, 'index' => \$opt_index)
        or die("bad command line arguments\n");

    # NOTE(review): the filehandle inside the slurp was lost; STDIN is
    # assumed, as in public-inbox's txt2pre -- confirm.
    my $txt = do { local $/; <STDIN> };
    $txt = '' unless defined $txt;

    # The title is the first line of the text, trimmed, with the site name
    # appended unless it is already there.
    my ($first_line) = $txt =~ /\A([^\n]+)/;
    my $title = html_esc($first_line);
    $title =~ s/^\s+|\s+$//g;
    if ($opt_lang eq 'fa') {
        $title .= ' — بندعلی' if $title !~ /بندعلی/;
    }
    else {
        $title .= ' — bandali' if $title !~ /bandali/;
    }

    # The last three lines of the text may be "updated", "published" and
    # "plain text" footer fields.  The trailing \r? keeps a CR from a CRLF
    # line ending out of the captured value.
    # NOTE(review): $upd and $pub are extracted but nothing below emits
    # them; kept in case the lost markup used them.
    my ($upd, $pub, $url) = $txt =~ /(.*)\r?\n(.*)\r?\n(.*)\r?\n?\z/;
    ($upd) = $upd =~ /(?:updated|ویرایش): (.*?)\r?\z/ if $upd;
    ($pub) = $pub =~ /(?:published|انتشار): (.*?)\r?\z/ if $pub;
    ($url) = $url =~ /(?:plain text|متن ساده): (.*?)\r?\z/ if $url;
    $url = 'https://bndl.org/bandali-cv.txt'
        if (!$url and $title =~ /curriculum vitae/);
    $url = _attr_esc($url) if $url;

    $txt = linkify(html_esc($txt));

    my $lang = _attr_esc($opt_lang);
    my $alt  = $opt_index ? $index_alternate{$opt_lang} : undef;

    # NOTE(review): the page template was reconstructed because the
    # original tags were stripped from this file; one <head> line between
    # the title and the plain-text link could not be recovered.  Confirm
    # the markup against the published pages.
    print(
        "<!doctype html>\n",
        qq(<html lang="$lang">\n<head>\n),
        qq(<meta charset="utf-8">\n),
        "<title>$title</title>\n",
        $url ? qq(<link rel="alternate" type="text/plain" href="$url">\n) : '',
        $alt
        ? qq(<link rel="alternate" hreflang="$alt->{hreflang}" href="$alt->{href}">\n)
        : '',
        qq(</head>\n<body>\n),
        "<pre>$txt</pre>\n</body>\n</html>\n",
    );
    STDOUT->flush;
    return;
}

# Run only when executed as a script, so the helpers above can be loaded
# (e.g. by a test) without reading STDIN.
main() unless caller;