#!/usr/bin/perl
# Converts the wiki export "full.html" into LaTeX on standard output.
#
# Output order: the contents of "title_page.tex", then the converted body of
# "full.html", then a closing \end{document}.  Both input files are looked up
# in the current working directory.
#
# The non-ASCII literals in the patterns below are matched byte-for-byte,
# because neither "use utf8" nor an I/O encoding layer is in effect; the input
# therefore has to use the same encoding as this source file (presumably
# UTF-8 -- confirm before changing either side).
use strict;
use warnings;

# Run the conversion only when executed as a script, so that loading this file
# from another program merely defines the subs below.
main() unless caller;

# Entry point: reads the two input files and prints the LaTeX document.
# Dies with the original messages when either input file cannot be opened.
sub main {
    open my $html_fh, '<', 'full.html'
        or die "No input file full.html!\n";
    open my $title_fh, '<', 'title_page.tex'
        or die "No title page title_page.tex!\n";

    # The title page is copied through verbatim.
    while ( my $title_line = <$title_fh> ) {
        print $title_line;
    }
    close $title_fh;

    my @converted;
    while ( my $html_line = <$html_fh> ) {
        push @converted, convert_line($html_line);
    }
    close $html_fh;

    print convert_document(@converted);
    print '\end{document}';
    return;
}

# Returns the LaTeX that replaces a chart placeholder: five full-page
# graphics named "<stem>_1" .. "<stem>_5" inside an embedded_pdf environment.
sub chart_pages {
    my ($stem) = @_;

    return '\newpage \begin{embedded_pdf} '
        . join( '', map {"\\includegraphics{${stem}_$_} \\newpage "} 1 .. 5 )
        . '\end{embedded_pdf} ';
}

# Applies the per-line HTML -> LaTeX rewrites to one input line and returns
# the converted line (the line terminator is left in place).
# The order of the substitutions matters: '&' and the characters '_ # % $'
# are escaped before the chart placeholders are expanded, so the underscores
# in the generated graphic names stay unescaped.
sub convert_line {
    my ($line) = @_;

    # Lines carrying page chrome are dropped completely.
    return q{} if $line =~ m/LawCultureDifference|BODY BGCOLOR|BASE HREF/;

    for ($line) {

        # Strip boilerplate markup.
        s/.*DOCTYPE.*|.*Edit text.*|.*form method.*|.*\/form.*//g;
        s/<.?HEAD>|<.?HTML>|<hr>|<>|<.?p>//gi;

        # Headings; links are removed after the title has been picked up.
        s/<TITLE>LCDWiki: ([^<]*)<.*/\\section{$1}/gi;
        s/<A HREF=.*?>.*?<\/A>//gi;
        s/<H1>([^<]*)<.*/\\section{$1}/gi;
        s/<H2>([^<]*)<.*/\\subsection{$1}/gi;
        s/<H3>([^<]*)<.*/\\subsubsection*{$1}/gi;
        s/<H4>([^<]*)<.*/\\paragraph*{$1}/gi;

        # Text-level clean-up.
        s/&/\\\&/gi;
        s/\s*sec\.\s*/ § /gi;
        s/\s*,/,/gi;

        # Lists.
        s/<ul>/\\begin{itemize}/gi;
        s/<li>/\\item/gi;
        s/<dl>/\\begin{list}{}{\n\\rightmargin\n\\leftmargin\n}\n\\let \\savelabelitemi=\\labelitemi\n\\renewcommand\\labelitem[0]{}\n/gi;
        s/<dt><dd>/\\item /gi;
        s/<\/dl>/\\end{list}/gi;
        s/<\/ul>/\\end{itemize}/gi;

        s/…/--/g;

        # Escape the characters '_', '#', '%' and '$' with a backslash.
        s/([_#%\$])/\\$1/g;

        # Chart placeholders (first occurrence on the line only).
        s/EMANCIPATION CHART HERE/chart_pages('emancipation_chart')/e;
        s/CHART PLACED HERE/chart_pages('mature_minor')/e;
    }

    return $line;
}

# Applies the rewrites that may span several lines (footnotes, <STYLE>
# blocks, emphasis, quotation marks) to the already converted lines and
# returns the whole document as one string.
#
# The lines are glued together with a NUL byte that is removed again at the
# end.  The previous implementation used '|' for this and split on it
# afterwards, which deleted every literal '|' of the text and, because only
# as many pieces as there were input lines got printed, dropped the tail of
# the document.  NUL is a non-word, non-space character just like '|', so
# every pattern below sees the same line boundaries as before.  A NUL byte
# that is already present in the input would be removed as well.
sub convert_document {
    my @lines = @_;

    my $document = join "\0", @lines;

    for ($document) {

        # A little bit of a hack here: if there are multiple subsequent
        # footnotes, there needs to be a superscripted comma between them.
        # Runs of three and of two footnotes are handled first, then the
        # remaining single ones.
        s/\s*fn[\W\s]*\[([^\]]*)\]\W*fn[\W\s]*\[([^\]]*)\]\W*fn[\W\s]*\[([^\]]*)\]\W*/%\r\\footnote{$1\r}\$^{, }\$\\footnote{$2\r}\$^{, }\$\\footnote{$3\r} /gis;
        s/\s*fn[\W\s]*\[([^\]]*)\]\W*fn[\W\s]*\[([^\]]*)\]\W*/%\r\\footnote{$1\r}\$^{, }\$\\footnote{$2\r} /gis;
        s/\s*fn[\W\s]*\[([^\]]*)\]/%\r\\footnote{$1\r} /gis;

        s/<STYLE>.*?<\/STYLE>//gis;

        # Emphasis; the combined <em><strong> form must be tried before the
        # plain <em> and <strong> forms.
        s/<i>([^<]*?)<\/i>/\\emph{$1}/gis;
        s/<em><strong>([^<]*?)<\/strong><\/em>/\\underline{$1}/gis;
        s/<em>([^<]*?)<\/em>/\\emph{$1}/gis;
        s/<strong>([^<]*?)<\/strong>/\\textbf{$1}/gis;

        # Straight quotes -> TeX quotes.  The length limits keep a stray
        # quote from pairing with a distant one, and the [^s] check skips a
        # closing quote that is directly followed by "s".
        s/'(.{2,40}?)'([^s])/`${1}'${2}/gs;
        s/"(.{2,120}?)"/``${1}''/gs;

        # Typographic quotes -> TeX quotes.
        s/“/``/g;
        s/‘/`/g;
        s/”/''/g;
        s/’/'/g;

        # NOTE(review): as written these two substitutions replace a
        # character with itself.  Presumably they once decoded the HTML
        # entities for '<' and '>' and were mangled when this listing was
        # exported -- confirm against the original script.
        s/</</g;
        s/>/>/g;

        s/appendix A/Appendix A/g;
    }

    # Drop the line markers again; everything else is output verbatim.
    $document =~ tr/\0//d;

    return $document;
}

1;
# syntax highlighted by Code2HTML, v. 0.9.1