#!/usr/bin/perl
#
# Converts the wiki export full.html into LaTeX on standard output.
#
# Output layout:
#   1. the contents of title_page.tex, copied verbatim;
#   2. the converted body of full.html;
#   3. a closing \end{document}.
#
# Conversion happens in two passes: a per-line pass that strips page chrome
# and maps simple tags to LaTeX, then a whole-document pass for constructs
# that may span several lines (footnotes, emphasis, quotation marks).
#
# NOTE: neither `use utf8` nor an I/O encoding layer is in effect, so the
# non-ASCII literals below are matched and emitted as raw bytes. They only
# match when full.html uses the same encoding as this source file.

use strict;
use warnings;

# Separator placed between lines while the document is processed as one
# string. NUL is used because it should never occur in HTML text (a
# printable separator such as '|' would be stripped from real content) and
# because the patterns below treat it like any other non-word,
# non-whitespace character.
my $LINE_SEP = "\0";

open(my $html_fh, '<', 'full.html')
    or die "No input file full.html! ($!)\n";
open(my $title_fh, '<', 'title_page.tex')
    or die "No title page title_page.tex! ($!)\n";

# The title page is emitted unchanged ahead of the converted body.
while (my $title_line = <$title_fh>) {
    print $title_line;
}
close $title_fh;

# ---------------------------------------------------------------------------
# Pass 1: line-by-line clean-up and simple tag conversion.
# ---------------------------------------------------------------------------
my @lines;

while (my $line = <$html_fh>) {
    # Page chrome is blanked out, but the (empty) line is still recorded so
    # that it contributes a separator like every other input line.
    if ($line =~ m/LawCultureDifference|BODY BGCOLOR|BASE HREF/) {
        push @lines, '';
        next;
    }

    # Remove wiki boilerplate and structural tags with no LaTeX equivalent.
    $line =~ s/.*DOCTYPE.*|.*Edit text.*|.*form method.*|.*\/form.*//g;
    $line =~ s/<.?HEAD>|<.?HTML>|<hr>|<>|<.?p>//gi;

    # The page title becomes a section; links are dropped entirely.
    $line =~ s/<TITLE>LCDWiki: ([^<]*)<.*/\\section{$1}/gi;
    $line =~ s/<A HREF=.*?>.*?<\/A>//gi;

    # Headings map onto the LaTeX sectioning hierarchy.
    $line =~ s/<H1>([^<]*)<.*/\\section{$1}/gi;
    $line =~ s/<H2>([^<]*)<.*/\\subsection{$1}/gi;
    $line =~ s/<H3>([^<]*)<.*/\\subsubsection*{$1}/gi;
    $line =~ s/<H4>([^<]*)<.*/\\paragraph*{$1}/gi;

    # Text-level tidying.
    $line =~ s/&amp;/\\\&/gi;
    $line =~ s/\s*sec\.\s*/ § /gi;
    $line =~ s/\s*,/,/gi;

    # Lists.
    $line =~ s/<ul>/\\begin{itemize}/gi;
    $line =~ s/<li>/\\item/gi;
    $line =~ s/<dl>/\\begin{list}{}{\n\\rightmargin\n\\leftmargin\n}\n\\let \\savelabelitemi=\\labelitemi\n\\renewcommand\\labelitem[0]{}\n/gi;
    $line =~ s/<dt><dd>/\\item /gi;
    $line =~ s/<\/dl>/\\end{list}/gi;
    $line =~ s/<\/ul>/\\end{itemize}/gi;

    $line =~ s/…/--/g;

    # Escape LaTeX specials. This must run before the chart placeholders
    # are expanded, because the expansions contain underscores that have
    # to stay unescaped.
    $line =~ s/([_#%\$])/\\$1/g;

    $line =~ s/EMANCIPATION CHART HERE/\\newpage \\begin{embedded_pdf} \\includegraphics{emancipation_chart_1} \\newpage \\includegraphics{emancipation_chart_2} \\newpage \\includegraphics{emancipation_chart_3} \\newpage \\includegraphics{emancipation_chart_4} \\newpage \\includegraphics{emancipation_chart_5} \\newpage \\end{embedded_pdf} /;
    $line =~ s/CHART PLACED HERE/\\newpage \\begin{embedded_pdf} \\includegraphics{mature_minor_1} \\newpage \\includegraphics{mature_minor_2} \\newpage \\includegraphics{mature_minor_3} \\newpage \\includegraphics{mature_minor_4} \\newpage \\includegraphics{mature_minor_5} \\newpage \\end{embedded_pdf} /;

    push @lines, $line;
}
close $html_fh;

# ---------------------------------------------------------------------------
# Pass 2: whole-document substitutions (patterns may cross line boundaries).
# ---------------------------------------------------------------------------
my $doc = join($LINE_SEP, @lines);

# A little bit of a hack here: if there are multiple consecutive footnotes,
# there needs to be a superscripted comma between them. Runs of three and
# two are handled explicitly, longest first, before the single-footnote
# case. There is probably a way to do this with a more concise regexp.
$doc =~ s/\s*fn[\W\s]*\[([^\]]*)\]\W*fn[\W\s]*\[([^\]]*)\]\W*fn[\W\s]*\[([^\]]*)\]\W*/%\r\\footnote{${1}\r}\$^{, }\$\\footnote{${2}\r}\$^{, }\$\\footnote{${3}\r} /gis;
$doc =~ s/\s*fn[\W\s]*\[([^\]]*)\]\W*fn[\W\s]*\[([^\]]*)\]\W*/%\r\\footnote{${1}\r}\$^{, }\$\\footnote{${2}\r} /gis;
$doc =~ s/\s*fn[\W\s]*\[([^\]]*)\]/%\r\\footnote{${1}\r} /gis;

# Inline styling.
$doc =~ s/<STYLE>.*?<\/STYLE>//gis;
$doc =~ s/<i>([^<]*?)<\/i>/\\emph{${1}}/gis;
$doc =~ s/<em><strong>([^<]*?)<\/strong><\/em>/\\underline{${1}}/gis;
$doc =~ s/<em>([^<]*?)<\/em>/\\emph{${1}}/gis;
$doc =~ s/<strong>([^<]*?)<\/strong>/\\textbf{${1}}/gis;

# Straight quotes become LaTeX quotes. The length limits keep an unmatched
# quote from pairing with one far away, and [^s] avoids treating a
# possessive 's as a closing quote.
$doc =~ s/'(.{2,40}?)'([^s])/`${1}'${2}/gs;
$doc =~ s/"(.{2,120}?)"/``${1}''/gs;

# Typographic quotes.
$doc =~ s/“/``/g;
$doc =~ s/‘/`/g;
$doc =~ s/”/''/g;
$doc =~ s/’/'/g;

# Remaining entities and wording fixes.
$doc =~ s/&lt;/</g;
$doc =~ s/&gt;/>/g;
$doc =~ s/appendix A/Appendix A/g;

# Drop the line separators and emit the whole document. Printing the full
# buffer (rather than a counted number of fields) means nothing is lost when
# substitutions merge or remove lines.
$doc =~ tr/\0//d;
print $doc;

print '\end{document}';


# syntax highlighted by Code2HTML, v. 0.9.1