| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580 |
- #!/usr/bin/perl
- # Inspired by latexpand by D. Musliner, University of Michigan
- # 2012, 2013, 2014, 2015, 2016, 2017: Matthieu Moy <git@matthieu-moy.fr>
- # BSD License
- use strict;
- use Cwd;
- use Getopt::Long;
- use IO::Handle;
- use File::Spec;
- my $TEXINPUTS = $ENV{'TEXINPUTS'};
- if (!$TEXINPUTS) { $TEXINPUTS = getcwd(); }
- my $verbose;
- my $keep_comments;
- my $keep_includes;
- my $empty_comments;
- my $help;
- my $long_help;
- my $output;
- my $explain;
- my $show_graphics;
- my $graphics_extensions = ":.pdf:.png:.jpg:.eps";
- my $expand_usepackage;
- my $expand_bbl;
- my $fatal;
- my $version;
- my $makeatletter;
- my $inside_import;
- my $in_enc = "bytes";
- my $out_enc = "bytes";
- GetOptions (
- 'h' => \$help,
- 'help' => \$long_help,
- 'verbose|v' => \$verbose,
- 'keep-comments' => \$keep_comments,
- 'keep-includes' => \$keep_includes,
- 'empty-comments' => \$empty_comments,
- 'output|o=s' => \$output,
- 'explain' => \$explain,
- 'show-graphics' => \$show_graphics,
- 'graphics-extensions' => \$graphics_extensions,
- 'expand-usepackage' => \$expand_usepackage,
- 'expand-bbl=s' => \$expand_bbl,
- 'fatal' => \$fatal,
- 'version' => \$version,
- 'makeatletter' => \$makeatletter,
- 'in-encoding=s' => \$in_enc,
- 'out-encoding=s' => \$out_enc,
- ) or pod2usage_wrapper(2);
- version() if $version;
- pod2usage_wrapper(0) if $help;
- pod2usage_wrapper(-exitstatus => 0, -output => \*STDOUT, -verbose => 2) if $long_help;
- sub pod2usage_wrapper
- {
- # Like pod2usage, but fall back to a simpler implem in case
- # pod2usage can't be found.
- if (eval {require Pod::Usage;1;} ne 1) {
- print "Please install perldoc and Pod::Usage to get proper help.\n";
- my $started = 0;
- open (my $in, '<', "$0") or die $!;
- while (<$in>) {
- if ($started) {
- print;
- }
- if (/^__END__$/) {
- $started = 1;
- }
- }
- } else {
- Pod::Usage->import();
- pod2usage(@_);
- }
- }
- sub get_version
- {
- # $VERSION's value will be substituted by 'make dist', but the
- # next line won't (the string has to be broken to avoid it).
- my $VERSION = 'v1.5';
- if ($VERSION eq '@LATEXPAND' . '_VERSION@') {
- my($vol,$dir,$file) = File::Spec->splitpath($0);
- chdir($dir);
- $VERSION = `git describe --tags HEAD 2>/dev/null`;
- }
- if ($VERSION eq '') {
- $VERSION = '<unknown version>';
- }
- $VERSION =~ s/^\s+|\s+$//g;
- return $VERSION;
- }
- sub version
- {
- print "latexpand version ". get_version() .".\n";
- exit(0);
- }
- my $nl = "";
- if ($empty_comments) {
- $nl = "%\n";
- }
- if ($output && $output ne "-") {
- open (my $OUTPUT, '>', "$output") or die $!;
- STDOUT->fdopen(\*$OUTPUT, 'w') or die $!;
- }
- sub say
- {
- if ($verbose) {
- print STDERR "$_[0]";
- }
- }
- my $makeatletter_found;
- my $in_preamble;
- use open IN => ":$in_enc", OUT => ":$out_enc";
- foreach my $file (@ARGV)
- {
- say "processing $file\n";
- $makeatletter_found = 0;
- $in_preamble = 1;
- $inside_import = "";
- process_file($file, " ");
- }
- sub process_file
- {
- my $file = shift;
- my $prefix = (shift || "");
- my $in_comment = 0;
- open(my $FILE, "<", $file) or die "could not open input file '$file'\n";
- my $commented_newline = 0;
- while (my $line = <$FILE>) {
- if ($line =~ /^[ \t]*\\endinput/) {
- $line =~ s/(\\endinput.*)\n/% $1/;
- $in_comment = 1;
- process_line($line, $prefix, \$commented_newline);
- last;
- }
- process_line($line, $prefix, \$commented_newline, $file);
- if ($line =~ /^%.*[^\n]\z/ || $line =~ /[^\\]%.*[^\n]\z/) {
- # file ends with a comment not ending with a newline
- print "\n";
- }
- # Garbage at end of line after \end{document} is
- # ignored by LaTeX, but we don't allow anything before
- # to avoid e.g. \verb|\end{document}| from terminating
- # the file.
- if (!$keep_comments && $line =~ /^[ \t]*\\end\{document\}/) {
- last;
- }
- }
- close($FILE);
- return $in_comment;
- }
- sub process_line
- {
- my ($line, $prefix, $commented_newline, $file) = @_;
- $_ = $line;
- if ($$commented_newline) {
- # Leading whitespaces after a comment is ignored.
- # There's no space in:
- # Line 1%
- # Line 2.
- # Match just space and tabs (\s would match \n)
- s/^[ \t]*//;
- if (/^$/) {
- # Deal with:
- #
- # Line 1 % comment
- #
- # Line 2
- #
- # The newline after Line 1 is commented, but we still
- # want a new paragraph. We strip the comment together
- # with its newline, but re-add a newline to chnge
- # paragraph here if needed:
- print "\n";
- }
- }
- $$commented_newline = 0;
- # Consider \makeatletter only in preamble, because we do want
- # to warn on \someCommand{\makeatletter\command@with@arobase}.
- if ($in_preamble && /^[^%]*\\makeatletter/) {
- $makeatletter_found = 1;
- }
- if ($in_preamble && /^[^%]*\\makeatother/) {
- $makeatletter_found = 0;
- }
- if (!$makeatletter && !$makeatletter_found
- && (my ($command) = /^[^%]*(\\[[:alpha:]]*@[[:alpha:]]*)/)) {
- print STDERR "Warning: command $command containing @ found in\n";
- print STDERR "Warning: $file.\n";
- print STDERR "Warning: consider using --makeatletter if the result is not compilable.\n";
- }
- # non-comment is a sequence of:
- # - escaped character (\\.), including \% and \\
- # - neither '%' nor '\'.
- my $NON_COMMENT = '([^\\\\%]|\\\\.)*';
- unless ($keep_comments) {
- if (!$empty_comments) {
- # Include \n in pattern to avoid matching
- # comments at end of files
- # remove comments + whitespace-only lines completely
- if (s/^\s*%.*\n//) {
- $$commented_newline = 1;
- }
- # Special-case commands at end of line. We
- # don't want "\\foo%\nbar" to become
- # "\\foobar"
- if (s/^($NON_COMMENT\\[[:alpha:]@]+)%.*\n/$1 /) {
- $$commented_newline = 1;
- } elsif (s/^($NON_COMMENT)%.*\n/$1/) {
- # remove only the comment if the line has actual content
- $$commented_newline = 1;
- }
- }
- # Apply the "empty comments" treatment unconditionally
- # for comments not matched above (it doesn't harm to
- # keep an empty comment sometimes, but it may harm to
- # leave a real comment if the goal was to strip them).
- s/^(([^\\%]|\\.)*)%.*$/$1%/;
- }
- unless ($keep_includes) {
- if (my ($before, $ignored, $full_filename, $after)
- = /^($NON_COMMENT)\\include[{\s]+(.*?)[\s}](.*)$/) {
- $full_filename = find_tex_file($full_filename . ".tex");
- if ($full_filename) {
- say $prefix . "Found include for file: $full_filename\n";
- print $before . $nl;
- print '\clearpage{}' . $nl;
- print "% start include $full_filename\n" if ($explain);
- my $in_comment = process_file($full_filename, $prefix . " ");
- if ($explain) {
- print " % end include $full_filename\n";
- } elsif ($in_comment) {
- print "\n";
- }
- print '\clearpage{}' . $nl;
- print $nl . $after . "\n";
- $_ = "";
- }
- } elsif (my ($before, $ignored, $full_filename, $after)
- = /^($NON_COMMENT)\\input[{\s]+(.*?)[\s}](.*)$/) {
- if ($inside_import) {
- $full_filename = $inside_import . $full_filename;
- }
- $full_filename = find_tex_file($full_filename, ":.tex");
- if ($full_filename) {
- say $prefix . "Found input for file: $full_filename\n";
- print $before . $nl;
- print "% start input $full_filename\n" if ($explain);
- my $in_comment = process_file($full_filename, $prefix . " ");
- if ($explain) {
- print " % end input $full_filename\n";
- } elsif ($in_comment) {
- print "\n";
- }
- if ($after =~ /[^\s]/) {
- # LaTeX produces this space, so let's do it also
- print " " . $nl . $after . "\n";
- } else {
- print " ";
- }
- $_ = "";
- }
- } elsif (my ($before, $ignored, $dir, $full_filename, $after)
- = /^($NON_COMMENT)\\(?:sub)?import[{\s]+(.*?)[\s}][{\s]+(.*?)[\s}](.*)$/) {
- if ($explain) {
- print "% dir " . $dir ."\n";
- print "% full_filename " . $full_filename ."\n";
- print "% after " . $after ."\n";
- print "% inside_import $inside_import\n";
- }
- $full_filename = $dir . $full_filename;
- if ($inside_import) {
- $full_filename = $inside_import . $full_filename;
- }
- print "% cat(inside_import,dir,full_filename) " . $full_filename ."\n" if ($explain);
- $full_filename = find_tex_file($full_filename, ":.tex");
- if ($full_filename) {
- say $prefix . "Found input for file: $full_filename\n";
- print $before . $nl;
- print "% start input $full_filename\n" if ($explain);
- my $previous_import_dir = $inside_import;
- $inside_import = $inside_import . $dir;
- my $in_comment = process_file($full_filename, $prefix . " ");
- $inside_import = $previous_import_dir;
- if ($explain) {
- print " % end input $full_filename\n";
- } elsif ($in_comment) {
- print "\n";
- }
- if ($after =~ /[^\s]/) {
- # LaTeX produces this space, so let's do it also
- print " " . $nl . $after . "\n";
- } else {
- print " ";
- }
- $_ = "";
- }
- } elsif (my ($before, $ignored, $args, $full_filename, $after)
- = /^($NON_COMMENT)\\includegraphics[\[\s]+(.*?)[\s\]][{\s]+(.*?)[\s}](.*)$/) {
- if ($explain) {
- print "% inside_import " . $inside_import ."\n";
- print "% before " . $before ."\n";
- print "% ignored " . $ignored ."\n";
- print "% args " . $args ."\n";
- print "% full_filename " . $full_filename ."\n";
- print "% after " . $after ."\n";
- }
- if ($inside_import) {
- $full_filename = $inside_import . $full_filename;
- print "$before\\includegraphics[$args]{$full_filename}$after\n";
- $_ = "";
- }
- } elsif (my ($before, $ignored, $args, $full_filename, $after)
- = /^($NON_COMMENT)\\lstinputlisting[\[\s]+(.*?)[\s\]][{\s]+(.*?)[\s}](.*)$/) {
- if ($explain) {
- print "% inside_import " . $inside_import ."\n";
- print "% before " . $before ."\n";
- print "% ignored " . $ignored ."\n";
- print "% args " . $args ."\n";
- print "% full_filename " . $full_filename ."\n";
- print "% after " . $after ."\n";
- }
- if ($inside_import) {
- $full_filename = $inside_import . $full_filename;
- print "$before\\lstinputlisting[$args]{$full_filename}$after\n";
- $_ = "";
- }
- }
- }
- if ($expand_usepackage) {
- # Don't bother with before and after text, we just require the
- # usepackage to be alone on its line.
- if (my ($package_name) = /^\s*\\usepackage\{([^\}]*)\}\s*(%.*)?$/) {
- my $full = find_file($package_name . ".sty", $TEXINPUTS);
- if ($full) {
- say $prefix . "Found package file: $full\n";
- process_file($full, $prefix . " ");
- $_ = "";
- # Forget about any commented newline
- # before the \usepackage:
- $$commented_newline = 0;
- } else {
- say $prefix . "Not including external package $package_name\n";
- }
- }
- }
- if ($expand_bbl) {
- if (my ($before, $bib_name, $after)
- = /^(.*)\\(?:bibliography|bibselect)\{([^\}]*)\}(.*)$/) {
- # The BBL file is not necessarily $bib_name.
- # Take it from the command-line.
- print $before . $nl;
- say $prefix . "Expanding BBL file: $expand_bbl\n";
- process_file($expand_bbl, $prefix . " ");
- print " " . $nl . $after . "\n";
- $_ = "";
- }
- }
- if ($show_graphics) {
- if (/\\includegraphics(\[[^\]]*\])?{([^}]*)}/) {
- my $full_filename = $2;
- if ($inside_import) {
- $full_filename = $inside_import . $full_filename;
- }
- my $full = find_tex_file($full_filename, $graphics_extensions);
- say $prefix . "needs graphics file: ";
- print STDERR "$full\n";
- }
- }
- if (/^[ \t]*\\begin\{document\}/) {
- $in_preamble = 0;
- if ($makeatletter) {
- print '\makeatletter' . $nl;
- }
- }
- print;
- }
- # search $1 in $TEXINPUTS, with possible extensions in $2
- sub find_tex_file
- {
- my $file = shift;
- my $extensions = (shift || ":");
- foreach my $ext (split(':', $extensions, -1)) {
- my $full = find_file_global($file . $ext);
- if ($full) {
- return $full;
- }
- }
- if ($fatal) {
- die "ERROR: Could not find file [$file]\n";
- } else {
- print STDERR "Warning: Could not find file [$file]\n";
- return;
- }
- }
- sub find_file_global
- {
- my $file = shift;
- if (open(my $fh, "-|", "kpsewhich", $file)) {
- my $full = <$fh>;
- chomp($full);
- close($fh);
- if ($full) {
- return $full;
- }
- }
- return find_file($file, $TEXINPUTS);
- }
- sub find_file
- {
- my ($file, $path) = @_;
- if (File::Spec->file_name_is_absolute($file)) {
- if (-e "$file" && ! -d "$file") {
- return $file;
- } else {
- return;
- }
- }
- foreach my $dir (split(':', $path)) {
- if (-e "$dir/$file" && ! -d "$dir/$file") {
- return("$dir/$file");
- }
- }
- return;
- }
- __END__
- =head1 NAME
- latexpand - Flatten LaTeX file by expanding \include and \input, ... and remove comments
- =head1 SYNOPSIS
- latexpand [options] FILE...
- =head2 Options:
- --verbose show what's going on
- --keep-comments don't strip comments (comments are lines
- starting with %, and anything below
- \end{document})
- --empty-comments keep empty comments (i.e. % at end of lines) for clarity
- --keep-includes don't expand \input and \include directives
- --expand-usepackage
- Expand \usepackage{...} directives if the
- corresponding .sty file is found in
- $TEXINPUTS (or the current directory if
- $TEXINPUTS is not set)
- --expand-bbl FILE
- Expand the bibliography by inlining FILE
- (should be a *.bbl file)
- --help this help message
- --output <file>, -o <file>
- generate output in <file>
- --explain generate explanatory comments in output
- --show-graphics show included graphics
- --graphics_extensions
- colon-separated list of possible graphics extensions
- (used by --show-graphics to find the actual graphics files)
- --fatal Die in case a file can't be found.
- --makeatletter Insert a \makeatletter in the preamble. In some
- rare cases it may break your document, but it
- may help fixing bad interactions between
- @-commands and inclusion (see BUGS section).
- --in-encoding FMT, --out-encoding FMT
- File encoding used by input and output files.
- This uses the same syntax as PerlIO's layers.
- Example:
- --in-encoding 'encoding(UTF-8)'
- The default is 'bytes' and should always work.
- =head1 USES
- The most common use of latexpand is to simplify distribution of source
- LaTeX files, typically to satisfy the requirement of editors and
- archival sites (springer, arXiv.org, ...) who force the authors to
- submit sources. One does not necessarily want to submit sources with
- comments, and uploading a document made of several files including
- each other is a bit painful. By default, latexpand answers both
- problems by outputing a single LaTeX file that contain no comment.
- =head1 GETTING LATEXPAND
- The latest version of latexpand is available here:
- https://gitlab.com/latexpand/latexpand
- Versions are uploaded to ctan.org from time to time:
- http://www.ctan.org/pkg/latexpand
- =head1 BUGS
- Please, report bugs on the issue tracker on the project site:
- https://gitlab.com/latexpand/latexpand/issues
- =head2 Known bugs
- =head3 Verbatim
- latexpand currently ignores \begin{verbatim} ... \end{verbatim}, and
- will therefore process any \include, \input, ... directives that
- appear within verbatim environments (while it shouldn't).
- LaTeX comments inside verbatim environments are also incorrectly
- stripped. You can use --keep-comments as a workaround to avoid this.
- =head3 Comment environment
- It would be nice to remove code between \begin{comment} and
- \end{comment} too if \usepackage{comment} is used.
- Code like
- foo%
- \begin{comment}
- will produce the incorrect
- foo\begin{comment}
- A workaround is to use --empty-comments when such tricky usage of the
- comments package is done.
- =head3 \makeatletter and use with transfig/xfig with \scalebox{}
- If \input{} or \include{} appears as argument to a command, and the
- file included contains \makeatletter, then after expansion, the
- \makeatletter and the @-command appear as argument to the command,
- which is forbidden because the argument is parsed (and the @-command
- badly tokenized) before being executed.
- This happens with
- \scalebox{ \input{file-generated-by-xfig.pdf_t} }
- Workaround: add \makeatletter before the scalebox manually in your
- code, like
- \makeatletter{}
- \scalebox{ \input{file-generated-by-xfig.pdf_t} }
- \makeatother{}
- In the case of xfig generated files, it is necessary only for the
- first occurence.
- A more brute-force workaround is to use latexpand --makeatletter.
- =head1 SEE ALSO
- Instructions to include only the relevant .bib items (french):
- https://lacl.fr/~caubert/notes/portabilite-du-tex.html#dependances
- =head1 VERSION
- This is latexpand version v1.5.
|