latexpand 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580
  1. #!/usr/bin/perl
  2. # Inspired by latexpand by D. Musliner, University of Michigan
  3. # 2012, 2013, 2014, 2015, 2016, 2017: Matthieu Moy <git@matthieu-moy.fr>
  4. # BSD License
  5. use strict;
  6. use Cwd;
  7. use Getopt::Long;
  8. use IO::Handle;
  9. use File::Spec;
  # Colon-separated list of directories searched for input files: the
  # TEXINPUTS environment variable when it is set and non-empty, the
  # current working directory otherwise.
  my $TEXINPUTS = $ENV{'TEXINPUTS'};
  if (!$TEXINPUTS) { $TEXINPUTS = getcwd(); }

  # Command-line switches, filled in by GetOptions() below.
  my $verbose;
  my $keep_comments;
  my $keep_includes;
  my $empty_comments;
  my $help;
  my $long_help;
  my $output;
  my $explain;
  my $show_graphics;
  # Colon-separated list of extensions tried by --show-graphics; the
  # leading empty element means "try the name as written first".
  my $graphics_extensions = ":.pdf:.png:.jpg:.eps";
  my $expand_usepackage;
  my $expand_bbl;
  my $fatal;
  my $version;
  my $makeatletter;
  # Directory prefix accumulated while expanding \import / \subimport
  # (reset for each file given on the command line).
  my $inside_import;
  # PerlIO layer names (without the leading ':') for input and output.
  my $in_enc = "bytes";
  my $out_enc = "bytes";

  GetOptions (
      'h' => \$help,
      'help' => \$long_help,
      'verbose|v' => \$verbose,
      'keep-comments' => \$keep_comments,
      'keep-includes' => \$keep_includes,
      'empty-comments' => \$empty_comments,
      'output|o=s' => \$output,
      'explain' => \$explain,
      'show-graphics' => \$show_graphics,
      # "=s": the option takes the extension list as its value. Without it
      # the option was a plain flag and overwrote the default list with 1.
      'graphics-extensions=s' => \$graphics_extensions,
      'expand-usepackage' => \$expand_usepackage,
      'expand-bbl=s' => \$expand_bbl,
      'fatal' => \$fatal,
      'version' => \$version,
      'makeatletter' => \$makeatletter,
      'in-encoding=s' => \$in_enc,
      'out-encoding=s' => \$out_enc,
  ) or pod2usage_wrapper(2);

  # Informational options: each of these prints something and terminates.
  version() if $version;
  pod2usage_wrapper(0) if $help;
  pod2usage_wrapper(-exitstatus => 0, -output => \*STDOUT, -verbose => 2) if $long_help;
  # Like pod2usage(), but fall back to a simpler implementation in case
  # Pod::Usage can't be loaded.
  #
  # Arguments are the ones accepted by pod2usage(): either a single exit
  # status, or a list of -key => value pairs.
  #
  # The fallback prints everything found after the __END__ line of this
  # script and then exits like pod2usage() would, so that callers such as
  # "GetOptions(...) or pod2usage_wrapper(2)" never fall through and keep
  # processing files after a usage error.
  sub pod2usage_wrapper
  {
      if (eval { require Pod::Usage; 1 }) {
          Pod::Usage->import();
          pod2usage(@_);
          return;
      }

      # Work out the exit status the caller asked for (pod2usage's default
      # when nothing is specified is 2).
      my $exit_status = 2;
      if (@_ == 1) {
          $exit_status = $_[0] if defined $_[0] && $_[0] =~ /^-?\d+$/;
      } elsif (@_ % 2 == 0) {
          my %opts = @_;
          foreach my $key ('-exitval', '-exitstatus') {
              $exit_status = $opts{$key} if defined $opts{$key};
          }
      }

      print "Please install perldoc and Pod::Usage to get proper help.\n";
      my $started = 0;
      open (my $in, '<', "$0") or die $!;
      # A lexical line variable is used so that the global $_ is not clobbered.
      while (my $line = <$in>) {
          print $line if $started;
          $started = 1 if $line =~ /^__END__$/;
      }
      close($in);

      # pod2usage() honours the special value 'NOEXIT'; do the same.
      exit($exit_status) unless $exit_status eq 'NOEXIT';
  }
  # Return the version string of this script, without surrounding
  # whitespace. When the version placeholder has not been substituted, ask
  # git (from the directory containing the script); when nothing could be
  # determined, return '<unknown version>'.
  sub get_version
  {
      # $VERSION's value will be substituted by 'make dist', but the
      # placeholder built below won't (the string has to be broken to
      # avoid it).
      my $VERSION = 'v1.5';
      my $placeholder = '@LATEXPAND' . '_VERSION@';
      if ($VERSION eq $placeholder) {
          my $script_dir = (File::Spec->splitpath($0))[1];
          chdir($script_dir);
          $VERSION = `git describe --tags HEAD 2>/dev/null`;
      }
      $VERSION = '<unknown version>' if $VERSION eq '';
      # Trim leading, then trailing whitespace (e.g. git's final newline).
      $VERSION =~ s/^\s+//;
      $VERSION =~ s/\s+$//;
      return $VERSION;
  }
  # Print the version banner on standard output and terminate successfully.
  sub version
  {
      my $version_string = get_version();
      print "latexpand version ". $version_string .".\n";
      exit(0);
  }
  # Text emitted where an expansion joins two pieces of output: an empty
  # comment plus newline with --empty-comments, nothing otherwise.
  my $nl = $empty_comments ? "%\n" : "";

  # With --output FILE (anything but "-"), make STDOUT write to FILE so
  # that every plain print below ends up there.
  if ($output) {
      if ($output ne "-") {
          open (my $OUTPUT, '>', "$output") or die $!;
          STDOUT->fdopen(\*$OUTPUT, 'w') or die $!;
      }
  }
  # Print the given message on STDERR, but only when --verbose is active.
  sub say
  {
      my ($message) = @_;
      print STDERR "$message" if $verbose;
  }
  # State shared by process_file/process_line, reset for each top-level file:
  # true while between \makeatletter and \makeatother in the preamble.
  my $makeatletter_found;
  # true until \begin{document} has been seen.
  my $in_preamble;

  # The encodings are applied at run time: a "use open IN => ..., OUT => ..."
  # pragma is evaluated at compile time, i.e. before $in_enc / $out_enc are
  # assigned and before the command line is parsed, so it silently ignored
  # --in-encoding and --out-encoding. The output layer is set on STDOUT
  # here; the input layer is given to open() in process_file.
  binmode(STDOUT, ":$out_enc")
      or die "could not apply output encoding '$out_enc': $!\n";

  foreach my $file (@ARGV)
  {
      say "processing $file\n";
      $makeatletter_found = 0;
      $in_preamble = 1;
      $inside_import = "";
      process_file($file, " ");
  }

  # Expand one file onto standard output.
  #
  # Arguments:
  #   $file   - path of the file to read
  #   $prefix - indentation prepended to --verbose messages (optional)
  #
  # Reading stops at \endinput, and (unless --keep-comments) at a line
  # starting with \end{document}.
  #
  # Returns 1 when the file was cut at \endinput, in which case the last
  # thing handed to process_line is a comment without its newline; returns
  # 0 otherwise. Dies when the file cannot be opened.
  sub process_file
  {
      my $file = shift;
      my $prefix = (shift || "");
      my $in_comment = 0;
      # Open with the layer requested through --in-encoding.
      open(my $FILE, "<:$in_enc", $file) or die "could not open input file '$file'\n";
      my $commented_newline = 0;
      while (my $line = <$FILE>) {
          if ($line =~ /^[ \t]*\\endinput/) {
              # Turn the \endinput line into a comment (dropping its
              # newline) and stop reading this file.
              $line =~ s/(\\endinput.*)\n/% $1/;
              $in_comment = 1;
              # $file is passed so that warnings can name the file.
              process_line($line, $prefix, \$commented_newline, $file);
              last;
          }
          process_line($line, $prefix, \$commented_newline, $file);
          if ($line =~ /^%.*[^\n]\z/ || $line =~ /[^\\]%.*[^\n]\z/) {
              # file ends with a comment not ending with a newline
              print "\n";
          }
          # Garbage at end of line after \end{document} is
          # ignored by LaTeX, but we don't allow anything before
          # to avoid e.g. \verb|\end{document}| from terminating
          # the file.
          if (!$keep_comments && $line =~ /^[ \t]*\\end\{document\}/) {
              last;
          }
      }
      close($FILE);
      return $in_comment;
  }
  # Process one input line and print the result on standard output.
  #
  # Arguments:
  #   $line              - the line as read from the file
  #   $prefix            - indentation prepended to --verbose messages
  #   $commented_newline - reference to a flag; true on entry when the
  #                        newline of the previous line was removed together
  #                        with a comment, updated for the next call
  #   $file              - name of the file being read, used in warnings
  #                        (optional)
  #
  # Depending on the command-line options, the line has its comments
  # stripped and its \include, \input, \import, \subimport, \usepackage
  # and \bibliography directives replaced by the content of the files they
  # refer to (through recursive calls to process_file).
  #
  # The line is processed in the global $_, which is printed at the end; a
  # branch that has already printed its own output sets $_ to "".
  sub process_line
  {
      my ($line, $prefix, $commented_newline, $file) = @_;
      $_ = $line;
      if ($$commented_newline) {
          # Leading whitespaces after a comment is ignored.
          # There's no space in:
          # Line 1%
          #   Line 2.
          # Match just space and tabs (\s would match \n)
          s/^[ \t]*//;
          if (/^$/) {
              # Deal with:
              #
              # Line 1 % comment
              #
              # Line 2
              #
              # The newline after Line 1 is commented, but we still
              # want a new paragraph. We strip the comment together
              # with its newline, but re-add a newline to change
              # paragraph here if needed:
              print "\n";
          }
      }
      $$commented_newline = 0;
      # Consider \makeatletter only in preamble, because we do want
      # to warn on \someCommand{\makeatletter\command@with@arobase}.
      if ($in_preamble && /^[^%]*\\makeatletter/) {
          $makeatletter_found = 1;
      }
      if ($in_preamble && /^[^%]*\\makeatother/) {
          $makeatletter_found = 0;
      }
      if (!$makeatletter && !$makeatletter_found
          && (my ($command) = /^[^%]*(\\[[:alpha:]]*@[[:alpha:]]*)/)) {
          # The file name is optional: never interpolate an undefined value.
          my $where = defined($file) ? $file : '<unknown file>';
          print STDERR "Warning: command $command containing @ found in\n";
          print STDERR "Warning: $where.\n";
          print STDERR "Warning: consider using --makeatletter if the result is not compilable.\n";
      }
      # non-comment is a sequence of:
      # - escaped character (\\.), including \% and \\
      # - neither '%' nor '\'.
      # Note that this pattern contains a capturing group of its own, hence
      # the $ignored variable in the matches below.
      my $NON_COMMENT = '([^\\\\%]|\\\\.)*';
      unless ($keep_comments) {
          if (!$empty_comments) {
              # Include \n in pattern to avoid matching
              # comments at end of files
              # remove comments + whitespace-only lines completely
              if (s/^\s*%.*\n//) {
                  $$commented_newline = 1;
              }
              # Special-case commands at end of line. We
              # don't want "\\foo%\nbar" to become
              # "\\foobar"
              if (s/^($NON_COMMENT\\[[:alpha:]@]+)%.*\n/$1 /) {
                  $$commented_newline = 1;
              } elsif (s/^($NON_COMMENT)%.*\n/$1/) {
                  # remove only the comment if the line has actual content
                  $$commented_newline = 1;
              }
          }
          # Apply the "empty comments" treatment unconditionally
          # for comments not matched above (it doesn't harm to
          # keep an empty comment sometimes, but it may harm to
          # leave a real comment if the goal was to strip them).
          s/^(([^\\%]|\\.)*)%.*$/$1%/;
      }
      unless ($keep_includes) {
          # Capture variables shared by the branches below; declared once
          # rather than re-declared with "my" in every elsif condition.
          my ($before, $ignored, $dir, $args, $full_filename, $after);
          if (($before, $ignored, $full_filename, $after)
              = /^($NON_COMMENT)\\include[{\s]+(.*?)[\s}](.*)$/) {
              $full_filename = find_tex_file($full_filename . ".tex");
              if ($full_filename) {
                  say $prefix . "Found include for file: $full_filename\n";
                  print $before . $nl;
                  print '\clearpage{}' . $nl;
                  print "% start include $full_filename\n" if ($explain);
                  my $in_comment = process_file($full_filename, $prefix . " ");
                  if ($explain) {
                      print " % end include $full_filename\n";
                  } elsif ($in_comment) {
                      # The included file ended inside a comment: terminate it.
                      print "\n";
                  }
                  print '\clearpage{}' . $nl;
                  print $nl . $after . "\n";
                  $_ = "";
              }
          } elsif (($before, $ignored, $full_filename, $after)
              = /^($NON_COMMENT)\\input[{\s]+(.*?)[\s}](.*)$/) {
              if ($inside_import) {
                  $full_filename = $inside_import . $full_filename;
              }
              $full_filename = find_tex_file($full_filename, ":.tex");
              if ($full_filename) {
                  say $prefix . "Found input for file: $full_filename\n";
                  print $before . $nl;
                  print "% start input $full_filename\n" if ($explain);
                  my $in_comment = process_file($full_filename, $prefix . " ");
                  if ($explain) {
                      print " % end input $full_filename\n";
                  } elsif ($in_comment) {
                      print "\n";
                  }
                  if ($after =~ /[^\s]/) {
                      # LaTeX produces this space, so let's do it also
                      print " " . $nl . $after . "\n";
                  } else {
                      print " ";
                  }
                  $_ = "";
              }
          } elsif (($before, $ignored, $dir, $full_filename, $after)
              = /^($NON_COMMENT)\\(?:sub)?import[{\s]+(.*?)[\s}][{\s]+(.*?)[\s}](.*)$/) {
              if ($explain) {
                  print "% dir " . $dir ."\n";
                  print "% full_filename " . $full_filename ."\n";
                  print "% after " . $after ."\n";
                  print "% inside_import $inside_import\n";
              }
              $full_filename = $dir . $full_filename;
              if ($inside_import) {
                  $full_filename = $inside_import . $full_filename;
              }
              print "% cat(inside_import,dir,full_filename) " . $full_filename ."\n" if ($explain);
              $full_filename = find_tex_file($full_filename, ":.tex");
              if ($full_filename) {
                  say $prefix . "Found input for file: $full_filename\n";
                  print $before . $nl;
                  print "% start input $full_filename\n" if ($explain);
                  # Paths inside the imported file are relative to $dir:
                  # extend the import prefix for the duration of the
                  # recursive call, then restore it.
                  my $previous_import_dir = $inside_import;
                  $inside_import = $inside_import . $dir;
                  my $in_comment = process_file($full_filename, $prefix . " ");
                  $inside_import = $previous_import_dir;
                  if ($explain) {
                      print " % end input $full_filename\n";
                  } elsif ($in_comment) {
                      print "\n";
                  }
                  if ($after =~ /[^\s]/) {
                      # LaTeX produces this space, so let's do it also
                      print " " . $nl . $after . "\n";
                  } else {
                      print " ";
                  }
                  $_ = "";
              }
          } elsif (($before, $ignored, $args, $full_filename, $after)
              = /^($NON_COMMENT)\\includegraphics[\[\s]+(.*?)[\s\]][{\s]+(.*?)[\s}](.*)$/) {
              if ($explain) {
                  print "% inside_import " . $inside_import ."\n";
                  print "% before " . $before ."\n";
                  print "% ignored " . $ignored ."\n";
                  print "% args " . $args ."\n";
                  print "% full_filename " . $full_filename ."\n";
                  print "% after " . $after ."\n";
              }
              # Inside an import, rewrite the path so that it is still
              # valid from the flattened file.
              if ($inside_import) {
                  $full_filename = $inside_import . $full_filename;
                  print "$before\\includegraphics[$args]{$full_filename}$after\n";
                  $_ = "";
              }
          } elsif (($before, $ignored, $args, $full_filename, $after)
              = /^($NON_COMMENT)\\lstinputlisting[\[\s]+(.*?)[\s\]][{\s]+(.*?)[\s}](.*)$/) {
              if ($explain) {
                  print "% inside_import " . $inside_import ."\n";
                  print "% before " . $before ."\n";
                  print "% ignored " . $ignored ."\n";
                  print "% args " . $args ."\n";
                  print "% full_filename " . $full_filename ."\n";
                  print "% after " . $after ."\n";
              }
              if ($inside_import) {
                  $full_filename = $inside_import . $full_filename;
                  print "$before\\lstinputlisting[$args]{$full_filename}$after\n";
                  $_ = "";
              }
          }
      }
      if ($expand_usepackage) {
          # Don't bother with before and after text, we just require the
          # usepackage to be alone on its line.
          if (my ($package_name) = /^\s*\\usepackage\{([^\}]*)\}\s*(%.*)?$/) {
              my $full = find_file($package_name . ".sty", $TEXINPUTS);
              if ($full) {
                  say $prefix . "Found package file: $full\n";
                  process_file($full, $prefix . " ");
                  $_ = "";
                  # Forget about any commented newline
                  # before the \usepackage:
                  $$commented_newline = 0;
              } else {
                  say $prefix . "Not including external package $package_name\n";
              }
          }
      }
      if ($expand_bbl) {
          if (my ($before, $bib_name, $after)
              = /^(.*)\\(?:bibliography|bibselect)\{([^\}]*)\}(.*)$/) {
              # The BBL file is not necessarily $bib_name.
              # Take it from the command-line.
              print $before . $nl;
              say $prefix . "Expanding BBL file: $expand_bbl\n";
              process_file($expand_bbl, $prefix . " ");
              print " " . $nl . $after . "\n";
              $_ = "";
          }
      }
      if ($show_graphics) {
          # The literal braces are escaped, like in the other patterns of
          # this file: an unescaped literal "{" in a regex is deprecated in
          # recent versions of perl.
          if (/\\includegraphics(\[[^\]]*\])?\{([^}]*)\}/) {
              my $full_filename = $2;
              if ($inside_import) {
                  $full_filename = $inside_import . $full_filename;
              }
              my $full = find_tex_file($full_filename, $graphics_extensions);
              # find_tex_file has already reported a missing file; don't
              # print an empty name on top of that.
              if (defined $full) {
                  say $prefix . "needs graphics file: ";
                  print STDERR "$full\n";
              }
          }
      }
      if (/^[ \t]*\\begin\{document\}/) {
          $in_preamble = 0;
          if ($makeatletter) {
              print '\makeatletter' . $nl;
          }
      }
      print;
  }
  # Search for a file, trying several possible extensions.
  #
  # Arguments:
  #   $file       - file name as written in the document
  #   $extensions - colon-separated list of extensions to append; an empty
  #                 element means "the name as is" (optional, defaults to
  #                 trying the name as is only)
  #
  # Returns the location reported by find_file_global for the first
  # candidate found. When nothing is found: dies with --fatal, otherwise
  # prints a warning on STDERR and returns nothing.
  sub find_tex_file
  {
      my $file = shift;
      my $extensions = (shift || ":");
      # The -1 limit keeps empty trailing fields, so that ":" yields the
      # empty extension. It yields it twice, though: skip candidates that
      # were already tried to avoid running the same lookup again.
      my %tried;
      foreach my $ext (split(':', $extensions, -1)) {
          next if $tried{$ext}++;
          my $full = find_file_global($file . $ext);
          if ($full) {
              return $full;
          }
      }
      if ($fatal) {
          die "ERROR: Could not find file [$file]\n";
      } else {
          print STDERR "Warning: Could not find file [$file]\n";
          return;
      }
  }
  # Locate $file: first ask the kpsewhich program and use the first line
  # of its output when there is one; otherwise fall back to searching the
  # directories of $TEXINPUTS with find_file.
  sub find_file_global
  {
      my $file = shift;
      # List form of pipe open: the file name is passed as a plain
      # argument to kpsewhich.
      my $kpse_started = open(my $fh, "-|", "kpsewhich", $file);
      if ($kpse_started) {
          my $full = <$fh>;
          chomp($full);
          close($fh);
          return $full if $full;
      }
      return find_file($file, $TEXINPUTS);
  }
  # Look for $file in the colon-separated list of directories $path.
  #
  # An absolute file name is checked on its own, without using $path.
  # Returns the name of the first candidate that exists and is not a
  # directory ("$dir/$file" for a relative name), or nothing when there is
  # no such candidate.
  sub find_file
  {
      my ($file, $path) = @_;

      if (File::Spec->file_name_is_absolute($file)) {
          return $file if (-e "$file" && ! -d "$file");
          return;
      }

      foreach my $dir (split(':', $path)) {
          my $candidate = "$dir/$file";
          next unless -e $candidate;
          next if -d $candidate;
          return($candidate);
      }
      return;
  }
  423. __END__
  424. =head1 NAME
  425. latexpand - Flatten LaTeX file by expanding \include and \input, ... and remove comments
  426. =head1 SYNOPSIS
  427. latexpand [options] FILE...
  428. =head2 Options:
  429. --verbose show what's going on
  430. --keep-comments don't strip comments (comments are lines
  431. starting with %, and anything below
  432. \end{document})
  433. --empty-comments keep empty comments (i.e. % at end of lines) for clarity
  434. --keep-includes don't expand \input and \include directives
  435. --expand-usepackage
  436. Expand \usepackage{...} directives if the
  437. corresponding .sty file is found in
  438. $TEXINPUTS (or the current directory if
  439. $TEXINPUTS is not set)
  440. --expand-bbl FILE
  441. Expand the bibliography by inlining FILE
  442. (should be a *.bbl file)
  443. --help this help message
  444. --output <file>, -o <file>
  445. generate output in <file>
  446. --explain generate explanatory comments in output
  447. --show-graphics show included graphics
  448. --graphics-extensions
  449. colon-separated list of possible graphics extensions
  450. (used by --show-graphics to find the actual graphics files)
  451. --fatal Die in case a file can't be found.
  452. --makeatletter Insert a \makeatletter in the preamble. In some
  453. rare cases it may break your document, but it
  454. may help fixing bad interactions between
  455. @-commands and inclusion (see BUGS section).
  456. --in-encoding FMT, --out-encoding FMT
  457. File encoding used by input and output files.
  458. This uses the same syntax as PerlIO's layers.
  459. Example:
  460. --in-encoding 'encoding(UTF-8)'
  461. The default is 'bytes' and should always work.
  462. =head1 USES
  463. The most common use of latexpand is to simplify distribution of source
  464. LaTeX files, typically to satisfy the requirement of editors and
  465. archival sites (springer, arXiv.org, ...) who force the authors to
  466. submit sources. One does not necessarily want to submit sources with
  467. comments, and uploading a document made of several files including
  468. each other is a bit painful. By default, latexpand answers both
  469. problems by outputting a single LaTeX file that contains no comments.
  470. =head1 GETTING LATEXPAND
  471. The latest version of latexpand is available here:
  472. https://gitlab.com/latexpand/latexpand
  473. Versions are uploaded to ctan.org from time to time:
  474. http://www.ctan.org/pkg/latexpand
  475. =head1 BUGS
  476. Please, report bugs on the issue tracker on the project site:
  477. https://gitlab.com/latexpand/latexpand/issues
  478. =head2 Known bugs
  479. =head3 Verbatim
  480. latexpand currently ignores \begin{verbatim} ... \end{verbatim}, and
  481. will therefore process any \include, \input, ... directives that
  482. appear within verbatim environments (while it shouldn't).
  483. LaTeX comments inside verbatim environments are also incorrectly
  484. stripped. You can use --keep-comments as a workaround to avoid this.
  485. =head3 Comment environment
  486. It would be nice to remove code between \begin{comment} and
  487. \end{comment} too if \usepackage{comment} is used.
  488. Code like
  489. foo%
  490. \begin{comment}
  491. will produce the incorrect
  492. foo\begin{comment}
  493. A workaround is to use --empty-comments when such tricky usage of the
  494. comments package is done.
  495. =head3 \makeatletter and use with transfig/xfig with \scalebox{}
  496. If \input{} or \include{} appears as argument to a command, and the
  497. file included contains \makeatletter, then after expansion, the
  498. \makeatletter and the @-command appear as argument to the command,
  499. which is forbidden because the argument is parsed (and the @-command
  500. badly tokenized) before being executed.
  501. This happens with
  502. \scalebox{ \input{file-generated-by-xfig.pdf_t} }
  503. Workaround: add \makeatletter before the scalebox manually in your
  504. code, like
  505. \makeatletter{}
  506. \scalebox{ \input{file-generated-by-xfig.pdf_t} }
  507. \makeatother{}
  508. In the case of xfig generated files, it is necessary only for the
  509. first occurrence.
  510. A more brute-force workaround is to use latexpand --makeatletter.
  511. =head1 SEE ALSO
  512. Instructions to include only the relevant .bib items (french):
  513. https://lacl.fr/~caubert/notes/portabilite-du-tex.html#dependances
  514. =head1 VERSION
  515. This is latexpand version v1.5.