#!/usr/bin/zsh
#
# strippdf - remove annotations from a PDF, optionally drop transitional
# pages, and recompress the result with Ghostscript.
#
# External tools invoked by this script: pdftk, sed, awk, sort, gs.

# Print the command-line synopsis to stderr and terminate with status 1.
#
# Inside a zsh function $0 expands to the function name (FUNCTION_ARGZERO),
# so the script name is taken from ZSH_ARGZERO when it is available and from
# $0 otherwise (e.g. when the file is run by another shell).
usage() {
  printf 'Usage: %s [-q quality] [--no-transitions] input.pdf\n' "${ZSH_ARGZERO:-$0}" >&2
  printf '%s\n' " -q quality Set output quality level (0: screen, 1: ebook (default), 2: prepress)" >&2
  printf '%s\n' " --no-transitions Remove transitional pages (keep only the last frame before slide number changes)" >&2
  exit 1
}
  8. # Default values
  9. quality=1
  10. remove_transitions=0
  11. # Parse arguments
  12. while [[ $# -gt 0 ]]; do
  13. case "$1" in
  14. -q)
  15. shift
  16. if [[ "$1" =~ ^[0-2]$ ]]; then
  17. quality=$1
  18. else
  19. echo "Invalid quality level. Use 0 (screen), 1 (ebook), or 2 (prepress)."
  20. exit 1
  21. fi
  22. ;;
  23. --no-transitions)
  24. remove_transitions=1
  25. ;;
  26. -*)
  27. echo "Unknown option: $1"
  28. usage
  29. ;;
  30. *)
  31. input_file="$1"
  32. ;;
  33. esac
  34. shift
  35. done
  36. # Check if input file is provided
  37. if [[ -z "$input_file" ]]; then
  38. echo "Error: No input file provided."
  39. usage
  40. fi
  41. # Set Ghostscript quality settings
  42. case "$quality" in
  43. 0) gs_quality="/screen" ;;
  44. 1) gs_quality="/ebook" ;;
  45. 2) gs_quality="/prepress" ;;
  46. esac
  47. # Create temporary file
  48. temp_file=$(mktemp)
  49. # Uncompress PDF and remove annotations
  50. pdftk "$input_file" output - uncompress | sed '/^\/Annots/d' > "$temp_file.pdf"
  51. if [[ $remove_transitions -eq 1 ]]; then
  52. # Run dump_data once and capture its output.
  53. dump=$(pdftk "$temp_file.pdf" dump_data)
  54. # Get the total number of pages.
  55. total_pages=$(echo "$dump" | awk '/^NumberOfPages/ { print $2 }')
  56. # Read all PageLabelNewIndex values into an array.
  57. new_indexes=($(echo "$dump" | awk '/^PageLabelNewIndex/ { print $2 }'))
  58. endings=()
  59. # Compute end pages for all groups except the first.
  60. # For each boundary between slides:
  61. # If the difference is >1, the previous slide spans multiple pages:
  62. # so use (current new index – 1) as the end page.
  63. # If the difference is exactly 1, then the previous slide is a single page;
  64. # so keep its PageLabelNewIndex.
  65. for ((i=1; i < ${#new_indexes[@]}; i++)); do
  66. prev=${new_indexes[i-1]}
  67. curr=${new_indexes[i]}
  68. if (( curr - prev == 1 )); then
  69. endings+=($prev)
  70. else
  71. endings+=($((curr - 1)))
  72. fi
  73. done
  74. # For the final slide group:
  75. last_index=${new_indexes[-1]}
  76. if (( total_pages - last_index == 0 )); then
  77. # Single–frame slide.
  78. endings+=($last_index)
  79. else
  80. # Multi–frame slide.
  81. endings+=($total_pages)
  82. fi
  83. # Remove duplicates (if any) and sort in ascending order.
  84. pages_to_keep=($(echo "${endings[@]}" | tr ' ' '\n' | sort -n | uniq))
  85. # (Optional: Uncomment to debug)
  86. # echo "Pages to keep: ${pages_to_keep[@]}"
  87. if [[ ${#pages_to_keep[@]} -gt 0 ]]; then
  88. pdftk "$temp_file.pdf" cat ${pages_to_keep[@]} output "$temp_file-clean.pdf"
  89. if [[ $? -ne 0 ]]; then
  90. echo "Error: Failed to remove transitional pages."
  91. rm "$temp_file.pdf"
  92. exit 1
  93. fi
  94. mv "$temp_file-clean.pdf" "$temp_file.pdf"
  95. else
  96. echo "Warning: No transitional pages identified; keeping full document."
  97. fi
  98. fi
  99. # Compress the PDF
  100. gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS="$gs_quality" -dNOPAUSE -dQUIET -dBATCH -sOutputFile="${input_file%.pdf}-compressed.pdf" "$temp_file.pdf"
  101. # Clean up
  102. rm "$temp_file.pdf"