#!/usr/bin/zsh
# Print the command-line synopsis to stderr and terminate with status 1.
# The visible call sites all invoke this on an argument error, hence the
# non-zero exit status and the use of stderr rather than stdout.
usage() {
  # Inside a zsh function, $0 expands to the function name ("usage") when the
  # default FUNCTION_ARGZERO option is on; ZSH_ARGZERO holds the script name.
  # The :-$0 fallback keeps the message correct in shells that lack it.
  local script_name="${ZSH_ARGZERO:-$0}"
  {
    printf 'Usage: %s [-q quality] [--no-transitions] input.pdf\n' "$script_name"
    printf '%s\n' " -q quality Set output quality level (0: screen, 1: ebook (default), 2: prepress)"
    printf '%s\n' " --no-transitions Remove transitional pages (keep only the last frame before slide number changes)"
  } >&2
  exit 1
}
# Default values
quality=1             # Quality level: 0 screen, 1 ebook, 2 prepress
remove_transitions=0  # Set to 1 by --no-transitions
input_file=""         # Reset explicitly so a value inherited from the
                      # environment can never satisfy the check below

# Parse arguments
while [[ $# -gt 0 ]]; do
  case "$1" in
    -q)
      # The level is the word after -q; if it is missing, $2 is empty and
      # fails the pattern just like any other invalid value.
      if [[ "${2:-}" =~ ^[0-2]$ ]]; then
        quality=$2
        shift
      else
        echo "Invalid quality level. Use 0 (screen), 1 (ebook), or 2 (prepress)." >&2
        exit 1
      fi
      ;;
    --no-transitions)
      remove_transitions=1
      ;;
    -*)
      echo "Unknown option: $1" >&2
      usage
      ;;
    *)
      # Positional argument: the PDF to process (the last one given wins).
      input_file="$1"
      ;;
  esac
  shift
done

# Check if input file is provided
if [[ -z "$input_file" ]]; then
  echo "Error: No input file provided." >&2
  usage
fi

# Fail early with a clear message instead of letting a later tool fail on a
# missing or unreadable file.
if [[ ! -f "$input_file" || ! -r "$input_file" ]]; then
  echo "Error: Cannot read input file '$input_file'." >&2
  exit 1
fi
# Translate the numeric quality level into the Ghostscript preset name that
# is later passed via -dPDFSETTINGS. Any other value leaves gs_quality as is.
if [[ "$quality" == 0 ]]; then
  gs_quality="/screen"
elif [[ "$quality" == 1 ]]; then
  gs_quality="/ebook"
elif [[ "$quality" == 2 ]]; then
  gs_quality="/prepress"
fi
# Create temporary file
# mktemp creates "$temp_file" itself; the working copies used by this script
# are the sibling names "$temp_file.pdf" and "$temp_file-clean.pdf".
temp_file=$(mktemp) || {
  echo "Error: Could not create a temporary file." >&2
  exit 1
}
# Remove all three names whenever the script exits, including early error
# exits; -f keeps this silent for names that were never created or are
# already gone.
trap 'rm -f -- "$temp_file" "$temp_file.pdf" "$temp_file-clean.pdf"' EXIT

# Uncompress PDF and remove annotations
# LC_ALL=C makes sed match byte-wise; the uncompressed PDF may contain bytes
# that are not valid text in the current locale.
pdftk "$input_file" output - uncompress | LC_ALL=C sed '/^\/Annots/d' > "$temp_file.pdf"
# The pipeline's status is sed's, so a pdftk failure is detected through the
# empty result it leaves behind.
if [[ ! -s "$temp_file.pdf" ]]; then
  echo "Error: Failed to uncompress '$input_file' with pdftk." >&2
  exit 1
fi
# Print, one per line and in ascending order without duplicates, the last
# page of every slide.
# Arguments: $1    - total number of pages in the document
#            $2... - page numbers at which a new page label begins (the
#                    PageLabelNewIndex values from `pdftk dump_data`)
# The page just before each label start closes the previous slide, and the
# final page of the document closes the last slide.
last_frame_pages() {
  local total=$1 start
  shift
  {
    # Iterating over the values instead of array subscripts keeps the result
    # independent of the shell's array base (zsh arrays start at 1).
    for start in "$@"; do
      if (( start > 1 && start <= total )); then
        printf '%s\n' "$(( start - 1 ))"
      fi
    done
    printf '%s\n' "$total"
  } | sort -n -u
}

if [[ "${remove_transitions:-0}" -eq 1 ]]; then
  # Run dump_data once and capture its output.
  dump=$(pdftk "$temp_file.pdf" dump_data)

  # Get the total number of pages.
  # printf is used instead of echo so backslashes in the dump are passed
  # through unchanged.
  total_pages=$(printf '%s\n' "$dump" | awk '/^NumberOfPages/ { print $2 }')

  # Read all PageLabelNewIndex values into an array.
  new_indexes=($(printf '%s\n' "$dump" | awk '/^PageLabelNewIndex/ { print $2 }'))

  if [[ -z "$total_pages" || ${#new_indexes[@]} -eq 0 ]]; then
    # Without page labels there is no way to tell slides apart; dropping
    # pages here would discard real content, so leave the document as is.
    echo "Warning: No transitional pages identified; keeping full document." >&2
  else
    pages_to_keep=($(last_frame_pages "$total_pages" "${new_indexes[@]}"))

    # (Optional: Uncomment to debug)
    # echo "Pages to keep: ${pages_to_keep[@]}"

    if ! pdftk "$temp_file.pdf" cat "${pages_to_keep[@]}" output "$temp_file-clean.pdf"; then
      echo "Error: Failed to remove transitional pages." >&2
      rm -f -- "$temp_file.pdf" "$temp_file-clean.pdf"
      exit 1
    fi
    mv "$temp_file-clean.pdf" "$temp_file.pdf"
  fi
fi
# Compress the PDF
# The result is written next to the input: a trailing ".pdf" is stripped from
# the input name and "-compressed.pdf" is appended.
output_file="${input_file%.pdf}-compressed.pdf"
gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS="$gs_quality" \
  -dNOPAUSE -dQUIET -dBATCH -sOutputFile="$output_file" "$temp_file.pdf"
# Capture the status now; the cleanup below would otherwise overwrite it.
gs_rc=$?

# Clean up
# "$temp_file" is the placeholder created by mktemp; "$temp_file.pdf" is the
# working copy. -f keeps this quiet if either is already gone.
rm -f -- "$temp_file" "$temp_file.pdf"

# Report a Ghostscript failure instead of ending with the status of rm.
if [[ $gs_rc -ne 0 ]]; then
  echo "Error: Ghostscript failed to compress '$input_file'." >&2
  exit "$gs_rc"
fi
|