#!/usr/bin/env bash
# tangle-file.sh
#
# Recursively scans SOURCE_DIR and writes one Org file in which every text file
# becomes a "#+BEGIN_SRC" block that tangles to generated/<relative path>.
# Files not recognised as text are copied to DEST_DIR/assets and listed in a
# trailing "Skipped" section of the Org file.
#
# Usage:   tangle-file.sh <SOURCE_DIR> <DEST_DIR> <ORG_FILENAME>
# Example: tangle-file.sh ~/NixOS ~/Projects/DroidNix README.org
#
# Requires: realpath with -m support, file(1), find/sort with NUL support.
#           tree(1) is optional (a plain indented listing is used without it).
# A tangle-header.md must exist next to this script or in DEST_DIR; its content
# becomes the beginning of the generated Org file.

set -euo pipefail

# ---------------------------------------------------------------------------
# Arguments
# ---------------------------------------------------------------------------
if [[ $# -lt 3 ]]; then
  {
    echo "Usage: $0 <SOURCE_DIR> <DEST_DIR> <ORG_FILENAME>"
    echo "  SOURCE_DIR   : Directory to scan recursively"
    echo "  DEST_DIR     : Directory to write the .org file and assets"
    echo "  ORG_FILENAME : Name of the output org file (e.g. README.org)"
  } >&2
  exit 1
fi

# A leading literal "~" (e.g. from a quoted argument) is expanded manually.
SOURCE_DIR="$(realpath "${1/#\~/$HOME}")"
DEST_DIR="$(realpath -m "${2/#\~/$HOME}")"
ORG_ARG="${3/#\~/$HOME}"

if [[ ! -d "$SOURCE_DIR" ]]; then
  echo "Error: SOURCE_DIR is not a directory: $SOURCE_DIR" >&2
  exit 1
fi

# Third arg may be a full path or just a filename.
# If it contains a slash, treat it as an absolute/relative path;
# otherwise place it in DEST_DIR.
if [[ "$ORG_ARG" == */* ]]; then
  OUTPUT_FILE="$(realpath -m "$ORG_ARG")"
else
  OUTPUT_FILE="$DEST_DIR/$ORG_ARG"
fi

ASSETS_DIR="$DEST_DIR/assets"

# Temporary state. Both are removed by the EXIT trap so that a failure under
# `set -e` does not leave them behind.
SKIPPED_LIST_FILE="$(mktemp)"
TMP_DIR=""

cleanup() {
  rm -f -- "$SKIPPED_LIST_FILE"
  if [[ -n "$TMP_DIR" ]]; then
    rm -rf -- "$TMP_DIR"
  fi
}
trap cleanup EXIT

# ---------------------------------------------------------------------------
# Extension → language mapping
# ---------------------------------------------------------------------------
ALLOWED_EXTENSIONS=(
  "sh" "bash" "zsh" "fish" "py" "rb" "js" "mjs" "cjs" "ts" "lua"
  "conf" "cfg" "ini" "toml" "yaml" "yml" "json" "jsonc" "xml"
  "css" "scss" "sass" "html" "htm" "md" "markdown" "nix"
  "vim" "vimrc" "el" "rs" "go" "c" "cpp" "cc" "cxx" "h" "hpp"
  "java" "kt" "sql" "r" "tex" "rasi" "qml" "hs" "ex" "exs" "clj"
  "cs" "swift" "dart" "zig" "nu" "ps1" "bat" "cmd" "env" "lock"
  "gitignore" "gitattributes" "editorconfig" "prettierrc" "eslintrc"
)

# Returns 0 when the extension $1 (case-insensitive, without the dot) is in
# ALLOWED_EXTENSIONS, 1 otherwise.
is_extension_allowed() {
  local ext="${1,,}" # lowercase
  local allowed
  for allowed in "${ALLOWED_EXTENSIONS[@]}"; do
    [[ "$ext" == "$allowed" ]] && return 0
  done
  return 1
}

# Prints the Org source-block language for the extension $1 (case-insensitive).
# Unknown extensions are printed back lowercased.
get_language_tag() {
  local ext="${1,,}"
  case "$ext" in
    sh|bash) echo "bash" ;;
    zsh) echo "zsh" ;;
    fish) echo "fish" ;;
    nu) echo "nu" ;;
    py) echo "python" ;;
    rb) echo "ruby" ;;
    js|mjs|cjs) echo "javascript" ;;
    ts) echo "typescript" ;;
    lua) echo "lua" ;;
    conf|cfg|ini) echo "conf" ;;
    toml|lock) echo "toml" ;;
    yaml|yml) echo "yaml" ;;
    json|jsonc) echo "json" ;;
    xml) echo "xml" ;;
    css|rasi) echo "css" ;;
    scss|sass) echo "scss" ;;
    html|htm) echo "html" ;;
    md|markdown) echo "markdown" ;;
    nix) echo "nix" ;;
    vim|vimrc) echo "vimscript" ;;
    el) echo "emacs-lisp" ;;
    rs) echo "rust" ;;
    go) echo "go" ;;
    c|h) echo "c" ;;
    cpp|cc|cxx|hpp) echo "cpp" ;;
    java) echo "java" ;;
    kt) echo "kotlin" ;;
    sql) echo "sql" ;;
    r) echo "R" ;;
    tex) echo "latex" ;;
    qml) echo "qml" ;;
    hs) echo "haskell" ;;
    ex|exs) echo "elixir" ;;
    clj) echo "clojure" ;;
    cs) echo "csharp" ;;
    swift) echo "swift" ;;
    dart) echo "dart" ;;
    zig) echo "zig" ;;
    ps1) echo "powershell" ;;
    bat|cmd) echo "bat" ;;
    env|gitignore|gitattributes|editorconfig|prettierrc|eslintrc) echo "text" ;;
    *) echo "$ext" ;;
  esac
}

# ---------------------------------------------------------------------------
# Helper: check if a file is text (by mime type or allowed extension)
# ---------------------------------------------------------------------------
# Returns 0 when file $1 should be embedded as text, 1 when it should be
# treated as binary. Decision order: text/* mime type → explicit binary mime
# types → known text-like application mime types → allowed extension →
# free-form `file` description.
is_text_file() {
  local file="$1"
  local filename ext mime_type description

  filename="$(basename "$file")"
  ext="${filename##*.}"
  # No extension (ext equals the whole filename)
  [[ "$ext" == "$filename" ]] && ext=""

  # If `file` itself fails, assume binary rather than aborting the run.
  mime_type="$(file -b --mime-type "$file" 2>/dev/null \
    || echo "application/octet-stream")"

  # Always include if mime says text
  if [[ "$mime_type" == text/* ]]; then
    return 0
  fi

  # Explicitly skip binary/media mime types — regardless of extension
  case "$mime_type" in
    image/*|audio/*|video/*|\
    application/octet-stream|\
    application/zip|\
    application/gzip|\
    application/x-tar|\
    application/x-bzip2|\
    application/x-xz|\
    application/x-zstd|\
    application/x-7z-compressed|\
    application/x-rar|\
    application/pdf|\
    application/vnd.*|\
    font/*)
      return 1
      ;;
  esac

  # Some well-known text-ish mime types
  case "$mime_type" in
    application/json|\
    application/x-shellscript|\
    application/x-sh|\
    application/x-nix|\
    application/xml|\
    application/javascript|\
    application/typescript|\
    inode/x-empty)
      return 0
      ;;
  esac

  # Fall back: if extension is in our allowed list → treat as text
  if [[ -n "$ext" ]] && is_extension_allowed "$ext"; then
    return 0
  fi

  # Last resort for extensionless files: ask `file` for a plain description
  description="$(file -b "$file" 2>/dev/null || echo "")"
  if [[ "$description" == *"text"* \
     || "$description" == *"ASCII"* \
     || "$description" == *"UTF-8"* ]]; then
    return 0
  fi

  return 1
}

# ---------------------------------------------------------------------------
# Prepare output
# ---------------------------------------------------------------------------
mkdir -p "$DEST_DIR"
mkdir -p "$ASSETS_DIR"
# OUTPUT_FILE may live outside DEST_DIR when ORG_FILENAME contained a slash.
mkdir -p "$(dirname "$OUTPUT_FILE")"

# Write org file header.
# Use tangle-header.md from the script's own directory if present,
# otherwise from DEST_DIR.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
HEADER_FILE=""
if [[ -f "$SCRIPT_DIR/tangle-header.md" ]]; then
  HEADER_FILE="$SCRIPT_DIR/tangle-header.md"
elif [[ -f "$DEST_DIR/tangle-header.md" ]]; then
  HEADER_FILE="$DEST_DIR/tangle-header.md"
else
  echo "Error: tangle-header.md not found in script directory or destination directory" >&2
  exit 1
fi
cat "$HEADER_FILE" > "$OUTPUT_FILE"

echo "Scanning $SOURCE_DIR ..."
echo "Writing to $OUTPUT_FILE ..."

# Org display root: "~"-relative when SOURCE_DIR is HOME or below it, the raw
# path otherwise. The "/" in the second test keeps e.g. /home/userfoo from
# being mistaken for a subdirectory of /home/user.
if [[ "$SOURCE_DIR" == "$HOME" ]]; then
  org_root="~"
elif [[ "$SOURCE_DIR" == "$HOME"/* ]]; then
  org_root="~/${SOURCE_DIR#"$HOME"/}"
else
  org_root="$SOURCE_DIR"
fi

# ---------------------------------------------------------------------------
# Main loop: find all files, sorted, skipping .git
# ---------------------------------------------------------------------------
while IFS= read -r -d '' abs_file; do
  filename="$(basename "$abs_file")"
  ext="${filename##*.}"
  [[ "$ext" == "$filename" ]] && ext="" # extensionless

  # Relative path from SOURCE_DIR (e.g. modules/core/flatpak.nix).
  # The prefix is quoted so glob characters in SOURCE_DIR stay literal.
  rel_path="${abs_file#"$SOURCE_DIR"/}"

  # Tangle path always under generated/
  tangle_path="generated/$rel_path"

  org_path="$org_root/$rel_path"

  if is_text_file "$abs_file"; then
    # Determine language tag
    if [[ -z "$ext" ]]; then
      lang="text"
    else
      lang="$(get_language_tag "$ext")"
    fi

    {
      printf '%s\n' "** =$tangle_path="
      printf '%s\n' "Information read from $org_path"
      printf '%s\n' "#+BEGIN_SRC $lang :tangle $tangle_path :noweb yes :mkdirp yes :eval never"
      # Comma-escape lines starting with "*" or "#+" (optionally already
      # comma-prefixed) so they cannot terminate the block or become
      # headlines; Org removes the comma again when tangling.
      LC_ALL=C sed -E 's/^([[:blank:]]*)(,*(\*|#\+))/\1,\2/' "$abs_file"
      # Ensure #+END_SRC starts on its own line without adding a spurious
      # blank line to files that already end with a newline.
      if [[ -n "$(tail -c 1 "$abs_file")" ]]; then
        echo ""
      fi
      echo "#+END_SRC"
      echo ""
    } >> "$OUTPUT_FILE"
  else
    # Binary / skipped: record relative path, copy to assets
    printf '%s\n' "$rel_path" >> "$SKIPPED_LIST_FILE"
    asset_dest="$ASSETS_DIR/$rel_path"
    mkdir -p "$(dirname "$asset_dest")"
    cp "$abs_file" "$asset_dest"
  fi
done < <(
  # ASSETS_DIR is excluded so that, when DEST_DIR lies inside SOURCE_DIR,
  # assets copied by an earlier run are not copied again into assets/assets/.
  find "$SOURCE_DIR" -type f \
    -not -path "*/.git/*" \
    -not -name ".git" \
    -not -path "$OUTPUT_FILE" \
    -not -path "$ASSETS_DIR/*" \
    -print0 | sort -z
)

# ---------------------------------------------------------------------------
# Skipped-files section
# ---------------------------------------------------------------------------
if [[ -s "$SKIPPED_LIST_FILE" ]]; then
  {
    echo ""
    echo "* Skipped (non-text / binary) files"
    echo ""
    echo "** =UntangledFiles.md="
    echo "Tree of untangled files"
    echo "#+BEGIN_SRC text :tangle UntangledFiles.md :noweb yes :mkdirp yes :eval never"
  } >> "$OUTPUT_FILE"

  if command -v tree &>/dev/null; then
    # Build a temporary shadow directory tree and run `tree` on it
    TMP_DIR="$(mktemp -d)"
    while IFS= read -r rel; do
      mkdir -p "$TMP_DIR/$(dirname "$rel")"
      touch "$TMP_DIR/$rel"
    done < "$SKIPPED_LIST_FILE"
    # -a: include dotfiles, which tree hides by default.
    # tail drops the first line, which is the temporary directory path.
    tree -a --noreport "$TMP_DIR" | tail -n +2 >> "$OUTPUT_FILE"
    rm -rf -- "$TMP_DIR"
    TMP_DIR=""
  else
    # Fallback: manual pseudo-tree using sorted paths. Each file name is
    # indented by two spaces per directory level of its relative path.
    echo "/" >> "$OUTPUT_FILE"
    while IFS= read -r rel; do
      slashes="${rel//[^\/]/}" # keep only the "/" characters
      depth=${#slashes}
      indent=""
      for ((i = 0; i < depth; i++)); do
        indent+="  "
      done
      printf '%s\n' "${indent}|-- ${rel##*/}" >> "$OUTPUT_FILE"
    done < <(sort "$SKIPPED_LIST_FILE")
  fi

  {
    echo "#+END_SRC"
    echo ""
    # Summary of where assets were copied
    echo "Skipped files have been copied to: $ASSETS_DIR"
  } >> "$OUTPUT_FILE"
fi

# ---------------------------------------------------------------------------
# Done (temporary files are removed by the EXIT trap)
# ---------------------------------------------------------------------------
echo ""
echo "Done!"
echo "  Org file : $OUTPUT_FILE"
echo "  Assets   : $ASSETS_DIR"