#!/usr/bin/env bash
# tangle-file.sh
# Usage: tangle-file.sh <SOURCE_DIR> <DEST_DIR> <ORG_FILE>
# Example: tangle-file.sh ~/NixOS ~/Projects/DroidNix README.org

set -euo pipefail

# ---------------------------------------------------------------------------
# Arguments
# ---------------------------------------------------------------------------
# All three positional arguments are required. The usage text is a diagnostic,
# so it goes to stderr.
if [[ $# -lt 3 ]]; then
  {
    echo "Usage: $0 <SOURCE_DIR> <DEST_DIR> <ORG_FILENAME>"
    echo " SOURCE_DIR : Directory to scan recursively"
    echo " DEST_DIR : Directory to write the .org file and assets"
    echo " ORG_FILENAME : Name of the output org file (e.g. README.org)"
  } >&2
  exit 1
fi

# A leading literal "~" in an argument is replaced with $HOME before the path
# is resolved. DEST_DIR is resolved with `realpath -m`, so it does not have to
# exist yet.
SOURCE_DIR="$(realpath "${1/#\~/$HOME}")"
DEST_DIR="$(realpath -m "${2/#\~/$HOME}")"
ORG_ARG="${3/#\~/$HOME}"

# Fail early with a clear message: a missing source directory would otherwise
# only surface as a `find` error inside a process substitution, which does not
# stop the script.
if [[ ! -d "$SOURCE_DIR" ]]; then
  echo "Error: source directory not found: $SOURCE_DIR" >&2
  exit 1
fi

# Third arg may be a full path or just a filename.
# If it contains a slash, treat it as an absolute/relative path; otherwise
# place it in DEST_DIR.
if [[ "$ORG_ARG" == */* ]]; then
  OUTPUT_FILE="$(realpath -m "$ORG_ARG")"
else
  OUTPUT_FILE="$DEST_DIR/$ORG_ARG"
fi

ASSETS_DIR="$DEST_DIR/assets"

# Scratch file collecting the relative paths of skipped (non-text) files.
# The EXIT trap removes it on every exit path, including early error exits.
SKIPPED_LIST_FILE="$(mktemp)"
trap 'rm -f -- "$SKIPPED_LIST_FILE"' EXIT

# ---------------------------------------------------------------------------
# Extension → language mapping
# ---------------------------------------------------------------------------
# Extensions (lowercase, without the leading dot) that are treated as text
# when MIME detection is inconclusive. Dotfile names such as "gitignore" are
# listed because ".gitignore" yields "gitignore" as its extension.
ALLOWED_EXTENSIONS=(
  "sh" "bash" "zsh" "fish" "py" "rb" "js" "mjs" "cjs" "ts" "lua"
  "conf" "cfg" "ini" "toml" "yaml" "yml" "json" "jsonc" "xml"
  "css" "scss" "sass" "html" "htm" "md" "markdown" "nix" "vim"
  "vimrc" "el" "rs" "go" "c" "cpp" "cc" "cxx" "h" "hpp" "java"
  "kt" "sql" "r" "tex" "rasi" "qml" "hs" "ex" "exs" "clj" "cs"
  "swift" "dart" "zig" "nu" "ps1" "bat" "cmd" "env" "lock"
  "gitignore" "gitattributes" "editorconfig" "prettierrc" "eslintrc"
)
readonly ALLOWED_EXTENSIONS

#######################################
# Report whether an extension is listed in ALLOWED_EXTENSIONS.
# The comparison is case-insensitive: the argument is lowercased first.
# Globals:   ALLOWED_EXTENSIONS (read)
# Arguments: $1 - file extension without the leading dot
# Returns:   0 if the extension is listed, 1 otherwise
#######################################
is_extension_allowed() {
  local ext="${1,,}" # lowercase
  local allowed      # keep the loop variable out of the global scope

  for allowed in "${ALLOWED_EXTENSIONS[@]}"; do
    if [[ "$ext" == "$allowed" ]]; then
      return 0
    fi
  done
  return 1
}

#######################################
# Map a file extension to the language name written on the "#+BEGIN_SRC" line.
# Arguments: $1 - file extension without the leading dot (case-insensitive)
# Outputs:   the language tag on stdout; an extension with no mapping is
#            printed back in lowercase
#######################################
get_language_tag() {
  local ext="${1,,}"
  local tag

  case "$ext" in
    sh | bash) tag="bash" ;;
    zsh) tag="zsh" ;;
    fish) tag="fish" ;;
    nu) tag="nu" ;;
    py) tag="python" ;;
    rb) tag="ruby" ;;
    js | mjs | cjs) tag="javascript" ;;
    ts) tag="typescript" ;;
    lua) tag="lua" ;;
    conf | cfg | ini) tag="conf" ;;
    toml | lock) tag="toml" ;;
    yaml | yml) tag="yaml" ;;
    json | jsonc) tag="json" ;;
    xml) tag="xml" ;;
    css | rasi) tag="css" ;;
    scss | sass) tag="scss" ;;
    html | htm) tag="html" ;;
    md | markdown) tag="markdown" ;;
    nix) tag="nix" ;;
    vim | vimrc) tag="vimscript" ;;
    el) tag="emacs-lisp" ;;
    rs) tag="rust" ;;
    go) tag="go" ;;
    c | h) tag="c" ;;
    cpp | cc | cxx | hpp) tag="cpp" ;;
    java) tag="java" ;;
    kt) tag="kotlin" ;;
    sql) tag="sql" ;;
    r) tag="R" ;;
    tex) tag="latex" ;;
    qml) tag="qml" ;;
    hs) tag="haskell" ;;
    ex | exs) tag="elixir" ;;
    clj) tag="clojure" ;;
    cs) tag="csharp" ;;
    swift) tag="swift" ;;
    dart) tag="dart" ;;
    zig) tag="zig" ;;
    ps1) tag="powershell" ;;
    bat | cmd) tag="bat" ;;
    env | gitignore | gitattributes | editorconfig | prettierrc | eslintrc)
      tag="text"
      ;;
    *) tag="$ext" ;;
  esac

  # printf rather than echo: an unmapped extension such as "-n" would be
  # swallowed by echo as an option.
  printf '%s\n' "$tag"
}

# ---------------------------------------------------------------------------
# Helper: check if a file is text (by mime type or allowed extension)
# ---------------------------------------------------------------------------
#######################################
# Decide whether a file should be embedded as text.
# Checks, in order:
#   1. MIME type text/*                          -> text
#   2. known binary/media MIME types             -> not text, whatever the
#      extension (this includes application/octet-stream, which is also the
#      value used when `file` fails)
#   3. a few text-like application/* MIME types and empty files -> text
#   4. extension accepted by is_extension_allowed -> text
#   5. `file -b` description mentions text, ASCII or UTF-8 -> text
# Arguments: $1 - path of the file to inspect
# Returns:   0 if the file is considered text, 1 otherwise
#######################################
is_text_file() {
  local path="$1"
  local filename ext mime_type description

  filename="${path##*/}"
  ext="${filename##*.}"
  # No dot in the name: the expansion returned the whole filename.
  if [[ "$ext" == "$filename" ]]; then
    ext=""
  fi

  mime_type="$(file -b --mime-type -- "$path" 2>/dev/null || echo "application/octet-stream")"

  # The arms are tried top to bottom and the first match decides.
  case "$mime_type" in
    text/*)
      return 0
      ;;
    image/* | audio/* | video/* | \
      application/octet-stream | \
      application/zip | \
      application/gzip | \
      application/x-tar | \
      application/x-bzip2 | \
      application/x-xz | \
      application/x-zstd | \
      application/x-7z-compressed | \
      application/x-rar | \
      application/pdf | \
      application/vnd.* | \
      font/*)
      return 1
      ;;
    application/json | \
      application/x-shellscript | \
      application/x-sh | \
      application/x-nix | \
      application/xml | \
      application/javascript | \
      application/typescript | \
      inode/x-empty)
      return 0
      ;;
  esac

  # Fall back: if extension is in our allowed list → treat as text
  if [[ -n "$ext" ]] && is_extension_allowed "$ext"; then
    return 0
  fi

  # Last resort (no extension, or an extension that is not in the list):
  # ask `file` for a plain description.
  description="$(file -b -- "$path" 2>/dev/null || echo "")"
  if [[ "$description" == *"text"* || "$description" == *"ASCII"* || "$description" == *"UTF-8"* ]]; then
    return 0
  fi

  return 1
}

# ---------------------------------------------------------------------------
# Prepare output
# ---------------------------------------------------------------------------
# OUTPUT_FILE may live outside DEST_DIR (when the third argument contained a
# slash), so its parent directory is created as well.
mkdir -p -- "$DEST_DIR"
mkdir -p -- "$ASSETS_DIR"
mkdir -p -- "$(dirname -- "$OUTPUT_FILE")"

# Write org file header
# Use tangle-header.md if it exists in the same directory as the script,
# otherwise in DEST_DIR. Abort when neither location has it.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
HEADER_FILE=""
if [[ -f "$SCRIPT_DIR/tangle-header.md" ]]; then
  HEADER_FILE="$SCRIPT_DIR/tangle-header.md"
elif [[ -f "$DEST_DIR/tangle-header.md" ]]; then
  HEADER_FILE="$DEST_DIR/tangle-header.md"
else
  echo "Error: tangle-header.md not found in script directory or destination directory" >&2
  # Do not leave the scratch list behind on this early exit.
  rm -f -- "$SKIPPED_LIST_FILE"
  exit 1
fi

# Truncates any previous output file; later sections append to it.
cat -- "$HEADER_FILE" > "$OUTPUT_FILE"

echo "Scanning $SOURCE_DIR ..."
echo "Writing to $OUTPUT_FILE ..."

# ---------------------------------------------------------------------------
# Main loop: find all files, sorted, skipping .git
# ---------------------------------------------------------------------------

# Print a file with Org's comma-escaping applied: any line that starts
# (after optional blanks) with "*" or "#+", optionally preceded by commas,
# gets one extra comma. Without this, such a line inside a src block is parsed
# as a headline or keyword and breaks the block. Org strips the comma again
# when it reads the block contents, so tangling restores the original text.
# LC_ALL=C makes sed work byte-wise so odd encodings cannot defeat the match.
emit_org_escaped() {
  LC_ALL=C sed -E 's/^([[:blank:]]*)(,*(\*|#\+))/\1,\2/' -- "$1"
}

# Display prefix for "Information read from ...": "~" shorthand when
# SOURCE_DIR is $HOME itself or below it, the raw path otherwise.
# The "/" in the second test keeps siblings such as "${HOME}2" from matching.
if [[ "$SOURCE_DIR" == "$HOME" ]]; then
  org_root="~"
elif [[ "$SOURCE_DIR" == "$HOME"/* ]]; then
  org_root="~/${SOURCE_DIR#"$HOME"/}"
else
  org_root="$SOURCE_DIR"
fi

while IFS= read -r -d '' abs_file; do
  filename="${abs_file##*/}"
  ext="${filename##*.}"
  if [[ "$ext" == "$filename" ]]; then
    ext="" # extensionless
  fi

  # Relative path from SOURCE_DIR (e.g. modules/core/flatpak.nix).
  # The inner quotes make SOURCE_DIR a literal prefix, not a glob pattern.
  rel_path="${abs_file#"$SOURCE_DIR"/}"

  # Tangle path always under generated/
  tangle_path="generated/$rel_path"

  # Org display path
  org_path="$org_root/$rel_path"

  if is_text_file "$abs_file"; then
    # Determine language tag
    if [[ -z "$ext" ]]; then
      lang="text"
    else
      lang="$(get_language_tag "$ext")"
    fi

    {
      printf '%s\n' "** =$tangle_path="
      printf '%s\n' "Information read from $org_path"
      printf '%s\n' "#+BEGIN_SRC $lang :tangle $tangle_path :noweb yes :mkdirp yes :eval never"
      emit_org_escaped "$abs_file"
      # Ensure there's a newline before #+END_SRC
      echo ""
      echo "#+END_SRC"
      echo ""
    } >> "$OUTPUT_FILE"
  else
    # Binary / skipped: record relative path, copy to assets
    printf '%s\n' "$rel_path" >> "$SKIPPED_LIST_FILE"

    asset_dest="$ASSETS_DIR/$rel_path"
    mkdir -p -- "$(dirname -- "$asset_dest")"
    cp -- "$abs_file" "$asset_dest"
  fi
done < <(find "$SOURCE_DIR" -type f \
  -not -path "*/.git/*" \
  -not -name ".git" \
  -not -path "$OUTPUT_FILE" \
  -print0 | sort -z)

# ---------------------------------------------------------------------------
# Skipped-files section
# ---------------------------------------------------------------------------
# Only emitted when at least one file was skipped. Appends a heading and a
# src block that lists the skipped files as a tree, followed by a note on
# where the files were copied.
if [[ -s "$SKIPPED_LIST_FILE" ]]; then
  {
    echo ""
    echo "* Skipped (non-text / binary) files"
    echo ""
    echo "** =UntangledFiles.md="
    echo "Tree of untangled files"
    echo "#+BEGIN_SRC text :tangle UntangledFiles.md :noweb yes :mkdirp yes :eval never"

    if command -v tree &>/dev/null; then
      # Build a temporary shadow directory tree and run `tree` on it
      TMP_DIR="$(mktemp -d)"
      while IFS= read -r rel; do
        mkdir -p -- "$TMP_DIR/$(dirname -- "$rel")"
        touch -- "$TMP_DIR/$rel"
      done < "$SKIPPED_LIST_FILE"

      # -a: `tree` hides dotfiles by default, which would drop skipped hidden
      # files from the listing. `tail -n +2` removes the first line, which is
      # the temporary directory's own path.
      tree_status=0
      tree -a --noreport "$TMP_DIR" | tail -n +2 || tree_status=$?
      # Remove the shadow tree before reporting any failure.
      rm -rf -- "${TMP_DIR:?}"
      if [[ "$tree_status" -ne 0 ]]; then
        exit "$tree_status"
      fi
    else
      # Fallback: manual pseudo-tree using sorted paths
      echo "/"
      non_slash='[^/]'
      sort "$SKIPPED_LIST_FILE" | while IFS= read -r rel; do
        # Depth is the number of "/" in the path: delete every other character
        # and measure what is left.
        slashes="${rel//$non_slash/}"
        depth=${#slashes}
        # One space of indentation per directory level.
        printf -v indent '%*s' "$depth" ''
        printf '%s\n' "${indent}└── ${rel##*/}"
      done
    fi

    echo "#+END_SRC"
    echo ""

    # Summary of where assets were copied
    echo "Skipped files have been copied to: $ASSETS_DIR"
  } >> "$OUTPUT_FILE"
fi

# ---------------------------------------------------------------------------
# Cleanup
# ---------------------------------------------------------------------------
# Remove the scratch list of skipped files, then print a short summary.
rm -f -- "$SKIPPED_LIST_FILE"

echo ""
echo "Done!"
echo " Org file : $OUTPUT_FILE"
echo " Assets : $ASSETS_DIR"