|
48 | 48 | import inspect
|
49 | 49 | import itertools
|
50 | 50 | import os
|
| 51 | +import pathlib |
51 | 52 | import platform
|
52 | 53 | import re
|
53 | 54 | import shutil
|
54 | 55 | import signal
|
55 | 56 | import stat
|
56 | 57 | import ssl
|
57 | 58 | import sys
|
| 59 | +import tarfile |
58 | 60 | import tempfile
|
59 | 61 | import time
|
60 | 62 | import zlib
|
@@ -1408,13 +1410,12 @@ def find_extension(filename):
|
1408 | 1410 | suffixes = sorted(EXTRACT_CMDS.keys(), key=len, reverse=True)
|
1409 | 1411 | pat = r'(?P<ext>%s)$' % '|'.join([s.replace('.', '\\.') for s in suffixes])
|
1410 | 1412 | res = re.search(pat, filename, flags=re.IGNORECASE)
|
| 1413 | + |
1411 | 1414 | if res:
|
1412 |
| - ext = res.group('ext') |
| 1415 | + return res.group('ext') |
1413 | 1416 | else:
|
1414 | 1417 | raise EasyBuildError("%s has unknown file extension", filename)
|
1415 | 1418 |
|
1416 |
| - return ext |
1417 |
| - |
1418 | 1419 |
|
1419 | 1420 | def extract_cmd(filepath, overwrite=False):
|
1420 | 1421 | """
|
@@ -2644,7 +2645,7 @@ def get_source_tarball_from_git(filename, target_dir, git_config):
|
2644 | 2645 | """
|
2645 | 2646 | Downloads a git repository, at a specific tag or commit, recursively or not, and make an archive with it
|
2646 | 2647 |
|
2647 |
| - :param filename: name of the archive to save the code to (must be .tar.gz) |
| 2648 | + :param filename: name of the archive file to save the code to (including extension) |
2648 | 2649 | :param target_dir: target directory where to save the archive to
|
2649 | 2650 | :param git_config: dictionary containing url, repo_name, recursive, and one of tag or commit
|
2650 | 2651 | """
|
@@ -2680,9 +2681,6 @@ def get_source_tarball_from_git(filename, target_dir, git_config):
|
2680 | 2681 | if not url:
|
2681 | 2682 | raise EasyBuildError("url not specified in git_config parameter")
|
2682 | 2683 |
|
2683 |
| - if not filename.endswith('.tar.gz'): |
2684 |
| - raise EasyBuildError("git_config currently only supports filename ending in .tar.gz") |
2685 |
| - |
2686 | 2684 | # prepare target directory and clone repository
|
2687 | 2685 | mkdir(target_dir, parents=True)
|
2688 | 2686 |
|
@@ -2768,37 +2766,138 @@ def get_source_tarball_from_git(filename, target_dir, git_config):
|
2768 | 2766 | run_shell_cmd(cmd, work_dir=work_dir, hidden=True, verbose_dry_run=True)
|
2769 | 2767 |
|
2770 | 2768 | # Create archive
|
2771 |
| - archive_path = os.path.join(target_dir, filename) |
2772 |
| - |
2773 |
| - if keep_git_dir: |
2774 |
| - # create archive of git repo including .git directory |
2775 |
| - tar_cmd = ['tar', 'cfvz', archive_path, repo_name] |
2776 |
| - else: |
2777 |
| - # create reproducible archive |
2778 |
| - # see https://reproducible-builds.org/docs/archives/ |
2779 |
| - tar_cmd = [ |
2780 |
| - # print names of all files and folders excluding .git directory |
2781 |
| - 'find', repo_name, '-name ".git"', '-prune', '-o', '-print0', |
2782 |
| - # reset access and modification timestamps to epoch 0 (equivalent to --mtime in GNU tar) |
2783 |
| - '-exec', 'touch', '--date=@0', '{}', r'\;', |
2784 |
| - # reset file permissions of cloned repo (equivalent to --mode in GNU tar) |
2785 |
| - '-exec', 'chmod', '"go+u,go-w"', '{}', r'\;', '|', |
2786 |
| - # sort file list (equivalent to --sort in GNU tar) |
2787 |
| - 'LC_ALL=C', 'sort', '--zero-terminated', '|', |
2788 |
| - # create tarball in GNU format with ownership and permissions reset |
2789 |
| - 'tar', '--create', '--no-recursion', '--owner=0', '--group=0', '--numeric-owner', |
2790 |
| - '--format=gnu', '--null', '--files-from', '-', '|', |
2791 |
| - # compress tarball with gzip without original file name and timestamp |
2792 |
| - 'gzip', '--no-name', '>', archive_path |
2793 |
| - ] |
2794 |
| - run_shell_cmd(' '.join(tar_cmd), work_dir=tmpdir, hidden=True, verbose_dry_run=True) |
| 2769 | + repo_path = os.path.join(tmpdir, repo_name) |
| 2770 | + reproducible = not keep_git_dir # presence of .git directory renders repo unreproducible |
| 2771 | + archive_path = make_archive(repo_path, archive_file=filename, archive_dir=target_dir, reproducible=reproducible) |
2795 | 2772 |
|
2796 | 2773 | # cleanup (repo_name dir does not exist in dry run mode)
|
2797 | 2774 | remove(tmpdir)
|
2798 | 2775 |
|
2799 | 2776 | return archive_path
|
2800 | 2777 |
|
2801 | 2778 |
|
def make_archive(source_dir, archive_file=None, archive_dir=None, reproducible=True):
    """
    Create an archive file of the given directory.
    The format of the tarball is defined by the extension of the archive file name.

    :param source_dir: path to directory to be archived (trailing path separators are ignored)
    :param archive_file: filename of archive; defaults to name of source directory plus '.tar.xz',
                         a filename without any extension gets '.tar.xz' appended
    :param archive_dir: path to directory to place the archive in (optional)
    :param reproducible: make a tarball that is reproducible across systems
        - see https://reproducible-builds.org/docs/archives/
        - requires uncompressed or LZMA compressed archive images
        - gzip is currently not supported due to nondeterministic data injected in its headers
          see https://github.com/python/cpython/issues/112346
        - if the requested compression cannot be made reproducible, a warning is printed
          and a regular (non-reproducible) archive is created instead
    :return: path to the archive (also returned in dry run mode, without creating the archive)
    :raises EasyBuildError: if the extension of the archive filename is not a supported tarball extension

    Default behaviour: reproducible tarball in .tar.xz
    """
    def reproducible_filter(tarinfo):
        """Filter out system-dependent data from tarball"""
        # contents of '.git' subdir are inherently system dependent
        if "/.git/" in tarinfo.name or tarinfo.name.endswith("/.git"):
            return None
        # set timestamp to epoch 0
        tarinfo.mtime = 0
        # reset file permissions by applying go+u,go-w
        user_mode = tarinfo.mode & stat.S_IRWXU
        group_mode = (user_mode >> 3) & ~stat.S_IWGRP  # user mode without write
        other_mode = group_mode >> 3  # same as group mode
        tarinfo.mode = (tarinfo.mode & ~0o77) | group_mode | other_mode
        # reset ownership to numeric UID/GID 0
        # equivalent in GNU tar to 'tar --owner=0 --group=0 --numeric-owner'
        tarinfo.uid = tarinfo.gid = 0
        tarinfo.uname = tarinfo.gname = ""
        return tarinfo

    ext_compression_map = {
        # taken from EXTRACT_CMDS
        '.gtgz': 'gz',
        '.tar.gz': 'gz',
        '.tgz': 'gz',
        '.tar.bz2': 'bz2',
        '.tb2': 'bz2',
        '.tbz': 'bz2',
        '.tbz2': 'bz2',
        '.tar.xz': 'xz',
        '.txz': 'xz',
        '.tar': '',
    }
    reproducible_compression = ['', 'xz']
    default_ext = '.tar.xz'

    # drop trailing path separators: otherwise basename() is empty and dirname() is the directory itself,
    # which results in a nameless default archive and an archive without top-level directory
    source_dir = os.fspath(source_dir)
    source_dir = source_dir.rstrip(os.path.sep) or source_dir

    if archive_file is None:
        archive_file = os.path.basename(source_dir) + default_ext

    try:
        archive_ext = find_extension(archive_file)
    except EasyBuildError:
        if '.' in archive_file:
            # archive filename has unknown extension (set for raise)
            archive_ext = ''
        else:
            # archive filename has no extension, use default one
            archive_ext = default_ext
            archive_file += archive_ext

    # find_extension matches case-insensitively and returns the extension as spelled in the filename,
    # so normalize it before looking it up in the (lower case) compression map
    archive_ext = archive_ext.lower()

    if archive_ext not in ext_compression_map:
        # archive filename has unsupported extension
        supported_exts = ', '.join(ext_compression_map)
        raise EasyBuildError(
            f"Unsupported archive format: {archive_file}. Supported tarball extensions: {supported_exts}"
        )
    compression = ext_compression_map[archive_ext]
    _log.debug(f"Archive extension and compression: {archive_ext} in {compression}")

    archive_path = archive_file if archive_dir is None else os.path.join(archive_dir, archive_file)

    archive_specs = {
        'name': archive_path,
        'mode': f"w:{compression}",
        'format': tarfile.GNU_FORMAT,
        'encoding': "utf-8",
    }

    if reproducible:
        if compression == 'xz':
            # ensure a consistent compression level in reproducible tarballs with XZ
            archive_specs['preset'] = 6
        elif compression not in reproducible_compression:
            # requested archive compression cannot be made reproducible
            print_warning(
                f"Can not create reproducible archive due to unsupported file compression ({compression}). "
                "Please use XZ instead."
            )
            reproducible = False

    archive_filter = reproducible_filter if reproducible else None

    if build_option('extended_dry_run'):
        # early return in dry run mode
        dry_run_msg("Archiving '%s' into '%s'...", source_dir, archive_path)
        return archive_path
    _log.info("Archiving '%s' into '%s'...", source_dir, archive_path)

    # TODO: replace with TarFile.add(recursive=True) when support for Python 3.6 drops
    # since Python v3.7 tarfile automatically orders the list of files added to the archive
    # see Tarfile.add documentation: https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.add
    source_files = [source_dir]
    # pathlib's glob includes hidden files
    source_files.extend([str(filepath) for filepath in pathlib.Path(source_dir).glob("**/*")])
    source_files.sort()  # independent of locale

    # paths in the archive are relative to the parent of the source directory,
    # such that the source directory itself is the top level entry of the archive
    parent_dir = os.path.dirname(source_dir)

    with tarfile.open(**archive_specs) as tar_archive:
        for filepath in source_files:
            # archive with target directory in its top level, remove any prefix in path
            file_name = os.path.relpath(filepath, start=parent_dir)
            # entries are added one by one (in sorted order), hence no recursion here
            tar_archive.add(filepath, arcname=file_name, recursive=False, filter=archive_filter)
            _log.debug("File/folder added to archive '%s': %s", archive_file, filepath)

    _log.info("Archive '%s' created successfully", archive_file)

    return archive_path
| 2899 | + |
| 2900 | + |
2802 | 2901 | def move_file(path, target_path, force_in_dry_run=False):
|
2803 | 2902 | """
|
2804 | 2903 | Move a file from path to target_path
|
|
0 commit comments