深入理解shutil模块
copyfileobj、copyfile、copymode、copystat
copy、copy2、copytree
rmtree
move

shutil


复制


实现一个简单的复制

# Hand-rolled copy: create a.txt, then duplicate its text into b.txt.
with open('a.txt', 'w+') as source:
    source.write('abcd\n1234')
    source.flush()
    # Rewind to the beginning; after the write the position is at the end.
    source.seek(0)
    with open('b.txt', 'w+') as target:
        content = source.read()
        target.write(content)

copyfileobj和copyfile

  1. copyfileobj(fsrc, fdst[, length])
    • 文件对象的复制,fsrc和fdst是open打开的文件对象,复制内容
    • fdst要求可写
    • length 指定每次读取所用缓冲区(buffer)的大小,为0时使用默认的COPY_BUFSIZE
    # copyfileobj源码
    # Default chunk size used by copyfileobj: 1 MiB when _WINDOWS is true,
    # otherwise 64 KiB.
    COPY_BUFSIZE = 1024 * 1024 if _WINDOWS else 64 * 1024
    
    def copyfileobj(fsrc, fdst, length=0):
        """Stream everything readable from file-like fsrc into file-like fdst.

        Each fsrc.read() call is passed ``length``; a falsy ``length``
        (the default 0) selects the module-level COPY_BUFSIZE instead.
        Copying stops at the first empty read. Returns None.
        """
        chunk_size = length or COPY_BUFSIZE
        # Bind the methods to locals once so the loop skips attribute lookups.
        read_chunk, write_chunk = fsrc.read, fdst.write
        while chunk := read_chunk(chunk_size):
            write_chunk(chunk)
    
    # copyfileobj的使用
    import shutil
    # Create a.txt, then let shutil stream its contents into b.txt.
    with open('a.txt', 'w+') as source:
        source.write('abcd\n1234')
        source.flush()
        # Rewind so copyfileobj starts reading at the beginning of the file.
        source.seek(0)
        with open('b.txt', 'w+') as target:
            shutil.copyfileobj(source, target)
    
  2. copyfile(src, dst, *, follow_symlinks=True)
    • 复制文件内容,不含元数据
    • src、dst为文件的路径字符串
    • 本质上调用的就是copyfileobj,以二进制方式只复制文件内容,不复制元数据,其源码如下:
    def copyfile(src, dst, *, follow_symlinks=True):
        ......
        ......
        if _samefile(src, dst):
            raise SameFileError("{!r} and {!r} are the same file".format(src, dst))
        ......
        ......
        if not follow_symlinks and _islink(src):
            os.symlink(os.readlink(src), dst)
        else:
            with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
                ......
                ......
                copyfileobj(fsrc, fdst)
        return dst
    
    # copyfile的使用
    import shutil
    # Copy only the contents of a.txt into c.txt (no metadata).
    shutil.copyfile('a.txt', 'c.txt')
    

copymode和copystat

  1. copymode(src, dst, *, follow_symlinks=True)

    • 仅复制权限,源码如下:
    def copymode(src, dst, *, follow_symlinks=True):
        """Apply the permission bits of src to dst.

        The symlinks themselves are operated on (instead of being followed)
        only when follow_symlinks is false and both `src` and `dst` are
        symlinks. In that case, if `os.lchmod` does not exist, the function
        returns without changing anything.
        """
        sys.audit("shutil.copymode", src, dst)

        operate_on_links = (
            not follow_symlinks and _islink(src) and os.path.islink(dst)
        )
        if not operate_on_links:
            read_stat, set_mode = _stat, os.chmod
        elif hasattr(os, 'lchmod'):
            read_stat, set_mode = os.lstat, os.lchmod
        else:
            # Without os.lchmod there is nothing to apply to the link itself.
            return

        src_stat = read_stat(src)
        # S_IMODE keeps only the permission bits of st_mode.
        set_mode(dst, stat.S_IMODE(src_stat.st_mode))
    
    # Copy only the permission bits of 'test1' onto 'test'.
    shutil.copymode('test1','test')
    

    查看并比较test1和test的权限

  2. copystat(src, dst, *, follow_symlinks=True)

    • 复制元数据,stat包含权限
    # Copy the metadata (which includes the permission bits) of 'test1' onto 'test'.
    shutil.copystat('test1','test')
    

    查看并比较test1和test的权限

copy和copy2

  1. copy(src, dst, *, follow_symlinks=True)
    • 复制文件内容和权限位(mode),不复制其他元数据,例如创建时间和修改时间
    • 本质上调用的是copyfile和copymode,其源码如下:
    def copy(src, dst, *, follow_symlinks=True):
        """Copy file data and permission bits; return the destination path.

        The data is copied with copyfile() and the mode bits with copymode().
        When dst is an existing directory, the file is placed inside it under
        src's base name.

        With follow_symlinks false, symlinks are not followed, similar to
        GNU's "cp -P src dst". copyfile() raises SameFileError when source
        and destination are the same file.
        """
        target = dst
        if os.path.isdir(target):
            target = os.path.join(target, os.path.basename(src))
        link_opts = {'follow_symlinks': follow_symlinks}
        copyfile(src, target, **link_opts)
        copymode(src, target, **link_opts)
        return target
    
  2. copy2(src, dst, *, follow_symlinks=True)
    • copy2 在copy的基础上,还会通过copystat尽可能多地复制元数据(例如访问时间和修改时间)
    • 本质上是调用了copyfile和copystat,其源码如下:
    def copy2(src, dst, *, follow_symlinks=True):
        """Copy file data plus metadata; return the destination path.

        The data is copied with copyfile() and the metadata with copystat().
        When dst is an existing directory, the file is placed inside it under
        src's base name.

        With follow_symlinks false, symlinks are not followed, similar to
        GNU's "cp -P src dst".
        """
        target = dst
        if os.path.isdir(target):
            target = os.path.join(target, os.path.basename(src))
        link_opts = {'follow_symlinks': follow_symlinks}
        copyfile(src, target, **link_opts)
        copystat(src, target, **link_opts)
        return target
    

copytree

  • copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, ignore_dangling_symlinks=False, dirs_exist_ok=False)
  • 递归复制目录。默认使用copy2,也就是说复制了更多的元数据
  • src、dst必须是目录,src必须存在;dst默认必须不存在,除非传入dirs_exist_ok=True
  • ignore中设置不需要拷贝的内容,需要传的是一个函数,且返回值是set类型,源码解读:
    def _copytree(entries, src, dst, symlinks, ignore, copy_function,
                ignore_dangling_symlinks, dirs_exist_ok=False):
        if ignore is not None:
            ignored_names = ignore(os.fspath(src), [x.name for x in entries]) # ignore是可调用(callable)对象
        else:
            ignored_names = set() # 集合
    ......
    ......
        for srcentry in entries:
            if srcentry.name in ignored_names:
                continue
    ....
    ....
    elif srcentry.is_dir():
                    copytree(srcobj, dstname, symlinks, ignore, copy_function,
                            dirs_exist_ok=dirs_exist_ok) # 递归调用copytree,完成遍历
                else:
                    # Will raise a SpecialFileError for unsupported file types
                    copy_function(srcobj, dstname) # 即默认调用copy2
    ......
    ......
    
  • ignore设置不需要拷贝的内容,使用示例
# 将a文件夹(包含子文件夹b)拷贝到当前目录的c文件夹中,各级目录下名称以a开头的条目会被跳过
import shutil
from pathlib import Path

def ignore(src, names):
    """Return the set of names that copytree should skip.

    A name is skipped when it starts with 'a'. ``src`` is accepted because
    copytree passes it to the callback, but it is not used here.
    """
    skipped = set()
    for entry in names:
        if not entry.startswith('a'):
            continue
        # Everything collected here is excluded from the copy.
        skipped.add(entry)
    return skipped

def ignore1(src, names):
    """Set-comprehension variant: return the names starting with 'a'."""
    return {
        entry
        for entry in names
        if entry.startswith('a')
    }

def ignore2(src, names):
    """filter() variant: return the set of names starting with 'a'."""
    def starts_with_a(entry):
        return entry.startswith('a')

    return set(filter(starts_with_a, names))

# Build the sample tree: a/b/a.txt and a/b/b.txt.
p = Path('a/b')
p.mkdir(parents=True, exist_ok=True)
p1 = p / 'a.txt'
p1.touch()
p2 = p / 'b.txt'
p2.touch()

# Copy the whole 'a' tree to ./c; ignore2 filters out names starting with 'a'.
# cwd() is a classmethod, so it is called on Path rather than on the instance
# p, whose own path has no influence on the result.
# Note: by default copytree raises FileExistsError when ./c already exists.
shutil.copytree('a/', Path.cwd() / 'c', ignore=ignore2)

复制使用结论

  • 如果需要拷贝更多元数据可使用copy2
  • 如果需要递归复制,可使用copytree

删除


rmtree

  • shutil.rmtree(path, ignore_errors=False, onerror=None)
  • 递归删除。如同rm -rf一样危险,慎用
  • 它不是原子操作:删除过程中一旦出错就会中断,而此前已经删除的内容无法恢复
  • ignore_errors为True时,忽略错误;为False或省略时,onerror生效
  • onerror为callable,接受三个参数:function、path和excinfo

移动


move

  • move(src, dst, copy_function=copy2)
  • 递归移动文件、目录到目标,返回目标
  • 优先使用os.rename方法直接重命名
  • 如果无法rename(例如跨文件系统):目录用copytree复制后再删除源目录,文件则用copy_function复制后再删除源文件
  • 默认使用copy2方法

参考


  • magedu