July 10, 2009

Python - Zip Directories Recursively

This helped me out today with some backup scripts. Posting here so I can remember it. Idea and snippet adapted from: http://mail.python.org/pipermail/python-list/2007-February/596539.html

        
#!/usr/bin/env python

import os
import zipfile


def main():
    """Zip the sample directory c:/test into c:/temp/test.zip."""
    source_dir = 'c:/test'
    target_archive = 'c:/temp/test.zip'
    zipper(source_dir, target_archive)


def zipper(dir, zip_file):
    """Recursively add every file under ``dir`` to a new zip archive.

    Archive member names are relative to ``dir``, so the directory layout
    below ``dir`` is reproduced inside the archive. The base name of each
    file is printed as it is added.

    Args:
        dir: Directory whose files are archived.
        zip_file: Path of the archive to create, or a writable file-like
            object accepted by ``zipfile.ZipFile``.

    Returns:
        ``zip_file``, unchanged.
    """
    # Resolve the archive's own location so it can be skipped when it is
    # created inside ``dir``; a file-like ``zip_file`` has no path to compare.
    try:
        own_path = os.path.normcase(os.path.abspath(zip_file))
    except (TypeError, AttributeError):
        own_path = None

    # The ``with`` block closes the archive even if adding a file fails.
    with zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED) as archive:
        for root, _dirs, files in os.walk(dir):
            for f in files:
                fullpath = os.path.join(root, f)
                if os.path.normcase(os.path.abspath(fullpath)) == own_path:
                    # Never add the archive being written to itself.
                    continue
                archive_name = os.path.relpath(fullpath, dir)
                print(f)
                archive.write(fullpath, archive_name)
    return zip_file


# Run the example only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

* code updated. there was a bug in the original I posted (cmg - 07/13/09)

6 comments:

Rayjan Wilson said...

hi corey, thanks for sharing this code. i extended zipper to have commandline options so it can be used for general recursive zipping. hope it helps. it's up on www.casualengineer.blogspot.com with credit given to you for inspiration

cheers


#!/usr/bin/env python
import os, zipfile

def zipper(dir, zip_file):
    """Recursively add every file under ``dir`` to a new zip archive.

    Member names are stored relative to ``dir`` so files in sub-directories
    keep their location instead of being flattened to their base name. The
    path of each file is printed after it is added.

    Args:
        dir: Directory whose files are archived.
        zip_file: Path of the archive to create, or a writable file-like
            object accepted by ``zipfile.ZipFile``.

    Returns:
        ``zip_file``, unchanged.
    """
    # Resolve the archive's own location so it can be skipped when it is
    # created inside ``dir``; a file-like ``zip_file`` has no path to compare.
    try:
        own_path = os.path.normcase(os.path.abspath(zip_file))
    except (TypeError, AttributeError):
        own_path = None

    # The ``with`` block closes the archive even if adding a file fails.
    with zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED) as z:
        # os.walk yields files separately from directories, so only regular
        # directory entries listed as files are written.
        for root, _dirs, files in os.walk(dir):
            for name in files:
                path = os.path.join(root, name)
                if os.path.normcase(os.path.abspath(path)) == own_path:
                    # Never add the archive being written to itself.
                    continue
                z.write(path, os.path.relpath(path, dir))
                print(path)
    return zip_file



if '__main__' == __name__:
    # Late import, in case this project becomes a library, never to be run as main again
    import optparse

    # Populate our options, -h/--help is already there for you
    usage = "usage: %prog [options]"
    version = "%prog 1.0"
    parser = optparse.OptionParser(usage=usage, version=version)
    parser.add_option(
        "-d", "--dir", dest="inputDir", default="c:/test", action="store",
        help="sets the input directory to something other than the default (c:/test)")
    parser.add_option(
        "-f", "--file", dest="outputFile", default="c:/temp/test.zip", action="store",
        help="sets the output zip file to something other than the default (c:/temp/test.zip)")

    # Parse the arguments (defaults to parsing sys.argv)
    (options, args) = parser.parse_args()

    # Positional arguments are not supported; the input directory and the
    # output file can only be set through -d and -f.
    if args:
        parser.error("Additional arguments are not supported\nYou can only change the inputDir or outputFile using the -d and -f options.\nType zippy.py -h for help.\n")

    # Do the actual work
    zipper(options.inputDir, options.outputFile)

Mark M said...

Hi,

Maybe I was doing something wrong - but your code didn't work for me.

It wouldn't handle sub-directories below the path.

Here is some code that seems to work for me (well I just created it now :) ).
Two other things I changed...
a) Used the more modern os.walk rather than os.path.walk
b) removed the os.path.basename(f) call - this always gave you back what you had before the os.path.join on the previous line.




import os
import zipfile


def main():
    """Zip the relative directory 'tsearch' into c:/_temp/testzip.zip."""
    zipper('tsearch', 'c:/_temp/testzip.zip')


def zipper(dir, zip_file):
    """Recursively add every file under ``dir`` to a new zip archive.

    Archive member names are relative to ``dir``, so the directory layout
    below ``dir`` is reproduced inside the archive. The base name of each
    file is printed as it is added.

    Args:
        dir: Directory whose files are archived.
        zip_file: Path of the archive to create, or a writable file-like
            object accepted by ``zipfile.ZipFile``.

    Returns:
        ``zip_file``, unchanged.
    """
    # Resolve the archive's own location so it can be skipped when it is
    # created inside ``dir``; a file-like ``zip_file`` has no path to compare.
    try:
        own_path = os.path.normcase(os.path.abspath(zip_file))
    except (TypeError, AttributeError):
        own_path = None

    # The ``with`` block closes the archive even if adding a file fails.
    with zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED) as archive:
        for root, _dirs, files in os.walk(dir):
            for f in files:
                # full path to the file
                fullpath = os.path.join(root, f)
                if os.path.normcase(os.path.abspath(fullpath)) == own_path:
                    # Never add the archive being written to itself.
                    continue
                # archive path to the file, relative to the root dir
                archive_name = os.path.relpath(fullpath, dir)
                print(f)
                archive.write(fullpath, archive_name)
    return zip_file


# Run the example only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()


(strange that you can't make Blogger treat that as Monospaced!)

Corey Goldberg said...

Mark, I just updated my code with the fix!

thanks,

-Corey

Corey Goldberg said...

Rayjan,

very cool :) thanks for the additions. see my updated post with some fixed code.

milleplateaux said...

Hi Corey,

Thanks for sharing the code. It is now the backbone of a small auto backup script.

Tal said...

Zip file is being created but is empty for me. code below


#!/usr/bin/env python
import os, zipfile

# NOTE(review): the indentation of this snippet was lost when it was pasted,
# so the intended nesting of the statements below is uncertain.
def zipper(dir, zip_file):
z = zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED)
def walker(zip, dir, files, root=dir):
root_len = len(os.path.abspath(dir))
# NOTE(review): this loop rebinds `root` and `files`, replacing the values
# walker received as arguments.
for root, dirs, files in os.walk(dir):
archive_root = os.path.abspath(root)[root_len:]
for f in files:
# NOTE(review): joins with `dir` rather than the `root` currently being
# walked, so the path looks wrong for files found in sub-directories.
f = os.path.join(dir, f)
# NOTE(review): basename drops the directory part again; `archive_root`
# computed above is never used.
archive_name = os.path.basename(f)
zip.write( f, archive_name, zipfile.ZIP_DEFLATED)
print f
# NOTE(review): if this call was indented inside walker, walker is never
# invoked and nothing is written -- possibly the cause of the empty archive;
# confirm against the original indentation.
os.path.walk(dir, walker, z)

z.close()
return zip_file



if '__main__' == __name__:
    # Late import, in case this project becomes a library, never to be run as main again
    import optparse

    # Populate our options, -h/--help is already there for you
    usage = "usage: %prog [options]"
    version = "%prog 1.0"
    parser = optparse.OptionParser(usage=usage, version=version)
    parser.add_option(
        "-d", "--dir", dest="inputDir", default="c:/test", action="store",
        help="sets the input directory to something other than the default (c:/test)")
    parser.add_option(
        "-f", "--file", dest="outputFile", default="c:/temp/test.zip", action="store",
        help="sets the output zip file to something other than the default (c:/temp/test.zip)")

    # Parse the arguments (defaults to parsing sys.argv)
    (options, args) = parser.parse_args()

    # Positional arguments are not supported; the input directory and the
    # output file can only be set through -d and -f.
    if args:
        parser.error("Additional arguments are not supported\nYou can only change the inputDir or outputFile using the -d and -f options.\nType zippy.py -h for help.\n")

    # Do the actual work
    zipper(options.inputDir, options.outputFile)