Append to compressed tar file with performance

.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;

up vote
6
down vote

favorite

As you might know, Python's tarfile does not have the ability to append to files compressed with e.g. gz or bz2. I tried to implement this functionality which works fine but operates slowly. The following function accepts a string or bytes object and appends it as a file to an existing tarfile. I'm not sure if this code has the best performance, and it may have issues. I also tried writing files to memory instead of to a temporary directory, but this didn't impact performance.

import os
import tarfile
import tempfile
import time
from pathlib import Path


def append_tar_file(buffer, file_name, output_path, replace=True):
 """
 append a buffer to an existing tar file
 """
 # extract files
 # check for existing file and overwrite if need to
 # compress files
 if not os.path.isfile(output_path):
 return
 buffer = buffer.encode("utf-8") if isinstance(buffer, str) else buffer

 with tempfile.TemporaryDirectory() as tempdir:
 tempdirp = Path(tempdir)
 with tarfile.open(output_path, "r:bz2") as tar:
 try:
 tar.extractall(os.path.abspath(tempdirp))
 except Exception as err: #tar file is empty
 print(err)
 buffer_path = os.path.join(tempdir, os.path.basename(file_name))
 if replace or (buffer_path not in list(os.path.abspath(f) for f in tempdirp.iterdir())):
 with open(buffer_path, "wb") as f:
 f.write(buffer)

 with tarfile.open(output_path, "w:bz2") as tar:
 for file in tempdirp.iterdir():
 try:
 tar.add(file, arcname=os.path.basename(os.path.normpath(file)))
 except Exception as err:
 print(err)

if __name__ == "__main__":
 path = "./test.tar.gz"
 buffer = "Test String"
 filename = "somefile"
 for i in range(1, 100):
 print(time.time())
 append_tar_file(buffer, filename+str(i), path)

edited Jul 29 at 17:44

Daniel

4,0632834

asked Jul 29 at 7:50

Masoud R.

355

add a commentÂ |Â

up vote
6
down vote

favorite

import os
import tarfile
import tempfile
import time
from pathlib import Path


def append_tar_file(buffer, file_name, output_path, replace=True):
 """
 append a buffer to an existing tar file
 """
 # extract files
 # check for existing file and overwrite if need to
 # compress files
 if not os.path.isfile(output_path):
 return
 buffer = buffer.encode("utf-8") if isinstance(buffer, str) else buffer

 with tempfile.TemporaryDirectory() as tempdir:
 tempdirp = Path(tempdir)
 with tarfile.open(output_path, "r:bz2") as tar:
 try:
 tar.extractall(os.path.abspath(tempdirp))
 except Exception as err: #tar file is empty
 print(err)
 buffer_path = os.path.join(tempdir, os.path.basename(file_name))
 if replace or (buffer_path not in list(os.path.abspath(f) for f in tempdirp.iterdir())):
 with open(buffer_path, "wb") as f:
 f.write(buffer)

 with tarfile.open(output_path, "w:bz2") as tar:
 for file in tempdirp.iterdir():
 try:
 tar.add(file, arcname=os.path.basename(os.path.normpath(file)))
 except Exception as err:
 print(err)

if __name__ == "__main__":
 path = "./test.tar.gz"
 buffer = "Test String"
 filename = "somefile"
 for i in range(1, 100):
 print(time.time())
 append_tar_file(buffer, filename+str(i), path)

edited Jul 29 at 17:44

Daniel

4,0632834

asked Jul 29 at 7:50

Masoud R.

355

add a commentÂ |Â

up vote
6
down vote

favorite

import os
import tarfile
import tempfile
import time
from pathlib import Path


def append_tar_file(buffer, file_name, output_path, replace=True):
 """
 append a buffer to an existing tar file
 """
 # extract files
 # check for existing file and overwrite if need to
 # compress files
 if not os.path.isfile(output_path):
 return
 buffer = buffer.encode("utf-8") if isinstance(buffer, str) else buffer

 with tempfile.TemporaryDirectory() as tempdir:
 tempdirp = Path(tempdir)
 with tarfile.open(output_path, "r:bz2") as tar:
 try:
 tar.extractall(os.path.abspath(tempdirp))
 except Exception as err: #tar file is empty
 print(err)
 buffer_path = os.path.join(tempdir, os.path.basename(file_name))
 if replace or (buffer_path not in list(os.path.abspath(f) for f in tempdirp.iterdir())):
 with open(buffer_path, "wb") as f:
 f.write(buffer)

 with tarfile.open(output_path, "w:bz2") as tar:
 for file in tempdirp.iterdir():
 try:
 tar.add(file, arcname=os.path.basename(os.path.normpath(file)))
 except Exception as err:
 print(err)

if __name__ == "__main__":
 path = "./test.tar.gz"
 buffer = "Test String"
 filename = "somefile"
 for i in range(1, 100):
 print(time.time())
 append_tar_file(buffer, filename+str(i), path)

edited Jul 29 at 17:44

Daniel

4,0632834

asked Jul 29 at 7:50

Masoud R.

355

import os
import tarfile
import tempfile
import time
from pathlib import Path


def append_tar_file(buffer, file_name, output_path, replace=True):
 """
 append a buffer to an existing tar file
 """
 # extract files
 # check for existing file and overwrite if need to
 # compress files
 if not os.path.isfile(output_path):
 return
 buffer = buffer.encode("utf-8") if isinstance(buffer, str) else buffer

 with tempfile.TemporaryDirectory() as tempdir:
 tempdirp = Path(tempdir)
 with tarfile.open(output_path, "r:bz2") as tar:
 try:
 tar.extractall(os.path.abspath(tempdirp))
 except Exception as err: #tar file is empty
 print(err)
 buffer_path = os.path.join(tempdir, os.path.basename(file_name))
 if replace or (buffer_path not in list(os.path.abspath(f) for f in tempdirp.iterdir())):
 with open(buffer_path, "wb") as f:
 f.write(buffer)

 with tarfile.open(output_path, "w:bz2") as tar:
 for file in tempdirp.iterdir():
 try:
 tar.add(file, arcname=os.path.basename(os.path.normpath(file)))
 except Exception as err:
 print(err)

if __name__ == "__main__":
 path = "./test.tar.gz"
 buffer = "Test String"
 filename = "somefile"
 for i in range(1, 100):
 print(time.time())
 append_tar_file(buffer, filename+str(i), path)

edited Jul 29 at 17:44

Daniel

4,0632834

asked Jul 29 at 7:50

Masoud R.

355

edited Jul 29 at 17:44

Daniel

4,0632834

edited Jul 29 at 17:44

Daniel

4,0632834

edited Jul 29 at 17:44

Daniel

4,0632834

asked Jul 29 at 7:50

Masoud R.

355

asked Jul 29 at 7:50

Masoud R.

355

asked Jul 29 at 7:50

Masoud R.

355

add a commentÂ |Â

2 Answers
2

active

oldest

votes

up vote
5
down vote

accepted

Indeed, the tarfile package doesn't support appending to a compressed tar.
But I think you can do better than your current attempt.
Instead of extracting the content to disk, you could keep it in memory, write and append to a new compressed file, and finally rename the compressed file.
That way you will write a single temporary file,
instead of many.
The reduced disk I/O should improve the performance.

def append_tar_file(buf, file_name, output_path, replace=True):
 """
 append a buf to an existing tar file if not already there, or if replace=True
 """
 if not os.path.isfile(output_path):
 return

 with tempfile.TemporaryDirectory() as tempdir:
 tmp_path = os.path.join(tempdir, 'tmp.tar.bz2')

 with tarfile.open(output_path, "r:bz2") as tar:
 if not replace:
 if file_name in (member.name for member in tar):
 return

 if isinstance(buf, str):
 buf = buf.encode("utf-8")

 fileobj = BytesIO(buf)
 tarinfo = tarfile.TarInfo(file_name)
 tarinfo.size = len(fileobj.getvalue())

 with tarfile.open(tmp_path, "w:bz2") as tmp:
 for member in tar:
 if member.name != file_name:
 tmp.addfile(member, tar.extractfile(member.name))
 tmp.addfile(tarinfo, fileobj)

 os.rename(tmp_path, output_path)

edited Jul 30 at 9:06

Daniel

4,0632834

answered Jul 29 at 10:18

janos

95.2k12119342

Can multiprocessing used while working with memory? I mean when adding files back to output tar file use it. When the compressed file getting larger, it would take more time to append.
â€“Â Masoud R.
Jul 30 at 7:59

add a commentÂ |Â

up vote
4
down vote

Instead of hardcoding the compression scheme (and possibly compressing a .tar.gz file using BZIP2 as you do), you should try to infer that information. Note that tarfile let you open a compressed file without knowing the compression scheme using tarfile.open(filename, 'r:*') but there is no equivalent for writing the archive.

Since the compression scheme understood by the tarfile module are usual extensions of the files, inspecting the suffixes of output_path should be enough:

def get_compression(filename):
 suffixes = Path(filename).suffixes
 tar, *compression = (s.lstrip('.') for s in suffixes)

 if tar == 'tgz':
 if compression:
 raise RuntimeError('Too much suffixes, cannot infer compression scheme from '.format(''.join(suffixes)))
 return 'gz'

 if tar != 'tar':
 raise RuntimeError('Not a tar archive')

 if not compression:
 return ''

 try:
 compression, = compression
 except ValueError:
 raise RuntimeError('Too much compression scheme: '.format(', '.join(compression))) from None
 else:
 return compression

Now you can use compression = get_compression(output_path) and then open the tar file using tarfile.open(<name>, 'r:'.format(compression)) and open the file for writing using tarfile.open(<name>, 'w:'.format(compression)).

Note that I used RuntimeError here but you should probably come up with something less generic.

edited Jul 29 at 11:57

answered Jul 29 at 11:51

Mathias Ettinger

21.7k32875

Thanks. The function return gz for the files that created with bz2 in tarfile. Thus it should return bz2 instead of gz.
â€“Â Masoud R.
Jul 30 at 6:41

If your file is named something.tar.gz, it is not coherent to use anything else than GZIP compression to create the file. This is the assumption this function is doing, hence the choice to only check the filename and not trying to do anything fancy with the file content. If you store BZIP2 compressed data in a .tar.gz this is your mistake to start with.
â€“Â Mathias Ettinger
Jul 30 at 7:15

So you mean I should use tar.bz2 right?
â€“Â Masoud R.
Jul 30 at 7:17

2

If you want to store BZIP2 compressed data, definitely.
â€“Â Mathias Ettinger
Jul 30 at 7:23

1

On the other hand, if you want something fancier based on the file content rather than its name, you can check how tarfile handle the r:* mode. Basically, it tries to open the file using, in turn gzip.GzipFile, bz2.BZ2File, lzma.LZMAFile and if all failed fallback to trying without compression.
â€“Â Mathias Ettinger
Jul 30 at 7:33

add a commentÂ |Â

Your Answer

StackExchange.ifUsing("editor", function ()
return StackExchange.using("mathjaxEditing", function ()
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
);
);
, "mathjax-editing");

StackExchange.ifUsing("editor", function ()
StackExchange.using("externalEditor", function ()
StackExchange.using("snippets", function ()
StackExchange.snippets.init();
);
);
, "code-snippets");

StackExchange.ready(function()
var channelOptions =
tags: "".split(" "),
id: "196"
;
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function()
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled)
StackExchange.using("snippets", function()
createEditor();
);

else
createEditor();

);

function createEditor()
StackExchange.prepareEditor(
heartbeatType: 'answer',
convertImagesToLinks: false,
noModals: false,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
);

);

draft saved

draft discarded

StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f200514%2fappend-to-compressed-tar-file-with-performance%23new-answer', 'question_page');

);

Post as a guest

Name

2 Answers
2

active

oldest

votes

2 Answers
2

active

oldest

votes

up vote
5
down vote

accepted

def append_tar_file(buf, file_name, output_path, replace=True):
 """
 append a buf to an existing tar file if not already there, or if replace=True
 """
 if not os.path.isfile(output_path):
 return

 with tempfile.TemporaryDirectory() as tempdir:
 tmp_path = os.path.join(tempdir, 'tmp.tar.bz2')

 with tarfile.open(output_path, "r:bz2") as tar:
 if not replace:
 if file_name in (member.name for member in tar):
 return

 if isinstance(buf, str):
 buf = buf.encode("utf-8")

 fileobj = BytesIO(buf)
 tarinfo = tarfile.TarInfo(file_name)
 tarinfo.size = len(fileobj.getvalue())

 with tarfile.open(tmp_path, "w:bz2") as tmp:
 for member in tar:
 if member.name != file_name:
 tmp.addfile(member, tar.extractfile(member.name))
 tmp.addfile(tarinfo, fileobj)

 os.rename(tmp_path, output_path)

edited Jul 30 at 9:06

Daniel

4,0632834

answered Jul 29 at 10:18

janos

95.2k12119342

Can multiprocessing used while working with memory? I mean when adding files back to output tar file use it. When the compressed file getting larger, it would take more time to append.
â€“Â Masoud R.
Jul 30 at 7:59

add a commentÂ |Â

up vote
5
down vote

accepted

def append_tar_file(buf, file_name, output_path, replace=True):
 """
 append a buf to an existing tar file if not already there, or if replace=True
 """
 if not os.path.isfile(output_path):
 return

 with tempfile.TemporaryDirectory() as tempdir:
 tmp_path = os.path.join(tempdir, 'tmp.tar.bz2')

 with tarfile.open(output_path, "r:bz2") as tar:
 if not replace:
 if file_name in (member.name for member in tar):
 return

 if isinstance(buf, str):
 buf = buf.encode("utf-8")

 fileobj = BytesIO(buf)
 tarinfo = tarfile.TarInfo(file_name)
 tarinfo.size = len(fileobj.getvalue())

 with tarfile.open(tmp_path, "w:bz2") as tmp:
 for member in tar:
 if member.name != file_name:
 tmp.addfile(member, tar.extractfile(member.name))
 tmp.addfile(tarinfo, fileobj)

 os.rename(tmp_path, output_path)

edited Jul 30 at 9:06

Daniel

4,0632834

answered Jul 29 at 10:18

janos

95.2k12119342

Can multiprocessing used while working with memory? I mean when adding files back to output tar file use it. When the compressed file getting larger, it would take more time to append.
â€“Â Masoud R.
Jul 30 at 7:59

add a commentÂ |Â

up vote
5
down vote

accepted

def append_tar_file(buf, file_name, output_path, replace=True):
 """
 append a buf to an existing tar file if not already there, or if replace=True
 """
 if not os.path.isfile(output_path):
 return

 with tempfile.TemporaryDirectory() as tempdir:
 tmp_path = os.path.join(tempdir, 'tmp.tar.bz2')

 with tarfile.open(output_path, "r:bz2") as tar:
 if not replace:
 if file_name in (member.name for member in tar):
 return

 if isinstance(buf, str):
 buf = buf.encode("utf-8")

 fileobj = BytesIO(buf)
 tarinfo = tarfile.TarInfo(file_name)
 tarinfo.size = len(fileobj.getvalue())

 with tarfile.open(tmp_path, "w:bz2") as tmp:
 for member in tar:
 if member.name != file_name:
 tmp.addfile(member, tar.extractfile(member.name))
 tmp.addfile(tarinfo, fileobj)

 os.rename(tmp_path, output_path)

edited Jul 30 at 9:06

Daniel

4,0632834

answered Jul 29 at 10:18

janos

95.2k12119342

def append_tar_file(buf, file_name, output_path, replace=True):
 """
 append a buf to an existing tar file if not already there, or if replace=True
 """
 if not os.path.isfile(output_path):
 return

 with tempfile.TemporaryDirectory() as tempdir:
 tmp_path = os.path.join(tempdir, 'tmp.tar.bz2')

 with tarfile.open(output_path, "r:bz2") as tar:
 if not replace:
 if file_name in (member.name for member in tar):
 return

 if isinstance(buf, str):
 buf = buf.encode("utf-8")

 fileobj = BytesIO(buf)
 tarinfo = tarfile.TarInfo(file_name)
 tarinfo.size = len(fileobj.getvalue())

 with tarfile.open(tmp_path, "w:bz2") as tmp:
 for member in tar:
 if member.name != file_name:
 tmp.addfile(member, tar.extractfile(member.name))
 tmp.addfile(tarinfo, fileobj)

 os.rename(tmp_path, output_path)

edited Jul 30 at 9:06

Daniel

4,0632834

answered Jul 29 at 10:18

janos

95.2k12119342

edited Jul 30 at 9:06

Daniel

4,0632834

edited Jul 30 at 9:06

Daniel

4,0632834

edited Jul 30 at 9:06

Daniel

4,0632834

answered Jul 29 at 10:18

janos

95.2k12119342

answered Jul 29 at 10:18

janos

95.2k12119342

answered Jul 29 at 10:18

janos

95.2k12119342

Can multiprocessing used while working with memory? I mean when adding files back to output tar file use it. When the compressed file getting larger, it would take more time to append.
â€“Â Masoud R.
Jul 30 at 7:59

add a commentÂ |Â

Can multiprocessing used while working with memory? I mean when adding files back to output tar file use it. When the compressed file getting larger, it would take more time to append.
â€“Â Masoud R.
Jul 30 at 7:59

Can multiprocessing used while working with memory? I mean when adding files back to output tar file use it. When the compressed file getting larger, it would take more time to append.
â€“Â Masoud R.
Jul 30 at 7:59

add a commentÂ |Â

up vote
4
down vote

Since the compression scheme understood by the tarfile module are usual extensions of the files, inspecting the suffixes of output_path should be enough:

def get_compression(filename):
 suffixes = Path(filename).suffixes
 tar, *compression = (s.lstrip('.') for s in suffixes)

 if tar == 'tgz':
 if compression:
 raise RuntimeError('Too much suffixes, cannot infer compression scheme from '.format(''.join(suffixes)))
 return 'gz'

 if tar != 'tar':
 raise RuntimeError('Not a tar archive')

 if not compression:
 return ''

 try:
 compression, = compression
 except ValueError:
 raise RuntimeError('Too much compression scheme: '.format(', '.join(compression))) from None
 else:
 return compression

Note that I used RuntimeError here but you should probably come up with something less generic.

edited Jul 29 at 11:57

answered Jul 29 at 11:51

Mathias Ettinger

21.7k32875

Thanks. The function return gz for the files that created with bz2 in tarfile. Thus it should return bz2 instead of gz.
â€“Â Masoud R.
Jul 30 at 6:41

If your file is named something.tar.gz, it is not coherent to use anything else than GZIP compression to create the file. This is the assumption this function is doing, hence the choice to only check the filename and not trying to do anything fancy with the file content. If you store BZIP2 compressed data in a .tar.gz this is your mistake to start with.
â€“Â Mathias Ettinger
Jul 30 at 7:15

So you mean I should use tar.bz2 right?
â€“Â Masoud R.
Jul 30 at 7:17

2

If you want to store BZIP2 compressed data, definitely.
â€“Â Mathias Ettinger
Jul 30 at 7:23

1

On the other hand, if you want something fancier based on the file content rather than its name, you can check how tarfile handle the r:* mode. Basically, it tries to open the file using, in turn gzip.GzipFile, bz2.BZ2File, lzma.LZMAFile and if all failed fallback to trying without compression.
â€“Â Mathias Ettinger
Jul 30 at 7:33

add a commentÂ |Â

up vote
4
down vote

Since the compression scheme understood by the tarfile module are usual extensions of the files, inspecting the suffixes of output_path should be enough:

def get_compression(filename):
 suffixes = Path(filename).suffixes
 tar, *compression = (s.lstrip('.') for s in suffixes)

 if tar == 'tgz':
 if compression:
 raise RuntimeError('Too much suffixes, cannot infer compression scheme from '.format(''.join(suffixes)))
 return 'gz'

 if tar != 'tar':
 raise RuntimeError('Not a tar archive')

 if not compression:
 return ''

 try:
 compression, = compression
 except ValueError:
 raise RuntimeError('Too much compression scheme: '.format(', '.join(compression))) from None
 else:
 return compression

Note that I used RuntimeError here but you should probably come up with something less generic.

edited Jul 29 at 11:57

answered Jul 29 at 11:51

Mathias Ettinger

21.7k32875

Thanks. The function return gz for the files that created with bz2 in tarfile. Thus it should return bz2 instead of gz.
â€“Â Masoud R.
Jul 30 at 6:41

If your file is named something.tar.gz, it is not coherent to use anything else than GZIP compression to create the file. This is the assumption this function is doing, hence the choice to only check the filename and not trying to do anything fancy with the file content. If you store BZIP2 compressed data in a .tar.gz this is your mistake to start with.
â€“Â Mathias Ettinger
Jul 30 at 7:15

So you mean I should use tar.bz2 right?
â€“Â Masoud R.
Jul 30 at 7:17

2

If you want to store BZIP2 compressed data, definitely.
â€“Â Mathias Ettinger
Jul 30 at 7:23

1

On the other hand, if you want something fancier based on the file content rather than its name, you can check how tarfile handle the r:* mode. Basically, it tries to open the file using, in turn gzip.GzipFile, bz2.BZ2File, lzma.LZMAFile and if all failed fallback to trying without compression.
â€“Â Mathias Ettinger
Jul 30 at 7:33

add a commentÂ |Â

up vote
4
down vote

Since the compression scheme understood by the tarfile module are usual extensions of the files, inspecting the suffixes of output_path should be enough:

def get_compression(filename):
 suffixes = Path(filename).suffixes
 tar, *compression = (s.lstrip('.') for s in suffixes)

 if tar == 'tgz':
 if compression:
 raise RuntimeError('Too much suffixes, cannot infer compression scheme from '.format(''.join(suffixes)))
 return 'gz'

 if tar != 'tar':
 raise RuntimeError('Not a tar archive')

 if not compression:
 return ''

 try:
 compression, = compression
 except ValueError:
 raise RuntimeError('Too much compression scheme: '.format(', '.join(compression))) from None
 else:
 return compression

Note that I used RuntimeError here but you should probably come up with something less generic.

edited Jul 29 at 11:57

answered Jul 29 at 11:51

Mathias Ettinger

21.7k32875

Since the compression scheme understood by the tarfile module are usual extensions of the files, inspecting the suffixes of output_path should be enough:

def get_compression(filename):
 suffixes = Path(filename).suffixes
 tar, *compression = (s.lstrip('.') for s in suffixes)

 if tar == 'tgz':
 if compression:
 raise RuntimeError('Too much suffixes, cannot infer compression scheme from '.format(''.join(suffixes)))
 return 'gz'

 if tar != 'tar':
 raise RuntimeError('Not a tar archive')

 if not compression:
 return ''

 try:
 compression, = compression
 except ValueError:
 raise RuntimeError('Too much compression scheme: '.format(', '.join(compression))) from None
 else:
 return compression

Note that I used RuntimeError here but you should probably come up with something less generic.

edited Jul 29 at 11:57

answered Jul 29 at 11:51

Mathias Ettinger

21.7k32875

edited Jul 29 at 11:57

answered Jul 29 at 11:51

Mathias Ettinger

21.7k32875

answered Jul 29 at 11:51

Mathias Ettinger

21.7k32875

answered Jul 29 at 11:51

Mathias Ettinger

21.7k32875

Thanks. The function return gz for the files that created with bz2 in tarfile. Thus it should return bz2 instead of gz.
â€“Â Masoud R.
Jul 30 at 6:41

If your file is named something.tar.gz, it is not coherent to use anything else than GZIP compression to create the file. This is the assumption this function is doing, hence the choice to only check the filename and not trying to do anything fancy with the file content. If you store BZIP2 compressed data in a .tar.gz this is your mistake to start with.
â€“Â Mathias Ettinger
Jul 30 at 7:15

So you mean I should use tar.bz2 right?
â€“Â Masoud R.
Jul 30 at 7:17

2

If you want to store BZIP2 compressed data, definitely.
â€“Â Mathias Ettinger
Jul 30 at 7:23

1

On the other hand, if you want something fancier based on the file content rather than its name, you can check how tarfile handle the r:* mode. Basically, it tries to open the file using, in turn gzip.GzipFile, bz2.BZ2File, lzma.LZMAFile and if all failed fallback to trying without compression.
â€“Â Mathias Ettinger
Jul 30 at 7:33

add a commentÂ |Â

Thanks. The function return gz for the files that created with bz2 in tarfile. Thus it should return bz2 instead of gz.
â€“Â Masoud R.
Jul 30 at 6:41

If your file is named something.tar.gz, it is not coherent to use anything else than GZIP compression to create the file. This is the assumption this function is doing, hence the choice to only check the filename and not trying to do anything fancy with the file content. If you store BZIP2 compressed data in a .tar.gz this is your mistake to start with.
â€“Â Mathias Ettinger
Jul 30 at 7:15

So you mean I should use tar.bz2 right?
â€“Â Masoud R.
Jul 30 at 7:17

2

If you want to store BZIP2 compressed data, definitely.
â€“Â Mathias Ettinger
Jul 30 at 7:23

1

On the other hand, if you want something fancier based on the file content rather than its name, you can check how tarfile handle the r:* mode. Basically, it tries to open the file using, in turn gzip.GzipFile, bz2.BZ2File, lzma.LZMAFile and if all failed fallback to trying without compression.
â€“Â Mathias Ettinger
Jul 30 at 7:33

Thanks. The function return gz for the files that created with bz2 in tarfile. Thus it should return bz2 instead of gz.
â€“Â Masoud R.
Jul 30 at 6:41

If your file is named something.tar.gz, it is not coherent to use anything else than GZIP compression to create the file. This is the assumption this function is doing, hence the choice to only check the filename and not trying to do anything fancy with the file content. If you store BZIP2 compressed data in a .tar.gz this is your mistake to start with.
â€“Â Mathias Ettinger
Jul 30 at 7:15

So you mean I should use tar.bz2 right?
â€“Â Masoud R.
Jul 30 at 7:17

If you want to store BZIP2 compressed data, definitely.
â€“Â Mathias Ettinger
Jul 30 at 7:23

On the other hand, if you want something fancier based on the file content rather than its name, you can check how tarfile handle the r:* mode. Basically, it tries to open the file using, in turn gzip.GzipFile, bz2.BZ2File, lzma.LZMAFile and if all failed fallback to trying without compression.
â€“Â Mathias Ettinger
Jul 30 at 7:33

add a commentÂ |Â

draft saved

draft discarded

draft saved

draft discarded

Post as a guest

Name

搜尋此網誌

trjhtr