Append to compressed tar file with performance
Clash Royale CLAN TAG#URR8PPP
.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;
up vote
6
down vote
favorite
As you might know, Python's tarfile
does not have the ability to append to files compressed with e.g. gz
or bz2
. I tried to implement this functionality which works fine but operates slowly. The following function accepts a string or bytes object and appends it as a file to an existing tarfile. I'm not sure if this code has the best performance, and it may have issues. I also tried writing files to memory instead of to a temporary directory, but this didn't impact performance.
import os
import tarfile
import tempfile
import time
from pathlib import Path
def append_tar_file(buffer, file_name, output_path, replace=True):
    """Append a buffer as a new member of an existing bz2-compressed tar file.

    tarfile cannot append to a compressed archive in place, so the archive
    is fully extracted into a temporary directory, the new member is written
    there, and the whole tree is re-compressed over the original file.

    :param buffer: ``str`` or ``bytes`` content of the new member
        (``str`` is encoded as UTF-8).
    :param file_name: name of the member inside the archive; only its
        basename is used.
    :param output_path: path to an existing ``.tar.bz2`` archive.
        The function is a no-op when the file does not exist.
    :param replace: when False, an existing member with the same name
        is kept and the buffer is not written.
    """
    if not os.path.isfile(output_path):
        return
    data = buffer.encode("utf-8") if isinstance(buffer, str) else buffer
    with tempfile.TemporaryDirectory() as tempdir:
        tempdirp = Path(tempdir)
        with tarfile.open(output_path, "r:bz2") as tar:
            try:
                tar.extractall(tempdir)
            except tarfile.ReadError as err:
                # Archive is empty or truncated; start from an empty tree
                # instead of aborting the append.
                print(err)
        buffer_path = tempdirp / os.path.basename(file_name)
        # Only skip the write when replace=False and the member exists;
        # a direct existence check replaces the original abspath scan.
        if replace or not buffer_path.exists():
            buffer_path.write_bytes(data)
        with tarfile.open(output_path, "w:bz2") as tar:
            for entry in tempdirp.iterdir():
                try:
                    tar.add(entry, arcname=entry.name)
                except OSError as err:
                    # Best-effort, as in the original: skip unreadable
                    # entries but keep packing the rest.
                    print(err)
if __name__ == "__main__":
    # The function opens archives with bz2 compression, so the file is
    # named ".tar.bz2" (the original used ".tar.gz", mislabeling the
    # content).  Note append_tar_file is a no-op until the archive exists.
    path = "./test.tar.bz2"
    buffer = "Test String"
    filename = "somefile"
    for i in range(1, 100):
        print(time.time())
        append_tar_file(buffer, filename + str(i), path)
python performance compression
add a comment |
up vote
6
down vote
favorite
As you might know, Python's tarfile
does not have the ability to append to files compressed with e.g. gz
or bz2
. I tried to implement this functionality which works fine but operates slowly. The following function accepts a string or bytes object and appends it as a file to an existing tarfile. I'm not sure if this code has the best performance, and it may have issues. I also tried writing files to memory instead of to a temporary directory, but this didn't impact performance.
import os
import tarfile
import tempfile
import time
from pathlib import Path
def append_tar_file(buffer, file_name, output_path, replace=True):
"""
append a buffer to an existing tar file
"""
# extract files
# check for existing file and overwrite if need to
# compress files
if not os.path.isfile(output_path):
return
buffer = buffer.encode("utf-8") if isinstance(buffer, str) else buffer
with tempfile.TemporaryDirectory() as tempdir:
tempdirp = Path(tempdir)
with tarfile.open(output_path, "r:bz2") as tar:
try:
tar.extractall(os.path.abspath(tempdirp))
except Exception as err: #tar file is empty
print(err)
buffer_path = os.path.join(tempdir, os.path.basename(file_name))
if replace or (buffer_path not in list(os.path.abspath(f) for f in tempdirp.iterdir())):
with open(buffer_path, "wb") as f:
f.write(buffer)
with tarfile.open(output_path, "w:bz2") as tar:
for file in tempdirp.iterdir():
try:
tar.add(file, arcname=os.path.basename(os.path.normpath(file)))
except Exception as err:
print(err)
if __name__ == "__main__":
path = "./test.tar.gz"
buffer = "Test String"
filename = "somefile"
for i in range(1, 100):
print(time.time())
append_tar_file(buffer, filename+str(i), path)
python performance compression
add a comment |Â
up vote
6
down vote
favorite
up vote
6
down vote
favorite
As you might know, Python's tarfile
does not have the ability to append to files compressed with e.g. gz
or bz2
. I tried to implement this functionality which works fine but operates slowly. The following function accepts a string or bytes object and appends it as a file to an existing tarfile. I'm not sure if this code has the best performance, and it may have issues. I also tried writing files to memory instead of to a temporary directory, but this didn't impact performance.
import os
import tarfile
import tempfile
import time
from pathlib import Path
def append_tar_file(buffer, file_name, output_path, replace=True):
"""
append a buffer to an existing tar file
"""
# extract files
# check for existing file and overwrite if need to
# compress files
if not os.path.isfile(output_path):
return
buffer = buffer.encode("utf-8") if isinstance(buffer, str) else buffer
with tempfile.TemporaryDirectory() as tempdir:
tempdirp = Path(tempdir)
with tarfile.open(output_path, "r:bz2") as tar:
try:
tar.extractall(os.path.abspath(tempdirp))
except Exception as err: #tar file is empty
print(err)
buffer_path = os.path.join(tempdir, os.path.basename(file_name))
if replace or (buffer_path not in list(os.path.abspath(f) for f in tempdirp.iterdir())):
with open(buffer_path, "wb") as f:
f.write(buffer)
with tarfile.open(output_path, "w:bz2") as tar:
for file in tempdirp.iterdir():
try:
tar.add(file, arcname=os.path.basename(os.path.normpath(file)))
except Exception as err:
print(err)
if __name__ == "__main__":
path = "./test.tar.gz"
buffer = "Test String"
filename = "somefile"
for i in range(1, 100):
print(time.time())
append_tar_file(buffer, filename+str(i), path)
python performance compression
As you might know, Python's tarfile
does not have the ability to append to files compressed with e.g. gz
or bz2
. I tried to implement this functionality which works fine but operates slowly. The following function accepts a string or bytes object and appends it as a file to an existing tarfile. I'm not sure if this code has the best performance, and it may have issues. I also tried writing files to memory instead of to a temporary directory, but this didn't impact performance.
import os
import tarfile
import tempfile
import time
from pathlib import Path
def append_tar_file(buffer, file_name, output_path, replace=True):
"""
append a buffer to an existing tar file
"""
# extract files
# check for existing file and overwrite if need to
# compress files
if not os.path.isfile(output_path):
return
buffer = buffer.encode("utf-8") if isinstance(buffer, str) else buffer
with tempfile.TemporaryDirectory() as tempdir:
tempdirp = Path(tempdir)
with tarfile.open(output_path, "r:bz2") as tar:
try:
tar.extractall(os.path.abspath(tempdirp))
except Exception as err: #tar file is empty
print(err)
buffer_path = os.path.join(tempdir, os.path.basename(file_name))
if replace or (buffer_path not in list(os.path.abspath(f) for f in tempdirp.iterdir())):
with open(buffer_path, "wb") as f:
f.write(buffer)
with tarfile.open(output_path, "w:bz2") as tar:
for file in tempdirp.iterdir():
try:
tar.add(file, arcname=os.path.basename(os.path.normpath(file)))
except Exception as err:
print(err)
if __name__ == "__main__":
path = "./test.tar.gz"
buffer = "Test String"
filename = "somefile"
for i in range(1, 100):
print(time.time())
append_tar_file(buffer, filename+str(i), path)
python performance compression
edited Jul 29 at 17:44
Daniel
4,0632834
4,0632834
asked Jul 29 at 7:50
Masoud R.
355
355
add a comment |Â
add a comment |Â
2 Answers
2
active
oldest
votes
up vote
5
down vote
accepted
Indeed, the tarfile
package doesn't support appending to a compressed tar.
But I think you can do better than your current attempt.
Instead of extracting the content to disk, you could keep it in memory, write and append to a new compressed file, and finally rename the compressed file.
That way you will write a single temporary file,
instead of many.
The reduced disk I/O should improve the performance.
def append_tar_file(buf, file_name, output_path, replace=True):
    """Append *buf* as member *file_name* of the bz2 tar at *output_path*.

    Streams the existing members into a new archive instead of extracting
    them to disk, then atomically swaps the new archive into place.

    :param buf: ``str`` or ``bytes`` content (``str`` is UTF-8 encoded).
    :param file_name: member name inside the archive.
    :param output_path: existing ``.tar.bz2`` archive; no-op when absent.
    :param replace: when False, keep an existing member with this name.
    """
    if not os.path.isfile(output_path):
        return
    out_dir = os.path.dirname(os.path.abspath(output_path))
    # Build the replacement archive in the same directory as the output so
    # the final swap is a same-filesystem rename: os.rename() from a
    # system temp dir can cross filesystems (OSError) and fails on Windows
    # when the destination exists; os.replace() within one directory is
    # atomic and portable.
    fd, tmp_path = tempfile.mkstemp(suffix=".tar.bz2", dir=out_dir)
    os.close(fd)
    try:
        with tarfile.open(output_path, "r:bz2") as tar:
            if not replace:
                if file_name in (member.name for member in tar):
                    return
            if isinstance(buf, str):
                buf = buf.encode("utf-8")
            tarinfo = tarfile.TarInfo(file_name)
            tarinfo.size = len(buf)
            with tarfile.open(tmp_path, "w:bz2") as tmp:
                # Copy everything except the member being replaced.
                for member in tar:
                    if member.name != file_name:
                        tmp.addfile(member, tar.extractfile(member.name))
                tmp.addfile(tarinfo, BytesIO(buf))
        os.replace(tmp_path, output_path)
        tmp_path = None  # swap succeeded; nothing to clean up
    finally:
        if tmp_path is not None and os.path.exists(tmp_path):
            os.remove(tmp_path)
Can multiprocessing be used while working in memory? I mean, use it when adding files back to the output tar file. As the compressed file gets larger, appending takes more time.
â Masoud R.
Jul 30 at 7:59
add a comment |Â
up vote
4
down vote
Instead of hardcoding the compression scheme (and possibly compressing a .tar.gz
file using BZIP2 as you do), you should try to infer that information. Note that tarfile
let you open a compressed file without knowing the compression scheme using tarfile.open(filename, 'r:*')
but there is no equivalent for writing the archive.
Since the compression scheme understood by the tarfile
module are usual extensions of the files, inspecting the suffixes of output_path
should be enough:
def get_compression(filename):
    """Infer the tarfile compression suffix from *filename*.

    Returns ``''`` for a plain ``.tar``, ``'gz'`` for ``.tar.gz``/``.tgz``,
    and similarly for any single compression suffix (``bz2``, ``xz``, ...).

    :raises RuntimeError: when the name is not recognizably a tar archive
        or carries more than one compression suffix.
    """
    suffixes = Path(filename).suffixes
    if not suffixes:
        # Original unpacking raised a bare ValueError here; fail with the
        # documented error type instead.
        raise RuntimeError('Not a tar archive')
    tar, *compression = (s.lstrip('.') for s in suffixes)
    if tar == 'tgz':
        if compression:
            # The original message lacked the '{}' placeholder, silently
            # dropping the formatted detail.
            raise RuntimeError('Too many suffixes, cannot infer compression scheme from {}'.format(''.join(suffixes)))
        return 'gz'
    if tar != 'tar':
        raise RuntimeError('Not a tar archive')
    if not compression:
        return ''
    try:
        compression, = compression
    except ValueError:
        raise RuntimeError('Too many compression schemes: {}'.format(', '.join(compression))) from None
    else:
        return compression
Now you can use compression = get_compression(output_path)
and then open the tar file using tarfile.open(<name>, 'r:'.format(compression))
and open the file for writing using tarfile.open(<name>, 'w:'.format(compression))
.
Note that I used RuntimeError
here but you should probably come up with something less generic.
Thanks. The function returns gz
for files that were created with bz2
in tarfile
. Thus it should return bz2
instead of gz
.
â Masoud R.
Jul 30 at 6:41
If your file is namedsomething.tar.gz
, it is not coherent to use anything else than GZIP compression to create the file. This is the assumption this function is doing, hence the choice to only check the filename and not trying to do anything fancy with the file content. If you store BZIP2 compressed data in a.tar.gz
this is your mistake to start with.
â Mathias Ettinger
Jul 30 at 7:15
So you mean I should usetar.bz2
right?
â Masoud R.
Jul 30 at 7:17
2
If you want to store BZIP2 compressed data, definitely.
â Mathias Ettinger
Jul 30 at 7:23
1
On the other hand, if you want something fancier based on the file content rather than its name, you can check howtarfile
handle ther:*
mode. Basically, it tries to open the file using, in turngzip.GzipFile
,bz2.BZ2File
,lzma.LZMAFile
and if all failed fallback to trying without compression.
â Mathias Ettinger
Jul 30 at 7:33
add a comment |Â
2 Answers
2
active
oldest
votes
2 Answers
2
active
oldest
votes
active
oldest
votes
active
oldest
votes
up vote
5
down vote
accepted
Indeed, the tarfile
package doesn't support appending to a compressed tar.
But I think you can do better than your current attempt.
Instead of extracting the content to disk, you could keep it in memory, write and append to a new compressed file, and finally rename the compressed file.
That way you will write a single temporary file,
instead of many.
The reduced disk I/O should improve the performance.
def append_tar_file(buf, file_name, output_path, replace=True):
"""
append a buf to an existing tar file if not already there, or if replace=True
"""
if not os.path.isfile(output_path):
return
with tempfile.TemporaryDirectory() as tempdir:
tmp_path = os.path.join(tempdir, 'tmp.tar.bz2')
with tarfile.open(output_path, "r:bz2") as tar:
if not replace:
if file_name in (member.name for member in tar):
return
if isinstance(buf, str):
buf = buf.encode("utf-8")
fileobj = BytesIO(buf)
tarinfo = tarfile.TarInfo(file_name)
tarinfo.size = len(fileobj.getvalue())
with tarfile.open(tmp_path, "w:bz2") as tmp:
for member in tar:
if member.name != file_name:
tmp.addfile(member, tar.extractfile(member.name))
tmp.addfile(tarinfo, fileobj)
os.rename(tmp_path, output_path)
Can multiprocessing used while working with memory? I mean when adding files back to output tar file use it. When the compressed file getting larger, it would take more time to append.
â Masoud R.
Jul 30 at 7:59
add a comment |Â
up vote
5
down vote
accepted
Indeed, the tarfile
package doesn't support appending to a compressed tar.
But I think you can do better than your current attempt.
Instead of extracting the content to disk, you could keep it in memory, write and append to a new compressed file, and finally rename the compressed file.
That way you will write a single temporary file,
instead of many.
The reduced disk I/O should improve the performance.
def append_tar_file(buf, file_name, output_path, replace=True):
"""
append a buf to an existing tar file if not already there, or if replace=True
"""
if not os.path.isfile(output_path):
return
with tempfile.TemporaryDirectory() as tempdir:
tmp_path = os.path.join(tempdir, 'tmp.tar.bz2')
with tarfile.open(output_path, "r:bz2") as tar:
if not replace:
if file_name in (member.name for member in tar):
return
if isinstance(buf, str):
buf = buf.encode("utf-8")
fileobj = BytesIO(buf)
tarinfo = tarfile.TarInfo(file_name)
tarinfo.size = len(fileobj.getvalue())
with tarfile.open(tmp_path, "w:bz2") as tmp:
for member in tar:
if member.name != file_name:
tmp.addfile(member, tar.extractfile(member.name))
tmp.addfile(tarinfo, fileobj)
os.rename(tmp_path, output_path)
Can multiprocessing used while working with memory? I mean when adding files back to output tar file use it. When the compressed file getting larger, it would take more time to append.
â Masoud R.
Jul 30 at 7:59
add a comment |Â
up vote
5
down vote
accepted
up vote
5
down vote
accepted
Indeed, the tarfile
package doesn't support appending to a compressed tar.
But I think you can do better than your current attempt.
Instead of extracting the content to disk, you could keep it in memory, write and append to a new compressed file, and finally rename the compressed file.
That way you will write a single temporary file,
instead of many.
The reduced disk I/O should improve the performance.
def append_tar_file(buf, file_name, output_path, replace=True):
"""
append a buf to an existing tar file if not already there, or if replace=True
"""
if not os.path.isfile(output_path):
return
with tempfile.TemporaryDirectory() as tempdir:
tmp_path = os.path.join(tempdir, 'tmp.tar.bz2')
with tarfile.open(output_path, "r:bz2") as tar:
if not replace:
if file_name in (member.name for member in tar):
return
if isinstance(buf, str):
buf = buf.encode("utf-8")
fileobj = BytesIO(buf)
tarinfo = tarfile.TarInfo(file_name)
tarinfo.size = len(fileobj.getvalue())
with tarfile.open(tmp_path, "w:bz2") as tmp:
for member in tar:
if member.name != file_name:
tmp.addfile(member, tar.extractfile(member.name))
tmp.addfile(tarinfo, fileobj)
os.rename(tmp_path, output_path)
Indeed, the tarfile
package doesn't support appending to a compressed tar.
But I think you can do better than your current attempt.
Instead of extracting the content to disk, you could keep it in memory, write and append to a new compressed file, and finally rename the compressed file.
That way you will write a single temporary file,
instead of many.
The reduced disk I/O should improve the performance.
def append_tar_file(buf, file_name, output_path, replace=True):
"""
append a buf to an existing tar file if not already there, or if replace=True
"""
if not os.path.isfile(output_path):
return
with tempfile.TemporaryDirectory() as tempdir:
tmp_path = os.path.join(tempdir, 'tmp.tar.bz2')
with tarfile.open(output_path, "r:bz2") as tar:
if not replace:
if file_name in (member.name for member in tar):
return
if isinstance(buf, str):
buf = buf.encode("utf-8")
fileobj = BytesIO(buf)
tarinfo = tarfile.TarInfo(file_name)
tarinfo.size = len(fileobj.getvalue())
with tarfile.open(tmp_path, "w:bz2") as tmp:
for member in tar:
if member.name != file_name:
tmp.addfile(member, tar.extractfile(member.name))
tmp.addfile(tarinfo, fileobj)
os.rename(tmp_path, output_path)
edited Jul 30 at 9:06
Daniel
4,0632834
4,0632834
answered Jul 29 at 10:18
janos
95.2k12119342
95.2k12119342
Can multiprocessing used while working with memory? I mean when adding files back to output tar file use it. When the compressed file getting larger, it would take more time to append.
â Masoud R.
Jul 30 at 7:59
add a comment |Â
Can multiprocessing used while working with memory? I mean when adding files back to output tar file use it. When the compressed file getting larger, it would take more time to append.
â Masoud R.
Jul 30 at 7:59
Can multiprocessing used while working with memory? I mean when adding files back to output tar file use it. When the compressed file getting larger, it would take more time to append.
â Masoud R.
Jul 30 at 7:59
Can multiprocessing used while working with memory? I mean when adding files back to output tar file use it. When the compressed file getting larger, it would take more time to append.
â Masoud R.
Jul 30 at 7:59
add a comment |Â
up vote
4
down vote
Instead of hardcoding the compression scheme (and possibly compressing a .tar.gz
file using BZIP2 as you do), you should try to infer that information. Note that tarfile
let you open a compressed file without knowing the compression scheme using tarfile.open(filename, 'r:*')
but there is no equivalent for writing the archive.
Since the compression scheme understood by the tarfile
module are usual extensions of the files, inspecting the suffixes of output_path
should be enough:
def get_compression(filename):
suffixes = Path(filename).suffixes
tar, *compression = (s.lstrip('.') for s in suffixes)
if tar == 'tgz':
if compression:
raise RuntimeError('Too much suffixes, cannot infer compression scheme from '.format(''.join(suffixes)))
return 'gz'
if tar != 'tar':
raise RuntimeError('Not a tar archive')
if not compression:
return ''
try:
compression, = compression
except ValueError:
raise RuntimeError('Too much compression scheme: '.format(', '.join(compression))) from None
else:
return compression
Now you can use compression = get_compression(output_path)
and then open the tar file using tarfile.open(<name>, 'r:'.format(compression))
and open the file for writing using tarfile.open(<name>, 'w:'.format(compression))
.
Note that I used RuntimeError
here but you should probably come up with something less generic.
Thanks. The function returngz
for the files that created withbz2
intarfile
. Thus it should returnbz2
instead ofgz
.
â Masoud R.
Jul 30 at 6:41
If your file is namedsomething.tar.gz
, it is not coherent to use anything else than GZIP compression to create the file. This is the assumption this function is doing, hence the choice to only check the filename and not trying to do anything fancy with the file content. If you store BZIP2 compressed data in a.tar.gz
this is your mistake to start with.
â Mathias Ettinger
Jul 30 at 7:15
So you mean I should usetar.bz2
right?
â Masoud R.
Jul 30 at 7:17
2
If you want to store BZIP2 compressed data, definitely.
â Mathias Ettinger
Jul 30 at 7:23
1
On the other hand, if you want something fancier based on the file content rather than its name, you can check howtarfile
handle ther:*
mode. Basically, it tries to open the file using, in turngzip.GzipFile
,bz2.BZ2File
,lzma.LZMAFile
and if all failed fallback to trying without compression.
â Mathias Ettinger
Jul 30 at 7:33
add a comment |Â
up vote
4
down vote
Instead of hardcoding the compression scheme (and possibly compressing a .tar.gz
file using BZIP2 as you do), you should try to infer that information. Note that tarfile
let you open a compressed file without knowing the compression scheme using tarfile.open(filename, 'r:*')
but there is no equivalent for writing the archive.
Since the compression scheme understood by the tarfile
module are usual extensions of the files, inspecting the suffixes of output_path
should be enough:
def get_compression(filename):
suffixes = Path(filename).suffixes
tar, *compression = (s.lstrip('.') for s in suffixes)
if tar == 'tgz':
if compression:
raise RuntimeError('Too much suffixes, cannot infer compression scheme from '.format(''.join(suffixes)))
return 'gz'
if tar != 'tar':
raise RuntimeError('Not a tar archive')
if not compression:
return ''
try:
compression, = compression
except ValueError:
raise RuntimeError('Too much compression scheme: '.format(', '.join(compression))) from None
else:
return compression
Now you can use compression = get_compression(output_path)
and then open the tar file using tarfile.open(<name>, 'r:'.format(compression))
and open the file for writing using tarfile.open(<name>, 'w:'.format(compression))
.
Note that I used RuntimeError
here but you should probably come up with something less generic.
Thanks. The function returngz
for the files that created withbz2
intarfile
. Thus it should returnbz2
instead ofgz
.
â Masoud R.
Jul 30 at 6:41
If your file is namedsomething.tar.gz
, it is not coherent to use anything else than GZIP compression to create the file. This is the assumption this function is doing, hence the choice to only check the filename and not trying to do anything fancy with the file content. If you store BZIP2 compressed data in a.tar.gz
this is your mistake to start with.
â Mathias Ettinger
Jul 30 at 7:15
So you mean I should usetar.bz2
right?
â Masoud R.
Jul 30 at 7:17
2
If you want to store BZIP2 compressed data, definitely.
â Mathias Ettinger
Jul 30 at 7:23
1
On the other hand, if you want something fancier based on the file content rather than its name, you can check howtarfile
handle ther:*
mode. Basically, it tries to open the file using, in turngzip.GzipFile
,bz2.BZ2File
,lzma.LZMAFile
and if all failed fallback to trying without compression.
â Mathias Ettinger
Jul 30 at 7:33
add a comment |Â
up vote
4
down vote
up vote
4
down vote
Instead of hardcoding the compression scheme (and possibly compressing a .tar.gz
file using BZIP2 as you do), you should try to infer that information. Note that tarfile
let you open a compressed file without knowing the compression scheme using tarfile.open(filename, 'r:*')
but there is no equivalent for writing the archive.
Since the compression scheme understood by the tarfile
module are usual extensions of the files, inspecting the suffixes of output_path
should be enough:
def get_compression(filename):
suffixes = Path(filename).suffixes
tar, *compression = (s.lstrip('.') for s in suffixes)
if tar == 'tgz':
if compression:
raise RuntimeError('Too much suffixes, cannot infer compression scheme from '.format(''.join(suffixes)))
return 'gz'
if tar != 'tar':
raise RuntimeError('Not a tar archive')
if not compression:
return ''
try:
compression, = compression
except ValueError:
raise RuntimeError('Too much compression scheme: '.format(', '.join(compression))) from None
else:
return compression
Now you can use compression = get_compression(output_path)
and then open the tar file using tarfile.open(<name>, 'r:'.format(compression))
and open the file for writing using tarfile.open(<name>, 'w:'.format(compression))
.
Note that I used RuntimeError
here but you should probably come up with something less generic.
Instead of hardcoding the compression scheme (and possibly compressing a .tar.gz
file using BZIP2 as you do), you should try to infer that information. Note that tarfile
let you open a compressed file without knowing the compression scheme using tarfile.open(filename, 'r:*')
but there is no equivalent for writing the archive.
Since the compression scheme understood by the tarfile
module are usual extensions of the files, inspecting the suffixes of output_path
should be enough:
def get_compression(filename):
suffixes = Path(filename).suffixes
tar, *compression = (s.lstrip('.') for s in suffixes)
if tar == 'tgz':
if compression:
raise RuntimeError('Too much suffixes, cannot infer compression scheme from '.format(''.join(suffixes)))
return 'gz'
if tar != 'tar':
raise RuntimeError('Not a tar archive')
if not compression:
return ''
try:
compression, = compression
except ValueError:
raise RuntimeError('Too much compression scheme: '.format(', '.join(compression))) from None
else:
return compression
Now you can use compression = get_compression(output_path)
and then open the tar file using tarfile.open(<name>, 'r:'.format(compression))
and open the file for writing using tarfile.open(<name>, 'w:'.format(compression))
.
Note that I used RuntimeError
here but you should probably come up with something less generic.
edited Jul 29 at 11:57
answered Jul 29 at 11:51
Mathias Ettinger
21.7k32875
21.7k32875
Thanks. The function returngz
for the files that created withbz2
intarfile
. Thus it should returnbz2
instead ofgz
.
â Masoud R.
Jul 30 at 6:41
If your file is namedsomething.tar.gz
, it is not coherent to use anything else than GZIP compression to create the file. This is the assumption this function is doing, hence the choice to only check the filename and not trying to do anything fancy with the file content. If you store BZIP2 compressed data in a.tar.gz
this is your mistake to start with.
â Mathias Ettinger
Jul 30 at 7:15
So you mean I should usetar.bz2
right?
â Masoud R.
Jul 30 at 7:17
2
If you want to store BZIP2 compressed data, definitely.
â Mathias Ettinger
Jul 30 at 7:23
1
On the other hand, if you want something fancier based on the file content rather than its name, you can check howtarfile
handle ther:*
mode. Basically, it tries to open the file using, in turngzip.GzipFile
,bz2.BZ2File
,lzma.LZMAFile
and if all failed fallback to trying without compression.
â Mathias Ettinger
Jul 30 at 7:33
add a comment |Â
Thanks. The function returngz
for the files that created withbz2
intarfile
. Thus it should returnbz2
instead ofgz
.
â Masoud R.
Jul 30 at 6:41
If your file is namedsomething.tar.gz
, it is not coherent to use anything else than GZIP compression to create the file. This is the assumption this function is doing, hence the choice to only check the filename and not trying to do anything fancy with the file content. If you store BZIP2 compressed data in a.tar.gz
this is your mistake to start with.
â Mathias Ettinger
Jul 30 at 7:15
So you mean I should usetar.bz2
right?
â Masoud R.
Jul 30 at 7:17
2
If you want to store BZIP2 compressed data, definitely.
â Mathias Ettinger
Jul 30 at 7:23
1
On the other hand, if you want something fancier based on the file content rather than its name, you can check howtarfile
handle ther:*
mode. Basically, it tries to open the file using, in turngzip.GzipFile
,bz2.BZ2File
,lzma.LZMAFile
and if all failed fallback to trying without compression.
â Mathias Ettinger
Jul 30 at 7:33
Thanks. The function return
gz
for the files that created with bz2
in tarfile
. Thus it should return bz2
instead of gz
.â Masoud R.
Jul 30 at 6:41
Thanks. The function return
gz
for the files that created with bz2
in tarfile
. Thus it should return bz2
instead of gz
.â Masoud R.
Jul 30 at 6:41
If your file is named
something.tar.gz
, it is not coherent to use anything else than GZIP compression to create the file. This is the assumption this function is doing, hence the choice to only check the filename and not trying to do anything fancy with the file content. If you store BZIP2 compressed data in a .tar.gz
this is your mistake to start with.â Mathias Ettinger
Jul 30 at 7:15
If your file is named
something.tar.gz
, it is not coherent to use anything else than GZIP compression to create the file. This is the assumption this function is doing, hence the choice to only check the filename and not trying to do anything fancy with the file content. If you store BZIP2 compressed data in a .tar.gz
this is your mistake to start with.â Mathias Ettinger
Jul 30 at 7:15
So you mean I should use
tar.bz2
right?â Masoud R.
Jul 30 at 7:17
So you mean I should use
tar.bz2
right?â Masoud R.
Jul 30 at 7:17
2
2
If you want to store BZIP2 compressed data, definitely.
â Mathias Ettinger
Jul 30 at 7:23
If you want to store BZIP2 compressed data, definitely.
â Mathias Ettinger
Jul 30 at 7:23
1
1
On the other hand, if you want something fancier based on the file content rather than its name, you can check how
tarfile
handle the r:*
mode. Basically, it tries to open the file using, in turn gzip.GzipFile
, bz2.BZ2File
, lzma.LZMAFile
and if all failed fallback to trying without compression.â Mathias Ettinger
Jul 30 at 7:33
On the other hand, if you want something fancier based on the file content rather than its name, you can check how
tarfile
handle the r:*
mode. Basically, it tries to open the file using, in turn gzip.GzipFile
, bz2.BZ2File
, lzma.LZMAFile
and if all failed fallback to trying without compression.â Mathias Ettinger
Jul 30 at 7:33
add a comment |Â
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f200514%2fappend-to-compressed-tar-file-with-performance%23new-answer', 'question_page');
);
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password