Decoding a histogram
Clash Royale CLAN TAG#URR8PPP
.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;
up vote
1
down vote
favorite
I'm writing this as a follow-up for my previous question posted here:
I've successfully encoded a large histogram into a byte array, which is written to a file. I'm now focusing on converting the byte array back to the token-based String. Details on how the tokens work are in the previous question.
I've created a method, below, which takes the byte array as read from the file and outputs a char array. Because the size of the output is unknown at this point, I'm using a StringBuilder to accumulate the decoded text. The DecodingResult class is just a simple POJO holding the output String as a char[] and the size of the histogram as an int.
/** Decodes the raw byte into a decoding result object.
* @param bytes bytes to decode
* @return decodingResult object
*/
public static DecodingResult decodeBinarySPECtoRAW(byte bytes)
StringBuilder sb = new StringBuilder();
int height = 0;
int length = 0;
int val;
int histogramLength = 0;
for (int i = 0; i < bytes.length; i++)
char token = (char) bytes[i];
sb.append(token);
boolean nonSpecial = false;
for (Token t : Token.values())
if (token == t.name().charAt(0))
nonSpecial = true;
height = t.getHeight();
length = t.getLength();
if (nonSpecial)
//length
if (length != 0 && length != 1)
if (length == 8)
//1 byte
sb.append(getPaddedString(String.valueOf(bytes[i + 1] & 0xFF), 3));
histogramLength += bytes[i + 1] & 0xFF;
i++;
else if (length == 16)
//2 bytes
val = Tools.convertFromByteArray2(bytes[i + 1], bytes[i + 2]);
histogramLength += val;
sb.append(getPaddedString(String.valueOf(val), 5));
i += 2;
else
//4 bytes
val = Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4]);
histogramLength += val;
sb.append(getPaddedString(String.valueOf(val), 10));
i += 4;
else
histogramLength++;
//height
if (height != 0 && height != 1)
if (height == 8)
//1 byte
sb.append(getPaddedString(String.valueOf(bytes[i + 1] & 0xFF), 3));
i++;
else if (height == 16)
//2 bytes
val = Tools.convertFromByteArray2(bytes[i + 1], bytes[i + 2]);
sb.append(getPaddedString(String.valueOf(val), 5));
i += 2;
else
//4 bytes
val = Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4]);
sb.append(getPaddedString(String.valueOf(val), 10));
i += 4;
else
switch (token)
case 'R':
int numReads = (int) bytes[i + 1] & 0xFF;
i++;
sb.append(getPaddedString(String.valueOf(numReads), 3));
for (int j = 0; j < numReads; j++)
int nextNum = bytes[i + 1] & 0xFF;
sb.append(getPaddedString(String.valueOf(nextNum), 3));
histogramLength++;
i++;
break;
case 'S':
int numReads = (int) bytes[i + 1] & 0xFF;
i++;
sb.append(getPaddedString(String.valueOf(numReads), 3));
for (int j = 0; j < numReads; j++)
histogramLength++;
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray2(bytes[i + 1], bytes[i + 2])), 5));
i += 2;
break;
case 'T':
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4])), 10));
i += 4;
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4])), 10));
i += 4;
break;
case 'U':
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4])), 10));
i += 4;
break;
case 'V':
List<Byte> VBytes = new ArrayList<>();
boolean escapeFound = false;
while (!escapeFound)
if (i + 1 < bytes.length)
if (bytes[i + 1] == 0) escapeFound = true;
else
VBytes.add(bytes[i + 1]);
i += 1;
for (byte b : VBytes)
sb.append((char) b);
sb.append(getPaddedString(String.valueOf(bytes[i + 1] & 0xFF), 3));
i += 1;
break;
case 'W':
for (int j = 0; j < 6; j++)
sb.append("000");
i += 1;
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4])), 10));
i += 4;
break;
case 'X':
sb.append(getPaddedString(String.valueOf(bytes[i + 1] & 0xFF), 3));
i += 1;
//get length of the statement
int statementLength = bytes[i + 1] & 0xFF;
sb.append(getPaddedString(String.valueOf(statementLength), 3));
i += 1;
for (int j = i + 1; j < i + 1 + statementLength; j++)
sb.append((char) bytes[j]);
i += statementLength;
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray2(bytes[i + 1], bytes[i + 2])), 5));
i += 2;
//endseq
int endLength = bytes[i + 1];
sb.append(getPaddedString(String.valueOf(endLength), 3));
i += 1;
if (endLength != 0)
for (int j = i + 1; j < i + 1 + endLength; j++)
sb.append((char) bytes[j]);
i += endLength;
//flankseq
int flankLength = bytes[i + 1];
sb.append(getPaddedString(String.valueOf(flankLength), 3));
i += 1;
if (flankLength != 0)
for (int j = i + 1; j < i + 1 + flankLength; j++)
sb.append((char) bytes[j]);
i += flankLength;
break;
case 'Y':
//must be Y
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4])), 10));
i += 4;
break;
return new DecodingResult(sb.toString().toCharArray(), histogramLength);
public static String getPaddedString(String s, int max)
StringBuilder b = new StringBuilder(max);
for(int i = 0; i < max - s.length(); i++)
b.append('0');
b.append(s);
return b.toString();
The token code, just so no one has to go back and forth to the last post:
/** All lengths and heights in bits.
* All 1's are to be ignored in writing
* i.e 1 - 0 is transcoded as A.
* 1 -1 is transcoded as E
* 1 - 209 is transcoded as I209
* 1 - 2 is transcoded as I002
* 1 - 40000 is transcoded as M40000
* 1 - 290 is transcoded as M00290
*/
public enum Token
A (1, 0),
B (8, 0),
I (1 ,8),
E (1, 1),
F (8, 1),
J (8, 8),
N (8,16),
M (1,16),
C (16,0),
D (32,0),
G (16,1),
H (32,1),
K (16,8),
L (32,8),
O (16,16),
P (32,16),
Q (16,32),
Z (1,32);
private final int length;
private final int height;
Token(int length, int height)
this.length = length;
this.height = height;
public int getLength()
return length;
public int getHeight()
return height;
Also the convertFromByteArray
code.
public static int convertFromByteArray2(byte byte1, byte byte2) (byte1 & 0xFF));
public static int convertFromByteArray4(byte byte1, byte byte2, byte byte3, byte byte4)
return byte1 << 24
There are two operations here that are repeated many times and take a lot of time, but I'm not sure whether there's a better way of doing them. The first is the byte -> String conversion: I have created a padding method to avoid the use of String.format. The second is that it has to loop through every Token in the enum to find the matching one.
java performance
add a comment |
up vote
1
down vote
favorite
I'm writing this as a follow-up for my previous question posted here:
I've successfully encoded a large histogram into a byte array, which is written to a file. I'm now focusing on converting the byte array back to the token-based String. Details on how the tokens work are in the previous question.
I've created a method, below, which takes the byte array as read from the file and outputs a char array. Because the size of the output is unknown at this point, I'm using a StringBuilder to accumulate the decoded text. The DecodingResult class is just a simple POJO holding the output String as a char[] and the size of the histogram as an int.
/** Decodes the raw byte into a decoding result object.
* @param bytes bytes to decode
* @return decodingResult object
*/
public static DecodingResult decodeBinarySPECtoRAW(byte bytes)
StringBuilder sb = new StringBuilder();
int height = 0;
int length = 0;
int val;
int histogramLength = 0;
for (int i = 0; i < bytes.length; i++)
char token = (char) bytes[i];
sb.append(token);
boolean nonSpecial = false;
for (Token t : Token.values())
if (token == t.name().charAt(0))
nonSpecial = true;
height = t.getHeight();
length = t.getLength();
if (nonSpecial)
//length
if (length != 0 && length != 1)
if (length == 8)
//1 byte
sb.append(getPaddedString(String.valueOf(bytes[i + 1] & 0xFF), 3));
histogramLength += bytes[i + 1] & 0xFF;
i++;
else if (length == 16)
//2 bytes
val = Tools.convertFromByteArray2(bytes[i + 1], bytes[i + 2]);
histogramLength += val;
sb.append(getPaddedString(String.valueOf(val), 5));
i += 2;
else
//4 bytes
val = Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4]);
histogramLength += val;
sb.append(getPaddedString(String.valueOf(val), 10));
i += 4;
else
histogramLength++;
//height
if (height != 0 && height != 1)
if (height == 8)
//1 byte
sb.append(getPaddedString(String.valueOf(bytes[i + 1] & 0xFF), 3));
i++;
else if (height == 16)
//2 bytes
val = Tools.convertFromByteArray2(bytes[i + 1], bytes[i + 2]);
sb.append(getPaddedString(String.valueOf(val), 5));
i += 2;
else
//4 bytes
val = Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4]);
sb.append(getPaddedString(String.valueOf(val), 10));
i += 4;
else
switch (token)
case 'R':
int numReads = (int) bytes[i + 1] & 0xFF;
i++;
sb.append(getPaddedString(String.valueOf(numReads), 3));
for (int j = 0; j < numReads; j++)
int nextNum = bytes[i + 1] & 0xFF;
sb.append(getPaddedString(String.valueOf(nextNum), 3));
histogramLength++;
i++;
break;
case 'S':
int numReads = (int) bytes[i + 1] & 0xFF;
i++;
sb.append(getPaddedString(String.valueOf(numReads), 3));
for (int j = 0; j < numReads; j++)
histogramLength++;
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray2(bytes[i + 1], bytes[i + 2])), 5));
i += 2;
break;
case 'T':
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4])), 10));
i += 4;
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4])), 10));
i += 4;
break;
case 'U':
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4])), 10));
i += 4;
break;
case 'V':
List<Byte> VBytes = new ArrayList<>();
boolean escapeFound = false;
while (!escapeFound)
if (i + 1 < bytes.length)
if (bytes[i + 1] == 0) escapeFound = true;
else
VBytes.add(bytes[i + 1]);
i += 1;
for (byte b : VBytes)
sb.append((char) b);
sb.append(getPaddedString(String.valueOf(bytes[i + 1] & 0xFF), 3));
i += 1;
break;
case 'W':
for (int j = 0; j < 6; j++)
sb.append("000");
i += 1;
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4])), 10));
i += 4;
break;
case 'X':
sb.append(getPaddedString(String.valueOf(bytes[i + 1] & 0xFF), 3));
i += 1;
//get length of the statement
int statementLength = bytes[i + 1] & 0xFF;
sb.append(getPaddedString(String.valueOf(statementLength), 3));
i += 1;
for (int j = i + 1; j < i + 1 + statementLength; j++)
sb.append((char) bytes[j]);
i += statementLength;
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray2(bytes[i + 1], bytes[i + 2])), 5));
i += 2;
//endseq
int endLength = bytes[i + 1];
sb.append(getPaddedString(String.valueOf(endLength), 3));
i += 1;
if (endLength != 0)
for (int j = i + 1; j < i + 1 + endLength; j++)
sb.append((char) bytes[j]);
i += endLength;
//flankseq
int flankLength = bytes[i + 1];
sb.append(getPaddedString(String.valueOf(flankLength), 3));
i += 1;
if (flankLength != 0)
for (int j = i + 1; j < i + 1 + flankLength; j++)
sb.append((char) bytes[j]);
i += flankLength;
break;
case 'Y':
//must be Y
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4])), 10));
i += 4;
break;
return new DecodingResult(sb.toString().toCharArray(), histogramLength);
public static String getPaddedString(String s, int max)
StringBuilder b = new StringBuilder(max);
for(int i = 0; i < max - s.length(); i++)
b.append('0');
b.append(s);
return b.toString();
The token code, just so no one has to go back and forth to the last post:
/** All lengths and heights in bits.
* All 1's are to be ignored in writing
* i.e 1 - 0 is transcoded as A.
* 1 -1 is transcoded as E
* 1 - 209 is transcoded as I209
* 1 - 2 is transcoded as I002
* 1 - 40000 is transcoded as M40000
* 1 - 290 is transcoded as M00290
*/
public enum Token
A (1, 0),
B (8, 0),
I (1 ,8),
E (1, 1),
F (8, 1),
J (8, 8),
N (8,16),
M (1,16),
C (16,0),
D (32,0),
G (16,1),
H (32,1),
K (16,8),
L (32,8),
O (16,16),
P (32,16),
Q (16,32),
Z (1,32);
private final int length;
private final int height;
Token(int length, int height)
this.length = length;
this.height = height;
public int getLength()
return length;
public int getHeight()
return height;
Also the convertFromByteArray
code.
public static int convertFromByteArray2(byte byte1, byte byte2) (byte1 & 0xFF));
public static int convertFromByteArray4(byte byte1, byte byte2, byte byte3, byte byte4)
return byte1 << 24
There are two operations here that are repeated many times and take a lot of time, but I'm not sure whether there's a better way of doing them. The first is the byte -> String conversion: I have created a padding method to avoid the use of String.format. The second is that it has to loop through every Token in the enum to find the matching one.
java performance
1
I thought you were told about the cost of String.format()
– Sharon Ben Asher
Apr 17 at 11:16
@SharonBenAsher This new code avoids String.format().
– 200_success
Apr 17 at 18:46
add a comment |
up vote
1
down vote
favorite
up vote
1
down vote
favorite
I'm writing this as a follow-up for my previous question posted here:
I've successfully encoded a large histogram into a byte array, which is written to a file. I'm now focusing on converting the byte array back to the token-based String. Details on how the tokens work are in the previous question.
I've created a method, below, which takes the byte array as read from the file and outputs a char array. Because the size of the output is unknown at this point, I'm using a StringBuilder to accumulate the decoded text. The DecodingResult class is just a simple POJO holding the output String as a char[] and the size of the histogram as an int.
/** Decodes the raw byte into a decoding result object.
* @param bytes bytes to decode
* @return decodingResult object
*/
public static DecodingResult decodeBinarySPECtoRAW(byte bytes)
StringBuilder sb = new StringBuilder();
int height = 0;
int length = 0;
int val;
int histogramLength = 0;
for (int i = 0; i < bytes.length; i++)
char token = (char) bytes[i];
sb.append(token);
boolean nonSpecial = false;
for (Token t : Token.values())
if (token == t.name().charAt(0))
nonSpecial = true;
height = t.getHeight();
length = t.getLength();
if (nonSpecial)
//length
if (length != 0 && length != 1)
if (length == 8)
//1 byte
sb.append(getPaddedString(String.valueOf(bytes[i + 1] & 0xFF), 3));
histogramLength += bytes[i + 1] & 0xFF;
i++;
else if (length == 16)
//2 bytes
val = Tools.convertFromByteArray2(bytes[i + 1], bytes[i + 2]);
histogramLength += val;
sb.append(getPaddedString(String.valueOf(val), 5));
i += 2;
else
//4 bytes
val = Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4]);
histogramLength += val;
sb.append(getPaddedString(String.valueOf(val), 10));
i += 4;
else
histogramLength++;
//height
if (height != 0 && height != 1)
if (height == 8)
//1 byte
sb.append(getPaddedString(String.valueOf(bytes[i + 1] & 0xFF), 3));
i++;
else if (height == 16)
//2 bytes
val = Tools.convertFromByteArray2(bytes[i + 1], bytes[i + 2]);
sb.append(getPaddedString(String.valueOf(val), 5));
i += 2;
else
//4 bytes
val = Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4]);
sb.append(getPaddedString(String.valueOf(val), 10));
i += 4;
else
switch (token)
case 'R':
int numReads = (int) bytes[i + 1] & 0xFF;
i++;
sb.append(getPaddedString(String.valueOf(numReads), 3));
for (int j = 0; j < numReads; j++)
int nextNum = bytes[i + 1] & 0xFF;
sb.append(getPaddedString(String.valueOf(nextNum), 3));
histogramLength++;
i++;
break;
case 'S':
int numReads = (int) bytes[i + 1] & 0xFF;
i++;
sb.append(getPaddedString(String.valueOf(numReads), 3));
for (int j = 0; j < numReads; j++)
histogramLength++;
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray2(bytes[i + 1], bytes[i + 2])), 5));
i += 2;
break;
case 'T':
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4])), 10));
i += 4;
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4])), 10));
i += 4;
break;
case 'U':
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4])), 10));
i += 4;
break;
case 'V':
List<Byte> VBytes = new ArrayList<>();
boolean escapeFound = false;
while (!escapeFound)
if (i + 1 < bytes.length)
if (bytes[i + 1] == 0) escapeFound = true;
else
VBytes.add(bytes[i + 1]);
i += 1;
for (byte b : VBytes)
sb.append((char) b);
sb.append(getPaddedString(String.valueOf(bytes[i + 1] & 0xFF), 3));
i += 1;
break;
case 'W':
for (int j = 0; j < 6; j++)
sb.append("000");
i += 1;
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4])), 10));
i += 4;
break;
case 'X':
sb.append(getPaddedString(String.valueOf(bytes[i + 1] & 0xFF), 3));
i += 1;
//get length of the statement
int statementLength = bytes[i + 1] & 0xFF;
sb.append(getPaddedString(String.valueOf(statementLength), 3));
i += 1;
for (int j = i + 1; j < i + 1 + statementLength; j++)
sb.append((char) bytes[j]);
i += statementLength;
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray2(bytes[i + 1], bytes[i + 2])), 5));
i += 2;
//endseq
int endLength = bytes[i + 1];
sb.append(getPaddedString(String.valueOf(endLength), 3));
i += 1;
if (endLength != 0)
for (int j = i + 1; j < i + 1 + endLength; j++)
sb.append((char) bytes[j]);
i += endLength;
//flankseq
int flankLength = bytes[i + 1];
sb.append(getPaddedString(String.valueOf(flankLength), 3));
i += 1;
if (flankLength != 0)
for (int j = i + 1; j < i + 1 + flankLength; j++)
sb.append((char) bytes[j]);
i += flankLength;
break;
case 'Y':
//must be Y
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4])), 10));
i += 4;
break;
return new DecodingResult(sb.toString().toCharArray(), histogramLength);
public static String getPaddedString(String s, int max)
StringBuilder b = new StringBuilder(max);
for(int i = 0; i < max - s.length(); i++)
b.append('0');
b.append(s);
return b.toString();
The token code, just so no one has to go back and forth to the last post:
/** All lengths and heights in bits.
* All 1's are to be ignored in writing
* i.e 1 - 0 is transcoded as A.
* 1 -1 is transcoded as E
* 1 - 209 is transcoded as I209
* 1 - 2 is transcoded as I002
* 1 - 40000 is transcoded as M40000
* 1 - 290 is transcoded as M00290
*/
public enum Token
A (1, 0),
B (8, 0),
I (1 ,8),
E (1, 1),
F (8, 1),
J (8, 8),
N (8,16),
M (1,16),
C (16,0),
D (32,0),
G (16,1),
H (32,1),
K (16,8),
L (32,8),
O (16,16),
P (32,16),
Q (16,32),
Z (1,32);
private final int length;
private final int height;
Token(int length, int height)
this.length = length;
this.height = height;
public int getLength()
return length;
public int getHeight()
return height;
Also the convertFromByteArray
code.
public static int convertFromByteArray2(byte byte1, byte byte2) (byte1 & 0xFF));
public static int convertFromByteArray4(byte byte1, byte byte2, byte byte3, byte byte4)
return byte1 << 24
There are two operations here that are repeated many times and take a lot of time, but I'm not sure whether there's a better way of doing them. The first is the byte -> String conversion: I have created a padding method to avoid the use of String.format. The second is that it has to loop through every Token in the enum to find the matching one.
java performance
I'm writing this as a follow-up for my previous question posted here:
I've successfully encoded a large histogram into a byte array, which is written to a file. I'm now focusing on converting the byte array back to the token-based String. Details on how the tokens work are in the previous question.
I've created a method, below, which takes the byte array as read from the file and outputs a char array. Because the size of the output is unknown at this point, I'm using a StringBuilder to accumulate the decoded text. The DecodingResult class is just a simple POJO holding the output String as a char[] and the size of the histogram as an int.
/** Decodes the raw byte into a decoding result object.
* @param bytes bytes to decode
* @return decodingResult object
*/
public static DecodingResult decodeBinarySPECtoRAW(byte bytes)
StringBuilder sb = new StringBuilder();
int height = 0;
int length = 0;
int val;
int histogramLength = 0;
for (int i = 0; i < bytes.length; i++)
char token = (char) bytes[i];
sb.append(token);
boolean nonSpecial = false;
for (Token t : Token.values())
if (token == t.name().charAt(0))
nonSpecial = true;
height = t.getHeight();
length = t.getLength();
if (nonSpecial)
//length
if (length != 0 && length != 1)
if (length == 8)
//1 byte
sb.append(getPaddedString(String.valueOf(bytes[i + 1] & 0xFF), 3));
histogramLength += bytes[i + 1] & 0xFF;
i++;
else if (length == 16)
//2 bytes
val = Tools.convertFromByteArray2(bytes[i + 1], bytes[i + 2]);
histogramLength += val;
sb.append(getPaddedString(String.valueOf(val), 5));
i += 2;
else
//4 bytes
val = Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4]);
histogramLength += val;
sb.append(getPaddedString(String.valueOf(val), 10));
i += 4;
else
histogramLength++;
//height
if (height != 0 && height != 1)
if (height == 8)
//1 byte
sb.append(getPaddedString(String.valueOf(bytes[i + 1] & 0xFF), 3));
i++;
else if (height == 16)
//2 bytes
val = Tools.convertFromByteArray2(bytes[i + 1], bytes[i + 2]);
sb.append(getPaddedString(String.valueOf(val), 5));
i += 2;
else
//4 bytes
val = Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4]);
sb.append(getPaddedString(String.valueOf(val), 10));
i += 4;
else
switch (token)
case 'R':
int numReads = (int) bytes[i + 1] & 0xFF;
i++;
sb.append(getPaddedString(String.valueOf(numReads), 3));
for (int j = 0; j < numReads; j++)
int nextNum = bytes[i + 1] & 0xFF;
sb.append(getPaddedString(String.valueOf(nextNum), 3));
histogramLength++;
i++;
break;
case 'S':
int numReads = (int) bytes[i + 1] & 0xFF;
i++;
sb.append(getPaddedString(String.valueOf(numReads), 3));
for (int j = 0; j < numReads; j++)
histogramLength++;
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray2(bytes[i + 1], bytes[i + 2])), 5));
i += 2;
break;
case 'T':
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4])), 10));
i += 4;
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4])), 10));
i += 4;
break;
case 'U':
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4])), 10));
i += 4;
break;
case 'V':
List<Byte> VBytes = new ArrayList<>();
boolean escapeFound = false;
while (!escapeFound)
if (i + 1 < bytes.length)
if (bytes[i + 1] == 0) escapeFound = true;
else
VBytes.add(bytes[i + 1]);
i += 1;
for (byte b : VBytes)
sb.append((char) b);
sb.append(getPaddedString(String.valueOf(bytes[i + 1] & 0xFF), 3));
i += 1;
break;
case 'W':
for (int j = 0; j < 6; j++)
sb.append("000");
i += 1;
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4])), 10));
i += 4;
break;
case 'X':
sb.append(getPaddedString(String.valueOf(bytes[i + 1] & 0xFF), 3));
i += 1;
//get length of the statement
int statementLength = bytes[i + 1] & 0xFF;
sb.append(getPaddedString(String.valueOf(statementLength), 3));
i += 1;
for (int j = i + 1; j < i + 1 + statementLength; j++)
sb.append((char) bytes[j]);
i += statementLength;
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray2(bytes[i + 1], bytes[i + 2])), 5));
i += 2;
//endseq
int endLength = bytes[i + 1];
sb.append(getPaddedString(String.valueOf(endLength), 3));
i += 1;
if (endLength != 0)
for (int j = i + 1; j < i + 1 + endLength; j++)
sb.append((char) bytes[j]);
i += endLength;
//flankseq
int flankLength = bytes[i + 1];
sb.append(getPaddedString(String.valueOf(flankLength), 3));
i += 1;
if (flankLength != 0)
for (int j = i + 1; j < i + 1 + flankLength; j++)
sb.append((char) bytes[j]);
i += flankLength;
break;
case 'Y':
//must be Y
sb.append(getPaddedString(String.valueOf(Tools.convertFromByteArray4(bytes[i + 1], bytes[i + 2], bytes[i + 3], bytes[i + 4])), 10));
i += 4;
break;
return new DecodingResult(sb.toString().toCharArray(), histogramLength);
public static String getPaddedString(String s, int max)
StringBuilder b = new StringBuilder(max);
for(int i = 0; i < max - s.length(); i++)
b.append('0');
b.append(s);
return b.toString();
The token code, just so no one has to go back and forth to the last post:
/** All lengths and heights in bits.
* All 1's are to be ignored in writing
* i.e 1 - 0 is transcoded as A.
* 1 -1 is transcoded as E
* 1 - 209 is transcoded as I209
* 1 - 2 is transcoded as I002
* 1 - 40000 is transcoded as M40000
* 1 - 290 is transcoded as M00290
*/
public enum Token
A (1, 0),
B (8, 0),
I (1 ,8),
E (1, 1),
F (8, 1),
J (8, 8),
N (8,16),
M (1,16),
C (16,0),
D (32,0),
G (16,1),
H (32,1),
K (16,8),
L (32,8),
O (16,16),
P (32,16),
Q (16,32),
Z (1,32);
private final int length;
private final int height;
Token(int length, int height)
this.length = length;
this.height = height;
public int getLength()
return length;
public int getHeight()
return height;
Also the convertFromByteArray
code.
public static int convertFromByteArray2(byte byte1, byte byte2) (byte1 & 0xFF));
public static int convertFromByteArray4(byte byte1, byte byte2, byte byte3, byte byte4)
return byte1 << 24
There are two operations here that are repeated many times and take a lot of time, but I'm not sure whether there's a better way of doing them. The first is the byte -> String conversion: I have created a padding method to avoid the use of String.format. The second is that it has to loop through every Token in the enum to find the matching one.
java performance
edited Apr 17 at 12:00
asked Apr 17 at 10:48
Sam
1887
1887
1
I thought you were told about the cost of String.format()
– Sharon Ben Asher
Apr 17 at 11:16
@SharonBenAsher This new code avoids String.format().
– 200_success
Apr 17 at 18:46
add a comment |
1
I thought you were told about the cost of String.format()
– Sharon Ben Asher
Apr 17 at 11:16
@SharonBenAsher This new code avoids String.format().
– 200_success
Apr 17 at 18:46
1
1
I thought you were told about the cost of String.format()
– Sharon Ben Asher
Apr 17 at 11:16
I thought you were told about the cost of String.format()
– Sharon Ben Asher
Apr 17 at 11:16
@SharonBenAsher This new code avoids String.format().
– 200_success
Apr 17 at 18:46
@SharonBenAsher This new code avoids String.format().
– 200_success
Apr 17 at 18:46
add a comment |
1 Answer
1
active
oldest
votes
up vote
2
down vote
The loop can simply be replaced with a lookup map which you prepare once before the process runs:
Map<Character, Token> tokenLookup = EnumSet.allOf(Token.class).stream()
.collect(Collectors.toMap(tok -> tok.name().charAt(0), Function.identity()));
Then, instead of the loop just:
Token t = tokenLookup.get(token);
if(t != null)
nonSpecial = true;
height = t.getHeight();
length = t.getLength();
Regarding the getPaddedString() method: you could at least eliminate the repeated call to s.length() on every loop iteration:
for(int i = max - s.length(); i > 0; i--)
...
Thanks for the suggestions. That's definitely a better way to do it than iterating through the Tokens.
– Sam
Apr 18 at 8:27
add a comment |
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
up vote
2
down vote
The loop can simply be replaced with a lookup map which you prepare once before the process runs:
Map<Character, Token> tokenLookup = EnumSet.allOf(Token.class).stream()
.collect(Collectors.toMap(tok -> tok.name().charAt(0), Function.identity()));
Then, instead of the loop just:
Token t = tokenLookup.get(token);
if(t != null)
nonSpecial = true;
height = t.getHeight();
length = t.getLength();
Regarding the getPaddedString() method: you could at least eliminate the repeated call to s.length() on every loop iteration:
for(int i = max - s.length(); i > 0; i--)
...
Thanks for the suggestions. That's definitely a better way to do it than iterating through the Tokens.
– Sam
Apr 18 at 8:27
add a comment |
up vote
2
down vote
The loop can simply be replaced with a lookup map which you prepare once before the process runs:
Map<Character, Token> tokenLookup = EnumSet.allOf(Token.class).stream()
.collect(Collectors.toMap(tok -> tok.name().charAt(0), Function.identity()));
Then, instead of the loop just:
Token t = tokenLookup.get(token);
if(t != null)
nonSpecial = true;
height = t.getHeight();
length = t.getLength();
Regarding the getPaddedString() method: you could at least eliminate the repeated call to s.length() on every loop iteration:
for(int i = max - s.length(); i > 0; i--)
...
Thanks for the suggestions. That's definitely a better way to do it than iterating through the Tokens.
– Sam
Apr 18 at 8:27
add a comment |
up vote
2
down vote
up vote
2
down vote
The loop can simply be replaced with a lookup map which you prepare once before the process runs:
Map<Character, Token> tokenLookup = EnumSet.allOf(Token.class).stream()
.collect(Collectors.toMap(tok -> tok.name().charAt(0), Function.identity()));
Then, instead of the loop just:
Token t = tokenLookup.get(token);
if(t != null)
nonSpecial = true;
height = t.getHeight();
length = t.getLength();
Regarding the getPaddedString() method: you could at least eliminate the repeated call to s.length() on every loop iteration:
for(int i = max - s.length(); i > 0; i--)
...
The loop can simply be replaced with a lookup map which you prepare once before the process runs:
Map<Character, Token> tokenLookup = EnumSet.allOf(Token.class).stream()
.collect(Collectors.toMap(tok -> tok.name().charAt(0), Function.identity()));
Then, instead of the loop just:
Token t = tokenLookup.get(token);
if(t != null)
nonSpecial = true;
height = t.getHeight();
length = t.getLength();
Regarding the getPaddedString() method: you could at least eliminate the repeated call to s.length() on every loop iteration:
for(int i = max - s.length(); i > 0; i--)
...
answered Apr 18 at 8:04
mtj
2,675212
2,675212
Thanks for the suggestions. That's definitely a better way to do it than iterating through the Tokens.
– Sam
Apr 18 at 8:27
add a comment |
Thanks for the suggestions. That's definitely a better way to do it than iterating through the Tokens.
– Sam
Apr 18 at 8:27
Thanks for the suggestions. That's definitely a better way to do it than iterating through the Tokens.
– Sam
Apr 18 at 8:27
Thanks for the suggestions. That's definitely a better way to do it than iterating through the Tokens.
– Sam
Apr 18 at 8:27
add a comment |
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f192280%2fdecoding-a-histogram%23new-answer', 'question_page');
);
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
1
I thought you were told about the cost of String.format()
– Sharon Ben Asher
Apr 17 at 11:16
@SharonBenAsher This new code avoids String.format().
– 200_success
Apr 17 at 18:46