Skip to content

Commit 9a64d3b

Browse files
committed
Fix AsciiBytes unicode decoding
Fix the decoding logic in the AsciiBytes `hashCode` and `matches` methods to correctly deal with multi-byte encodings. Fixes spring-projects/spring-boot#12504.
1 parent 98a2a91 commit 9a64d3b

File tree

2 files changed

+34
-18
lines changed
  • spring-boot-project/spring-boot-tools/spring-boot-loader/src

2 files changed

+34
-18
lines changed

spring-boot-project/spring-boot-tools/spring-boot-loader/src/main/java/org/springframework/boot/loader/jar/AsciiBytes.java

+21-17
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,9 @@ final class AsciiBytes {
2929

3030
private static final String EMPTY_STRING = "";
3131

32-
private static final int[] EXCESS = { 0x0, 0x1080, 0x96, 0x1c82080 };
32+
private static final int[] INITIAL_BYTE_BITMASK = { 0x7F, 0x1F, 0x0F, 0x07 };
33+
34+
private static final int SUBSEQUENT_BYTE_BITMASK = 0x3F;
3335

3436
private final byte[] bytes;
3537

@@ -142,13 +144,10 @@ public boolean matches(CharSequence name, char suffix) {
142144
int totalLen = (nameLen + (suffix == 0 ? 0 : 1));
143145
for (int i = this.offset; i < this.offset + this.length; i++) {
144146
int b = this.bytes[i];
145-
if (b < 0) {
146-
b = b & 0x7F;
147-
int limit = getRemainingUtfBytes(b);
148-
for (int j = 0; j < limit; j++) {
149-
b = (b << 6) + (this.bytes[++i] & 0xFF);
150-
}
151-
b -= EXCESS[limit];
147+
int remainingUtfBytes = getNumberOfUtfBytes(b) - 1;
148+
b &= INITIAL_BYTE_BITMASK[remainingUtfBytes];
149+
for (int j = 0; j < remainingUtfBytes; j++) {
150+
b = (b << 6) + (this.bytes[++i] & SUBSEQUENT_BYTE_BITMASK);
152151
}
153152
char c = getChar(name, suffix, charIndex++);
154153
if (b <= 0xFFFF) {
@@ -185,13 +184,10 @@ public int hashCode() {
185184
if (hash == 0 && this.bytes.length > 0) {
186185
for (int i = this.offset; i < this.offset + this.length; i++) {
187186
int b = this.bytes[i];
188-
if (b < 0) {
189-
b = b & 0x7F;
190-
int limit = getRemainingUtfBytes(b);
191-
for (int j = 0; j < limit; j++) {
192-
b = (b << 6) + (this.bytes[++i] & 0xFF);
193-
}
194-
b -= EXCESS[limit];
187+
int remainingUtfBytes = getNumberOfUtfBytes(b) - 1;
188+
b &= INITIAL_BYTE_BITMASK[remainingUtfBytes];
189+
for (int j = 0; j < remainingUtfBytes; j++) {
190+
b = (b << 6) + (this.bytes[++i] & SUBSEQUENT_BYTE_BITMASK);
195191
}
196192
if (b <= 0xFFFF) {
197193
hash = 31 * hash + b;
@@ -206,8 +202,16 @@ public int hashCode() {
206202
return hash;
207203
}
208204

209-
private int getRemainingUtfBytes(int b) {
210-
return (b < 96 ? 1 : (b < 112 ? 2 : 3));
205+
private int getNumberOfUtfBytes(int b) {
206+
if ((b & 0x80) == 0) {
207+
return 1;
208+
}
209+
int numberOfUtfBytes = 0;
210+
while ((b & 0x80) != 0) {
211+
b <<= 1;
212+
numberOfUtfBytes++;
213+
}
214+
return numberOfUtfBytes;
211215
}
212216

213217
@Override

spring-boot-project/spring-boot-tools/spring-boot-loader/src/test/java/org/springframework/boot/loader/jar/AsciiBytesTests.java

+13-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2012-2017 the original author or authors.
2+
* Copyright 2012-2018 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -184,6 +184,18 @@ public void matchesSameAsStringWithEmoji() {
184184
matchesSameAsString("\ud83d\udca9");
185185
}
186186

187+
@Test
188+
public void hashCodeFromInstanceMatchesHashCodeFromString() {
189+
String name = "fonts/宋体/simsun.ttf";
190+
assertThat(new AsciiBytes(name).hashCode()).isEqualTo(AsciiBytes.hashCode(name));
191+
}
192+
193+
@Test
194+
public void instanceCreatedFromCharSequenceMatchesSameCharSequence() {
195+
String name = "fonts/宋体/simsun.ttf";
196+
assertThat(new AsciiBytes(name).matches(name, NO_SUFFIX)).isTrue();
197+
}
198+
187199
private void matchesSameAsString(String input) {
188200
assertThat(new AsciiBytes(input).matches(input, NO_SUFFIX)).isTrue();
189201
}

0 commit comments

Comments
 (0)