Skip to content

Commit df77b31

Browse files
committed
test mappings
1 parent cc696d3 commit df77b31

4 files changed

Lines changed: 40 additions & 10 deletions

File tree

core/src/main/java/org/jruby/RubyString.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,9 @@ final boolean singleByteOptimizable() {
282282
final boolean singleByteOptimizable(Encoding enc) {
283283
return StringSupport.isSingleByteOptimizable(this, enc);
284284
}
285+
final boolean singleByteOptimizable(int flags, Encoding enc) {
286+
return StringSupport.isSingleByteOptimizable(flags, this, enc);
287+
}
285288

286289
@SuppressWarnings("ReferenceEquality")
287290
final Encoding isCompatibleWith(EncodingCapable other) {
@@ -1926,8 +1929,7 @@ public IRubyObject upcase_bang(ThreadContext context, IRubyObject arg0, IRubyObj
19261929
private IRubyObject upcase_bang(ThreadContext context, int flags) {
19271930
modifyAndKeepCodeRange();
19281931
Encoding enc = checkDummyEncoding();
1929-
if (((flags & Config.CASE_ASCII_ONLY) != 0 && (enc.isUTF8() || enc.maxLength() == 1)) ||
1930-
(flags & Config.CASE_FOLD_TURKISH_AZERI) == 0 && getCodeRange() == CR_7BIT) {
1932+
if (singleByteOptimizable(flags, enc)) {
19311933
int s = value.getBegin();
19321934
int end = s + value.getRealSize();
19331935
byte[]bytes = value.getUnsafeBytes();
@@ -2000,8 +2002,7 @@ public IRubyObject downcase_bang(ThreadContext context, IRubyObject arg0, IRubyO
20002002
private IRubyObject downcase_bang(ThreadContext context, int flags) {
20012003
modifyAndKeepCodeRange();
20022004
Encoding enc = checkDummyEncoding();
2003-
if (((flags & Config.CASE_ASCII_ONLY) != 0 && (enc.isUTF8() || enc.maxLength() == 1)) ||
2004-
(flags & Config.CASE_FOLD_TURKISH_AZERI) == 0 && getCodeRange() == CR_7BIT) {
2005+
if (singleByteOptimizable(flags, enc)) {
20052006
int s = value.getBegin();
20062007
int end = s + value.getRealSize();
20072008
byte[]bytes = value.getUnsafeBytes();

core/src/main/java/org/jruby/util/StringSupport.java

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
import org.jruby.runtime.Helpers;
5454
import org.jruby.runtime.ThreadContext;
5555
import org.jruby.runtime.builtin.IRubyObject;
56+
import org.jruby.util.StringSupport.TrTables;
5657
import org.jruby.util.collections.IntHashMap;
5758
import org.jruby.util.io.EncodingUtils;
5859
import sun.misc.Unsafe;
@@ -1700,7 +1701,30 @@ private static NeighborChar predChar(Encoding enc, byte[]bytes, int p, int len)
17001701
}
17011702

17021703
public static boolean isSingleByteOptimizable(CodeRangeable string, Encoding encoding) {
1703-
return string.getCodeRange() == CR_7BIT || encoding.maxLength() == 1;
1704+
if (encoding instanceof ASCIIEncoding || encoding instanceof USASCIIEncoding) {
1705+
return true;
1706+
}
1707+
else if (encoding.isUTF8()) {
1708+
// For UTF-8 it's worth scanning the string coderange when unknown.
1709+
return string.scanForCodeRange() == CR_7BIT;
1710+
}
1711+
/* Conservative. It may be ENC_CODERANGE_UNKNOWN. */
1712+
if (string.getCodeRange() == CR_7BIT) {
1713+
return true;
1714+
}
1715+
1716+
if (encoding.maxLength() == 1) {
1717+
return true;
1718+
}
1719+
1720+
/* Conservative. Possibly single byte.
1721+
* "\xa1" in Shift_JIS for example. */
1722+
return false;
1723+
}
1724+
1725+
public static boolean isSingleByteOptimizable(int flags, CodeRangeable string, Encoding encoding) {
1726+
return (((flags & Config.CASE_ASCII_ONLY) != 0 && (encoding.isUTF8() || encoding.maxLength() == 1)) ||
1727+
!((flags & Config.CASE_FOLD_TURKISH_AZERI) == 0 && string.getCodeRange() == CR_7BIT));
17041728
}
17051729

17061730
/**
@@ -1852,7 +1876,16 @@ public static boolean isAsciiOnly(CodeRangeable string) {
18521876
}
18531877

18541878
private static boolean isAsciiOnly(Encoding encoding, final int codeRange) {
1855-
return encoding.isAsciiCompatible() && codeRange == CR_7BIT;
1879+
switch(codeRange) {
1880+
case CR_UNKNOWN:
1881+
return codeRange == CR_7BIT && encoding.isAsciiCompatible();
1882+
case CR_7BIT:
1883+
return true;
1884+
default:
1885+
return false;
1886+
}
1887+
1888+
// return encoding.isAsciiCompatible() && codeRange == CR_7BIT;
18561889
}
18571890

18581891
/**

test/mri/excludes/TestCaseMapping.rb

Lines changed: 0 additions & 2 deletions
This file was deleted.

test/mri/excludes/TestCaseMappingPreliminary.rb

Lines changed: 0 additions & 2 deletions
This file was deleted.

0 commit comments

Comments
 (0)