Extension { #name : 'Unicode7' }

{ #category : 'Indexing Support' }
Unicode7 class >> _idxBasicCanCompareWithCharacterCollectionInstance: aCharacterCollection [
  "Answers whether <aCharacterCollection> may be stored in a basic BtreeNode
   whose #lastElementClass is the receiver (see RangeEqualityIndex
   class>>isBasicClass:).

   The argument itself is not inspected. The answer is whatever
   Unicode16 usingUnicodeCompares reports: *String and Unicode* instances
   may be compared with each other only while Unicode compares are in use."

  ^ Unicode16 usingUnicodeCompares

]

{ #category : 'Indexing Support' }
Unicode7 class >> _idxBasicCanCompareWithUnicodeInstance: aUnicodeString [
  "Answers whether <aUnicodeString> may be stored in a basic BtreeNode whose
   #lastElementClass is the receiver (see RangeEqualityIndex
   class>>isBasicClass:).

   Always answers true; the argument is not inspected and the answer does
   not depend on whether Unicode compares are in use."

  ^ true

]

{ #category : 'Private' }
Unicode7 class >> _withAll: aString [

"Private instance creation through primitive 941.
 Answers a Unicode7 when every codePoint of aString lies in 0..127.
 Answers nil otherwise, including when aString is a Utf8.
 The Smalltalk code after the primitive is the failure path, and it
 simply answers nil."

  <primitive: 941>
  ^ nil

]

{ #category : 'Instance Creation' }
Unicode7 class >> withAll: aString [

"Answers a Unicode7, Unicode16 or Unicode32 holding the contents of aString,
 using the minimum bytes per character required to represent the argument.

 Primitive 941 handles the direct case. The statements after it run only
 when the primitive fails:
   - an argument reporting a stringCharSize of at least 1 is handed to
     Unicode16 withAll: ;
   - a Utf8 is decoded with decodeToUnicode ;
   - anything else is validated to be a String or Utf8 and then reported
     as a primitive failure."

<primitive: 941>
aString stringCharSize >= 1
  ifTrue: [ ^ Unicode16 withAll: aString ].
(aString isKindOfClass: Utf8)
  ifTrue: [ ^ aString decodeToUnicode ].
aString _validateClasses: { String . Utf8 }.
^ self _primitiveFailed: #withAll: args: { aString }

]

{ #category : 'Converting' }
Unicode7 >> _asUnicode7 [
  "The receiver is already a Unicode7, so no conversion is needed;
   answers the receiver itself (not a copy)."

  ^ self

]

{ #category : 'Adding' }
Unicode7 >> _basicAt: index put: char [
"Not supported for Unicode7; callers should use codePointAt:put: instead.
 Both arguments are ignored and shouldNotImplement: is sent with this
 selector."

^ self shouldNotImplement: #_basicAt:put:

]

{ #category : 'Indexing Support' }
Unicode7 >> _equals: argString collator: anIcuCollator useMinSize: aFalse [

"Answers true if argString compares equal to the receiver using anIcuCollator
 and argString is not a Symbol, otherwise answers false.

 Primitive 989 does the comparison. The statements below run only when the
 primitive fails:
   - with a nil collator and nothing stored in session state slot 20, the
     comparison is retried with IcuCollator default
     (NOTE(review): slot 20 presumably caches the default collator - confirm);
   - a non-nil collator is validated to be an IcuCollator;
   - finally, a CharacterCollection argument is asked to compare itself to
     the receiver with #= , and any other argument answers false."

<primitive: 989>
anIcuCollator
  ifNil: [
    (System __sessionStateAt: 20) ifNil: [
      ^ self _equals: argString collator: IcuCollator default useMinSize: aFalse ] ]
  ifNotNil: [ anIcuCollator _validateClass: IcuCollator ].
^ (argString isKindOf: CharacterCollection) and: [ argString = self ]

]

{ #category : 'Private' }
Unicode7 >> _findString: subString startingAt: startIndex ignoreCase: aBoolean [

"Searches the receiver for subString beginning at startIndex.
 A case-sensitive search (aBoolean false) is delegated to the superclass
 implementation. A case-insensitive search goes through
 _findStringNocase:startingAt:collator: using IcuCollator default."

^ aBoolean
    ifTrue: [
      self _findStringNocase: subString startingAt: startIndex
        collator: IcuCollator default ]
    ifFalse: [
      super _findString: subString startingAt: startIndex ignoreCase: aBoolean ]

]

{ #category : 'Indexing Support' }
Unicode7 >> _idxBasicCanCompareWithClass: aClass [
  "Answers whether the receiver may be stored in a basic BtreeNode whose
   #lastElementClass is <aClass> (see RangeEqualityIndex class>>isBasicClass:).

   This is a double dispatch: the decision is handed to <aClass> through
   _idxBasicCanCompareWithUnicodeInstance: , passing the receiver, so each
   class decides whether it accepts Unicode instances."

  ^ aClass _idxBasicCanCompareWithUnicodeInstance: self

]

{ #category : 'New Indexing Comparison - prims' }
Unicode7 >> _idxPrimCompareEqualTo: aCharCollection [
  "Equality test used by the indexing subsystem when ordering entries for
   insertion into indexing objects.

   Answers false for a nil argument and for a Symbol that is not a Unicode
   string. Any other non-Unicode argument signals an ArgumentError.
   A Unicode argument is compared with _idxUnicodeCompareEqualTo: ; the
   same comparison is also performed if the ArgumentError is resumed."

  aCharCollection ifNil: [ ^ false ].
  aCharCollection isUnicodeString
    ifTrue: [ ^ self _idxUnicodeCompareEqualTo: aCharCollection ].
  "From here on the argument is known not to be Unicode."
  aCharCollection _isSymbol ifTrue: [ ^ false ].
  ArgumentError signal:'non-Unicode argument disallowed in Unicode comparison'.
  ^ self _idxUnicodeCompareEqualTo: aCharCollection

]

{ #category : 'New Indexing Comparison - prims' }
Unicode7 >> _idxPrimCompareGreaterThan: aCharCollection [

"This comparison operation is used for the indexing subsystem to
 determine an ordering for insertion into indexing objects and for
 doing indexing subsystem comparisons.

 This method collates the same as the > method, except that
 it returns true if the argument is nil. The counterpart
 _idxPrimCompareLessThan: returns false for nil, so together they treat
 every receiver as greater than nil.

 A non-nil argument that is not a Unicode string signals an ArgumentError.
 Otherwise (or if that error is resumed) the result comes from
 _idxUnicodeCompareGreaterThan: ."

  "Fix: this previously answered false, contradicting the documented
   contract for a nil argument."
  aCharCollection ifNil:[ ^ true ].
  aCharCollection isUnicodeString ifFalse: [ "not Unicode"
    ArgumentError signal:'non-Unicode argument disallowed in Unicode comparison' ].
  ^self _idxUnicodeCompareGreaterThan: aCharCollection

]

{ #category : 'New Indexing Comparison - prims' }
Unicode7 >> _idxPrimCompareLessThan: aCharCollection [

"Less-than test used by the indexing subsystem, both for ordering entries
 on insertion into indexing objects and for index comparisons.

 Collates like the < method, except that a nil argument answers false.
 A non-nil argument that is not a Unicode string signals an ArgumentError.
 Otherwise (or if that error is resumed) the result comes from
 _idxUnicodeCompareLessThan: ."

  aCharCollection ifNil: [ ^ false ].
  aCharCollection isUnicodeString
    ifFalse: [
      ArgumentError signal:'non-Unicode argument disallowed in Unicode comparison' ].
  ^ self _idxUnicodeCompareLessThan: aCharCollection

]

{ #category : 'New Indexing Comparison' }
Unicode7 >> _idxUnicodeCompareEqualTo: aCharCollection [
  "Index equality comparison against a Unicode argument.
   Adds nothing of its own; forwards to the superclass implementation
   and answers its result."

  ^ super _idxUnicodeCompareEqualTo: aCharCollection

]

{ #category : 'New Indexing Comparison' }
Unicode7 >> _idxUnicodeCompareGreaterThan: aCharCollection [
  "Index greater-than comparison against a Unicode argument.
   Adds nothing of its own; forwards to the superclass implementation
   and answers its result."

  ^ super _idxUnicodeCompareGreaterThan: aCharCollection

]

{ #category : 'New Indexing Comparison' }
Unicode7 >> _idxUnicodeCompareLessThan: aCharCollection [
  "Index less-than comparison against a Unicode argument.
   Adds nothing of its own; forwards to the superclass implementation
   and answers its result."

  ^ super _idxUnicodeCompareLessThan: aCharCollection

]

{ #category : 'Private' }
Unicode7 >> _retryUnicodeAdd: aCharOrCharColl [
  "Smalltalk retry path used by add: , addAll: and addLast: after their
   primitive has failed.

   Character argument:
     - a codePoint in 16rD800..16rDFFF signals OutOfRange;
     - a codePoint above 16rFFFF converts the receiver to quad-byte form
       and adds there;
     - any other codePoint above 16r7F converts the receiver to double-byte
       form and adds there;
     - a codePoint of 16r7F or less signals OutOfRange, since this path
       has no further way to add it.

   Other arguments are classified by _stringCharSize. The bit 16r8 marks a
   Unicode string; the low three bits are taken as the character size
   (NOTE(review): presumably bytes per character - confirm).
     - a Unicode string that converts with _asUnicode7 is added in that
       form; otherwise the receiver is widened to double-byte or quad-byte
       form first;
     - a non-Unicode string is added via asUnicodeString ;
     - any other CharacterCollection is added via asString ;
     - anything else is enumerated with do: and added element by element."

  | sizeInfo bytesPerChar |
  aCharOrCharColl class == Character ifTrue: [ | codePoint |
    codePoint := aCharOrCharColl codePoint.
    codePoint >= 16rD800 ifTrue: [
      codePoint <= 16rDFFF ifTrue: [
        OutOfRange signal:'codePoint 16r', codePoint asHexString ,' is illegal for Unicode' ].
      ^ codePoint > 16rFFFF
          ifTrue: [ self _convertToQuadByte add: aCharOrCharColl ]
          ifFalse: [ self _convertToDoubleByte add: aCharOrCharColl ] ].
    codePoint > 16r7F
      ifTrue: [ ^ self _convertToDoubleByte add: aCharOrCharColl ].
    ^ OutOfRange new
        name:'aChar codePoint' min: 0 max: 16r10FFFF actual: codePoint ;
        signal ].

  sizeInfo := aCharOrCharColl _stringCharSize.
  bytesPerChar := sizeInfo bitAnd: 16r7.
  (sizeInfo bitAnd: 16r8) ~~ 0 ifTrue: [
    "The argument is a Unicode string."
    aCharOrCharColl _asUnicode7 ifNotNil: [ :src7 |
      self add: src7.
      ^ aCharOrCharColl ].
    ^ bytesPerChar <= 2
        ifTrue: [ self _convertToDoubleByte add: aCharOrCharColl ]
        ifFalse: [ self _convertToQuadByte add: aCharOrCharColl ] ].

  bytesPerChar ~~ 0 ifTrue: [
    "A string, but not a Unicode one."
    self add: aCharOrCharColl asUnicodeString.
    ^ aCharOrCharColl ].

  (aCharOrCharColl isKindOf: CharacterCollection)
    ifTrue: [ ^ self add: aCharOrCharColl asString ].

  aCharOrCharColl do: [ :element | self add: element ].
  ^ aCharOrCharColl

]

{ #category : 'Comparing' }
Unicode7 >> _unicodeEqual: argString [
  "Equality comparison of the receiver with argString.
   Passes a nil collator, which stands for IcuCollator default, and false
   for useMinSize. Unlike #= , the argument is not checked for being a
   Unicode string before the comparison."

  ^ self _equals: argString collator: nil useMinSize: false

]

{ #category : 'Comparing' }
Unicode7 >> _unicodeGreaterThan: argString [
  "Answers true when the receiver collates after argString.
   The comparison uses a nil collator, which stands for IcuCollator default,
   with useMinSize false. A positive three-way result means greater than."

  | order |
  order := self compareTo: argString collator: nil useMinSize: false.
  ^ order > 0

]

{ #category : 'Comparing' }
Unicode7 >> _unicodeLessThan: argString [
  "Answers true when the receiver collates before argString.
   The comparison uses a nil collator, which stands for IcuCollator default,
   with useMinSize false. A negative three-way result means less than."

  | order |
  order := self compareTo: argString collator: nil useMinSize: false.
  ^ order < 0

]

{ #category : 'Adding' }
Unicode7 >> , aCharOrCharColl [

"Concatenation. Answers a new object holding the elements of the receiver
 followed by the elements of aCharOrCharColl. The argument must be a
 CharacterCollection or a Character.

 Primitive 920 does the work. If it fails, a copy of the receiver is made,
 the argument is appended to that copy with addAll: , and the copy is
 answered."

<primitive: 920>
| result |
result := self copy.
result addAll: aCharOrCharColl.
^ result

]

{ #category : 'Comparing' }
Unicode7 >> < argString [
  "Answers true when the receiver collates before argString, using
   IcuCollator default (requested by passing a nil collator).

   An argument whose _stringCharSize is non-zero but lacks the Unicode bit
   16r8 is a non-Unicode string and signals an ArgumentError. A zero
   _stringCharSize is passed through to compareTo:collator:useMinSize: ."

  | sizeInfo |
  sizeInfo := argString _stringCharSize.
  ((sizeInfo bitAnd: 16r8) == 0 and: [ sizeInfo ~~ 0 ])
    ifTrue: [
      ArgumentError signal:'non-Unicode argument disallowed in Unicode comparison' ].
  ^ (self compareTo: argString collator: nil useMinSize: false) < 0

]

{ #category : 'Comparing' }
Unicode7 >> = argString [
  "Answers true when the receiver equals argString, using IcuCollator
   default (requested by passing a nil collator).

   For an argument without the Unicode bit 16r8 in its _stringCharSize :
     - a Symbol answers false;
     - a zero _stringCharSize (not a string) answers false;
     - any other string signals an ArgumentError."

  | sizeInfo |
  sizeInfo := argString _stringCharSize.
  (sizeInfo bitAnd: 16r8) == 0 ifTrue: [
    "The argument is not a Unicode string."
    (argString _isSymbol or: [ sizeInfo == 0 ]) ifTrue: [ ^ false ].
    ArgumentError signal:'non-Unicode argument disallowed in Unicode comparison' ].
  ^ self _equals: argString collator: nil useMinSize: false

]

{ #category : 'Comparing' }
Unicode7 >> > argString [
  "Answers true when the receiver collates after argString, using
   IcuCollator default (requested by passing a nil collator).

   An argument whose _stringCharSize is non-zero but lacks the Unicode bit
   16r8 is a non-Unicode string and signals an ArgumentError. A zero
   _stringCharSize is passed through to compareTo:collator:useMinSize: ."

  | sizeInfo |
  sizeInfo := argString _stringCharSize.
  ((sizeInfo bitAnd: 16r8) == 0 and: [ sizeInfo ~~ 0 ])
    ifTrue: [
      ArgumentError signal:'non-Unicode argument disallowed in Unicode comparison' ].
  ^ (self compareTo: argString collator: nil useMinSize: false) > 0

]

{ #category : 'Adding' }
Unicode7 >> add: aCharOrCharColl [

"Adds aCharOrCharColl to the receiver through primitive 918.
 If the primitive fails, the add is retried in Smalltalk by
 _retryUnicodeAdd: , which may convert the receiver to a wider
 representation or signal an error."

<primitive: 918>
^ self _retryUnicodeAdd: aCharOrCharColl

]

{ #category : 'Adding' }
Unicode7 >> addAll: aCharOrCharColl [

"Adds aCharOrCharColl to the receiver. Uses the same primitive (918) and
 the same failure path as add: . If the primitive fails, the add is
 retried in Smalltalk by _retryUnicodeAdd: ."

<primitive: 918>
^ self _retryUnicodeAdd: aCharOrCharColl

]

{ #category : 'Adding' }
Unicode7 >> addCodePoint: aSmallInteger [

"Adds the character whose codePoint is aSmallInteger, through primitive
 1049. If the primitive fails, a Character is built from the codePoint
 and added with add: ."

<primitive: 1049>
| char |
char := Character codePoint: aSmallInteger.
^ self add: char

]

{ #category : 'Adding' }
Unicode7 >> addLast: aCharOrCharColl [

"Adds aCharOrCharColl to the receiver. Uses the same primitive (918) and
 the same failure path as add: . If the primitive fails, the add is
 retried in Smalltalk by _retryUnicodeAdd: ."

<primitive: 918>
^ self _retryUnicodeAdd: aCharOrCharColl

]

{ #category : 'Converting' }
Unicode7 >> asString [

"Answers a String representation of the receiver, built by
 String withAll: from the contents of the receiver."

^ String withAll: self

]

{ #category : 'Comparing' }
Unicode7 >> at: offset equalsNoCase: aString [

"Answers true if aString is contained in the receiver, starting at
 offset. Answers false otherwise.
 Comparison is done with full case folding and code point order,
 using icu:UnicodeString::caseCompare in libicu.

 Primitive 930 does the comparison. The statements below run only when it
 fails:
   - an argument that is not a Unicode string (bit 16r8 clear in its
     _stringCharSize ) is converted with asUnicodeString and the message
     is sent again;
   - otherwise aString must be a Unicode7, Unicode16 or Unicode32 ;
   - offset must be a SmallInteger in 1..self size, else an index error
     is reported;
   - if all of that holds, a primitive failure is reported."

<primitive: 930>
(aString _stringCharSize bitAnd: 16r8) == 0
  ifTrue: [ ^ self at: offset equalsNoCase: aString asUnicodeString ].
aString _validateClasses: { Unicode7 . Unicode16 . Unicode32 }.
offset _isSmallInteger
  ifFalse: [ offset _validateClass: SmallInteger ].
(offset <= 0 or: [ offset > self size ])
  ifTrue: [ ^ self _errorIndexOutOfRange: offset ].
^ self _primitiveFailed: #at:equalsNoCase: args: { offset . aString }

]

{ #category : 'Adding' }
Unicode7 >> at: anIndex put: aChar [

"Stores aChar at the specified location through primitive 919.

 The statements below run only when the primitive fails:
   - a Character in 16rD800..16rDFFF signals OutOfRange;
   - a Character above 16r10FFFF signals OutOfRange;
   - a Character above 16rFFFF converts the receiver to quad-byte form and
     the store is repeated there;
   - any other Character above 16r7F converts the receiver to double-byte
     form and the store is repeated there;
   - a non-Character argument is rejected by _validateClass: ;
   - anIndex must be a SmallInteger in 1..(self size + 1), otherwise the
     matching index error is reported;
   - if none of the above applies, a primitive failure is reported."

<primitive: 919>
| codePoint |
aChar class == Character
  ifFalse: [ aChar _validateClass: Character ]
  ifTrue: [
    codePoint := aChar codePoint.
    codePoint >= 16rD800 ifTrue: [
      codePoint <= 16rDFFF ifTrue: [
        OutOfRange signal:'codePoint 16r', codePoint asHexString ,' is illegal for Unicode' ].
      codePoint > 16rFFFF ifTrue: [
        codePoint > 16r10FFFF ifTrue: [
          OutOfRange new
            name:'aChar codePoint' min: 0 max: 16r10FFFF actual: codePoint ;
            signal ].
        ^ self _convertToQuadByte at: anIndex put: aChar ].
      ^ self _convertToDoubleByte at: anIndex put: aChar ].
    codePoint > 16r7F
      ifTrue: [ ^ self _convertToDoubleByte at: anIndex put: aChar ] ].
anIndex _isSmallInteger
  ifFalse: [ ^ self _errorNonIntegerIndex: anIndex ].
(anIndex <= 0 or: [ anIndex > (self size + 1) ])
  ifTrue: [ ^ self _errorIndexOutOfRange: anIndex ].
self _primitiveFailed: #at:put: args: { anIndex . aChar }

]

{ #category : 'Adding' }
Unicode7 >> codePointAt: anIndex put: anInt [

"Stores codePoint anInt at the specified location. Returns anInt.
 Class of receiver is changed to Unicode16 or Unicode32 if needed.

 Primitive 935 does the store. The statements below run only when it
 fails:
   - a SmallInteger in 16rD800..16rDFFF signals OutOfRange;
   - a SmallInteger above 16r10FFFF signals OutOfRange;
   - a SmallInteger above 16rFFFF converts the receiver to quad-byte form
     and the store is repeated there;
   - any other SmallInteger above 16r7F converts the receiver to
     double-byte form and the store is repeated there;
   - anIndex must be a SmallInteger in 1..(self size + 1), otherwise the
     matching index error is reported;
   - if none of the above applies, a primitive failure is reported."

<primitive: 935>
(anInt class == SmallInteger) ifTrue:[
  anInt >= 16rD800 ifTrue:[
    anInt <= 16rDFFF ifTrue:[
      OutOfRange signal:'codePoint 16r', anInt asHexString ,' is illegal for Unicode'.
    ].
    anInt > 16rFFFF ifTrue:[
      anInt > 16r10FFFF ifTrue:[
        OutOfRange new name:'a codePoint' min: 0 max: 16r10FFFF
                        actual: anInt ; signal
      ].
      ^ self _convertToQuadByte codePointAt: anIndex put: anInt
    ].
    ^ self _convertToDoubleByte codePointAt: anIndex put: anInt
  ].
  anInt > 16r7F ifTrue:[
    ^ self _convertToDoubleByte codePointAt: anIndex put: anInt
  ].
].
(anIndex _isSmallInteger) ifTrue: [
  ((anIndex > (self size + 1)) or: [anIndex <= 0]) ifTrue: [
    ^ self _errorIndexOutOfRange: anIndex
  ]
] ifFalse: [
  ^ self _errorNonIntegerIndex: anIndex
] .
"Fix: report the selector of this method. The failure was previously
 attributed to #at:put: , which pointed at the wrong method."
self _primitiveFailed: #codePointAt:put: args: { anIndex . anInt }

]

{ #category : 'Comparing' }
Unicode7 >> compareCase: aString [

"Answers -1, 0 or 1 when the receiver is less than, equal to, or greater
 than aString.
 Comparison is done with full case folding and code point order,
 using icu:UnicodeString::caseCompare in libicu.

 Primitive 969 does the comparison. The statements below run only when it
 fails:
   - an argument that is not a Unicode string (bit 16r8 clear in its
     _stringCharSize ) is converted with asUnicodeString and the message
     is sent again;
   - otherwise aString must be a Unicode7, Unicode16 or Unicode32 , and a
     primitive failure is reported."

<primitive: 969>
(aString _stringCharSize bitAnd: 16r8) == 0
  ifTrue: [ ^ self compareCase: aString asUnicodeString ].
aString _validateClasses: { Unicode7 . Unicode16 . Unicode32 }.
^ self _primitiveFailed: #compareCase: args: { aString }

]

{ #category : 'Indexing Support' }
Unicode7 >> compareTo: argString collator: anIcuCollator useMinSize: aMinSize [

"Three-way comparison. Answers -1, 0 or 1 when the receiver is less than,
 equal to, or greater than argString.
 argString must be a String, MultiByteString, or a Utf8.
 A nil anIcuCollator is interpreted as IcuCollator default.

 aMinSize selects how much is compared:
   false             - normal comparison;
   true              - compare stops at (self size min: argString size),
                       which is Squeak semantics for comparison;
   SmallInteger >= 1 - compare stops at
                       aMinSize min: (self size min: argString size).

 Primitive 917 does the comparison. The statements below run only when it
 fails:
   - with a nil collator and nothing stored in session state slot 20, the
     comparison is retried with IcuCollator default
     (NOTE(review): slot 20 presumably caches the default collator - confirm);
   - a non-nil collator is validated to be an IcuCollator;
   - aMinSize and argString are validated, then a primitive failure is
     reported."

<primitive: 917>
anIcuCollator
  ifNil: [
    (System __sessionStateAt: 20) ifNil: [
      ^ self compareTo: argString collator: IcuCollator default useMinSize: aMinSize ] ]
  ifNotNil: [ anIcuCollator _validateClass: IcuCollator ].
aMinSize _validateClasses: { Boolean . SmallInteger }.
argString _validateClasses: { String . Utf8 }.
^ self
    _primitiveFailed: #compareTo:collator:useMinSize:
    args: { argString . anIcuCollator . aMinSize }

]

{ #category : 'Converting' }
Unicode7 >> decodeFromUTF8 [
  "A Unicode7 holds only 7 bit ascii, so there is nothing to decode;
   answers the receiver itself (not a copy)."

  ^ self

]

{ #category : 'Comparing' }
Unicode7 >> equals: aString collatingTable: aTable [
  "Collating-table comparison is disallowed for Unicode7.
   Both arguments are ignored and shouldNotImplement: is sent with this
   selector."

  self shouldNotImplement: #equals:collatingTable:

]

{ #category : 'Comparing' }
Unicode7 >> equalsNoCase: aString [
  "Answers true if the receiver and aString are equal using full case
   folding and code point order, using icu:UnicodeString::caseCompare in
   libicu. Answers false otherwise.

   Implemented by testing whether compareCase: answers 0. compareCase:
   converts a non-Unicode string argument with asUnicodeString if its
   primitive fails."

  | order |
  order := self compareCase: aString.
  ^ order == 0

]

{ #category : 'Comparing' }
Unicode7 >> greaterThan: aString collatingTable: aTable [
  "Collating-table comparison is disallowed for Unicode7.
   Both arguments are ignored and shouldNotImplement: is sent with this
   selector."

  self shouldNotImplement: #greaterThan:collatingTable:

]

{ #category : 'Adding' }
Unicode7 >> insertAll: aCharOrCharColl at: offset [

"Inserts aCharOrCharColl into the receiver at offset through primitive 921.

 The statements below run only when the primitive fails:
   - offset must be a SmallInteger in 1..(self size + 1), else an index
     error is reported;
   - a Unicode string argument (bit 16r8 set in its _stringCharSize ) is
     inserted in Unicode7 form when _asUnicode7 can convert it; otherwise
     the receiver is converted to double-byte form when the low three size
     bits equal 2, or to quad-byte form, and the insert is repeated there;
   - a non-Unicode string is converted with asUnicodeString and inserted;
   - a Character is wrapped in a new one-element string and inserted;
   - anything else is rejected by _validateClasses: , and if that passes a
     primitive failure is reported."

<primitive: 921>
| sizeInfo bytesPerChar wrapper |
offset _isSmallInteger
  ifFalse: [ offset _validateClass: SmallInteger ].
(offset <= 0 or: [ offset > (self size + 1) ])
  ifTrue: [ ^ self _errorIndexOutOfRange: offset ].

sizeInfo := aCharOrCharColl _stringCharSize.
bytesPerChar := sizeInfo bitAnd: 16r7.
(sizeInfo bitAnd: 16r8) ~~ 0 ifTrue: [
  "The argument is a Unicode string."
  aCharOrCharColl _asUnicode7
    ifNotNil: [ :src7 | ^ self insertAll: src7 at: offset ].
  ^ bytesPerChar == 2
      ifTrue: [ self _convertToDoubleByte insertAll: aCharOrCharColl at: offset ]
      ifFalse: [ self _convertToQuadByte insertAll: aCharOrCharColl at: offset ] ].

bytesPerChar ~~ 0
  ifTrue: [ ^ self insertAll: aCharOrCharColl asUnicodeString at: offset ].

aCharOrCharColl class == Character ifTrue: [
  wrapper := Unicode7 new.
  wrapper add: aCharOrCharColl.
  ^ self insertAll: wrapper at: offset ].

aCharOrCharColl _validateClasses: { Unicode7 . Unicode16 . Unicode32 }.
self _primitiveFailed: #insertAll:at: args: { aCharOrCharColl . offset }

]

{ #category : 'Comparing' }
Unicode7 >> isEquivalent: aString [
  "Answers true if the receiver and aString are equal using full case
   folding and code point order, using icu:UnicodeString::caseCompare in
   libicu. Answers false otherwise.

   An argument whose _stringCharSize is 0 is not a string and answers
   false without being compared. Any other argument is equivalent when
   compareCase: answers 0."

  ^ aString _stringCharSize ~~ 0
      and: [ (self compareCase: aString) == 0 ]

]

{ #category : 'Comparing' }
Unicode7 >> lessThan: aString collatingTable: aTable [
  "Collating-table comparison is disallowed for Unicode7.
   Both arguments are ignored and shouldNotImplement: is sent with this
   selector."

  self shouldNotImplement: #lessThan:collatingTable:

]

{ #category : 'Class Membership' }
Unicode7 >> speciesForPrint [

"Answers the class Unicode7 as a fixed literal, not the class of the
 receiver."

^ Unicode7

]

{ #category : 'Adding' }
Unicode7 >> squeakBasicAt: anIndex put: aValue [
"Not supported for Unicode7; callers should use codePointAt:put: instead.
 Both arguments are ignored and shouldNotImplement: is sent with this
 selector."

^ self shouldNotImplement: #squeakBasicAt:put:

]
