Extension { #name : 'Utf8' }

{ #category : 'Indexing Support' }
Utf8 class >> _canCreateQueryOnInstances [
  "GsQuery may be created on most subclasses of Collection. Answer false if a GsQuery is not appropriate
   for the receiver.
   Utf8 always answers false here."

  ^ false

]

{ #category : 'Private' }
Utf8 class >> _detachedEvaluateString: aUtf8 [
  "Used by GsTsExternalSession detached execution.
   Decodes aUtf8 to source text and evaluates it. If evaluation signals an
   Error, ControlInterrupt or TestFailure, the exception and two stack reports
   are written with GsFile gciLogServer: and the handler returns from the
   exception. Always answers nil."
  | src proc |
  "Remember the process running this method so the handler can report its stack."
  proc := GsProcess current .
  System gemConfigurationAt:#GemExceptionSignalCapturesStack put: true .
  "Decode with decodeToUnicode or decodeToString depending on Unicode16 usingUnicodeCompares."
  src := Unicode16 usingUnicodeCompares ifTrue:[ aUtf8 decodeToUnicode ] ifFalse:[ aUtf8 decodeToString ].
  [ 
    src evaluate
  ] on: Error, ControlInterrupt, TestFailure  do:[:ex | | stk stkDone |
    "Log the exception description and the stack report supplied by the exception."
    GsFile gciLogServer: ('ERROR ', ex asString) lf  .
    GsFile gciLogServer: ex stackReport .
    GsFile gciLogServer: '===========' .
    "Build a detailed stack report for proc in a forked process, then poll every
     10 ms until the forked block has set stkDone.
     NOTE(review): if the forked block fails before setting stkDone this loop
     would not terminate - confirm whether that can happen."
    [ 
      stk := proc stackReportToLevel: 1000 withArgsAndTemps: true andMethods: true  includeSource: false .
      stkDone := true .
    ] fork .
    [ stkDone == nil ] whileTrue:[ Delay waitForMilliseconds:10 ].
    GsFile gciLogServer: stk .
    GsFile gciLogServer: '===========' .
    "NOTE(review): presumably keeps the gem log file so the reports above survive - confirm."
    System removeGemLogOnExit: false .
    ex return
  ]. 
  ^ nil "no client to return anything to."
]


{ #category : 'Converting' }
Utf8 class >> fromString: aString [

"Answers the result of sending encodeAsUTF8 to aString."

^ aString encodeAsUTF8

]

{ #category : 'Storing and Loading' }
Utf8 class >> loadFrom: passiveObj [
  "Reads the bytes for a new instance from passiveObj.
   A ByteArray sized with passiveObj readSize is loaded from passiveObj first,
   then an instance of the receiver is built from it with withBytes:."
  | byteCount bytes |
  byteCount := passiveObj readSize.
  bytes := ByteArray new: byteCount.
  bytes loadFrom: passiveObj.
  ^ self withBytes: bytes
]

{ #category : 'Instance Creation' }
Utf8 class >> withAll: aStringOrUtf [
  "Answers the result of sending encodeAsUTF8 to aStringOrUtf.
   Note that Utf8 >> encodeAsUTF8 answers the receiver itself, so a Utf8
   argument is answered unchanged rather than copied."
  ^ aStringOrUtf encodeAsUTF8

]

{ #category : 'Private' }
Utf8 class >> withBytes: aCollection [

"Returns an instance of the receiver with the byte values of the argument.
 Private, for use in testing.
 Primitive 1021 does the work when it succeeds; the Smalltalk code below is
 the fallback that runs when the primitive fails."

<primitive: 1021>
"Fallback for arguments that are neither a String nor a ByteArray: copy the
 elements one at a time into a new instance."
((aCollection isKindOf: String) or:[ aCollection isKindOf: ByteArray]) ifFalse:[
  | res idx |
  res := self new .
  idx := 1 .
  aCollection do:[:elem |
     "unsigned8At:put: stores each element as an unsigned value of width 1"
     res unsigned8At: idx put: elem .
     idx := idx + 1 .
  ].
  ^ res
].
"A String or ByteArray argument that still failed the primitive is reported as a primitive failure."
^ self _primitiveFailed: #withBytes: args: { aCollection }

]

{ #category : 'Comparing' }
Utf8 >> _asUnicode16 [
  "Decodes the receiver with decodeToUnicode, then answers the result of
   sending _asUnicode16 to the decoded string."
  | decoded |
  decoded := self decodeToUnicode.
  ^ decoded _asUnicode16

]

{ #category : 'Comparing' }
Utf8 >> _asUnicode7 [
  "Decodes the receiver with decodeToUnicode, then answers the result of
   sending _asUnicode7 to the decoded string."
  | decoded |
  decoded := self decodeToUnicode.
  ^ decoded _asUnicode7

]

{ #category : 'Comparing' }
Utf8 >> _basicAt: index put: char [
"Disallowed. Build a complete Unicode7, Unicode16 or Unicode32
 and then convert it with encodeAsUTF8.
 Reports the selector of this method to shouldNotImplement: so that the
 resulting error names the method that was actually called."
^ self shouldNotImplement: #_basicAt:put:

]

{ #category : 'Converting' }
Utf8 >> _coerceToUnicode [

"Decodes the receiver with _decodeFromUtf8: true maxSize: nil, which is the
 same send used by decodeToUnicode."

^ self _decodeFromUtf8: true maxSize: nil

]

{ #category : 'Encoding' }
Utf8 >> _decodeFromUTF16toUTF8: bigEndianBoolean [
 "Disallowed, a Utf8 contains UTF8 encoded data, not UTF16.
  bigEndianBoolean is ignored. Answers the result of shouldNotImplement:."
 ^ self shouldNotImplement: #_decodeFromUTF16toUTF8:
]

{ #category : 'Comparing' }
Utf8 >> _deleteNoShrinkFrom: startIndex to: endIndex anchorTailSize: aSize [
"Disallowed for a Utf8. All arguments are ignored.
 Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #_deleteNoShrinkFrom:to:anchorTailSize:

]

{ #category : 'Comparing' }
Utf8 >> _equals: argString collator: anIcuCollator useMinSize: aFalse [

"Returns true if argString compares equal to the receiver using anIcuCollator
 and if argString is not a Symbol, otherwise returns false.
 Primitive 988 performs the comparison; the Smalltalk code below is the
 fallback that runs when the primitive fails."
<primitive: 988>
"With a nil collator and nothing stored in session state slot 20, retry with
 IcuCollator default. A non-nil collator must be an IcuCollator.
 NOTE(review): slot 20 presumably caches the default collator - confirm."
anIcuCollator ifNil:[ (System __sessionStateAt: 20) ifNil:[
  ^ self _equals: argString collator: IcuCollator default useMinSize: aFalse ]
] ifNotNil:[  anIcuCollator _validateClass: IcuCollator ].
"A CharacterCollection argument is asked to do the comparison itself;
 any other argument is not equal."
(argString isKindOf: CharacterCollection) ifTrue:[ ^ argString = self ].
^ false

]

{ #category : 'Comparing' }
Utf8 >> _int32LittleEndianAt: startIndex put: anInteger [

"Disallowed for a Utf8. Both arguments are ignored.
 Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #_int32LittleEndianAt:put:

]

{ #category : 'Comparing' }
Utf8 >> _primAddRandomBytes: anIntHowMany startingAt: anIntOffset [

"Disallowed for a Utf8. Both arguments are ignored.
 Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #_primAddRandomBytes:startingAt:

]

{ #category : 'Comparing' }
Utf8 >> _reverseDeleteNoShrinkFrom: startIndex to: endIndex anchorHeadSize: aSize [
"Disallowed for a Utf8. All arguments are ignored.
 Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #_reverseDeleteNoShrinkFrom:to:anchorHeadSize:

]

{ #category : 'Comparing' }
Utf8 >> < argString [
  "Answers true when compareTo:collator:useMinSize: answers -1 for argString.
   A nil collator is passed, which that method interprets as IcuCollator default."
  | order |
  order := self compareTo: argString collator: nil useMinSize: false.
  ^ order == -1

]

{ #category : 'Comparing' }
Utf8 >> <= argString [
  "Answers the negation of (self > argString)."
  | isGreater |
  isGreater := self > argString.
  ^ isGreater not

]

{ #category : 'Comparing' }
Utf8 >> = argString [
  "Compares receiver to argument using  IcuCollator default.
   Delegates to _equals:collator:useMinSize: with a nil collator and
   useMinSize false."

  ^ self _equals: argString collator: nil useMinSize: false

]

{ #category : 'Comparing' }
Utf8 >> > argString [
  "Answers true when compareTo:collator:useMinSize: answers 1 for argString.
   A nil collator is passed, which that method interprets as IcuCollator default."
  | order |
  order := self compareTo: argString collator: nil useMinSize: false.
  ^ order == 1

]

{ #category : 'Comparing' }
Utf8 >> >= argString [
  "Answers the negation of (self < argString)."
  | isLess |
  isLess := self < argString.
  ^ isLess not

]

{ #category : 'Encoding' }
Utf8 >> addAllUtf8: aCharacterOrString [
  "Appends the UTF8 encoding of the argument to the receiver.
   aCharacterOrString must be a kind of Character or String.
   Primitive 1045 does the work; the Smalltalk code below runs only when the
   primitive fails. It validates the argument and then reports the primitive
   failure under the selector of this method."
  <primitive: 1045>
  aCharacterOrString _validateKindOfClasses: { Character . String } .
  ^ self _primitiveFailed: #addAllUtf8: args: { aCharacterOrString }

]

{ #category : 'Converting' }
Utf8 >> asByteArray [
  "Answers the result of ByteArray withAll: with the receiver as argument."
  ^ ByteArray withAll: self .

]

{ #category : 'Filesystem-Gemstone-Kernel' }
Utf8 >> asString [
  "Answers the receiver decoded with decodeToString."
  "include in bootstrap in case upgrade stops after bootstrap"
  "override the *filesystem  ByteArray >> asString"
  ^ self decodeToString   "or maybe  decodeToUnicode ??"

]

{ #category : 'Converting' }
Utf8 >> asUnicodeString [

"This will eventually be Deprecated.
 New code should use decodeToUnicode, which performs the same send.
 Decode receiver from UTF8 format.
 Returns a Unicode7 , Unicode16 or Unicode32 ,
 using the minimum character size needed to represent decoded result."

^ self _decodeFromUtf8: true maxSize: nil

]

{ #category : 'Comparing' }
Utf8 >> at: index put: char [
"Disallowed. Build a complete Unicode7, Unicode16 or Unicode32
 and then convert it with encodeAsUTF8.
 Both arguments are ignored. Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #at:put:

]

{ #category : 'Comparing' }
Utf8 >> at: index put: aString fromOffset: stringOffset sizeBytes: numBytes [
"Disallowed. Build a complete Unicode7, Unicode16 or Unicode32
 and then convert it with encodeAsUTF8.
 All arguments are ignored. Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #at:put:fromOffset:sizeBytes:

]

{ #category : 'Comparing' }
Utf8 >> at: index put: aNumber signed: aBool width: aWidthInBytes [
"Disallowed for a Utf8. All arguments are ignored.
 Answers the result of shouldNotImplement:.
 unsigned8At:put: in this class reaches the superclass implementation directly."
^ self shouldNotImplement: #at:put:signed:width:

]

{ #category : 'Accessing' }
Utf8 >> at: index putChar: char [
"Disallowed. Build a complete Unicode7, Unicode16 or Unicode32
 and then convert it with encodeAsUTF8.
 Both arguments are ignored. Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #at:putChar:

]

{ #category : 'Accessing' }
Utf8 >> at: aSmallInt putOldOopValueOfObject: anObject [
"Disallowed for a Utf8. Both arguments are ignored.
 Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #at:putOldOopValueOfObject:

]

{ #category : 'Accessing' }
Utf8 >> at: aSmallInt putOopValueOfObject: anObject [
"Disallowed for a Utf8. Both arguments are ignored.
 Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #at:putOopValueOfObject:

]

{ #category : 'Accessing' }
Utf8 >> at: index signed: aBool width: aWidthInBytes [
"Disallowed for a Utf8. All arguments are ignored.
 Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #at:signed:width:

]

{ #category : 'Accessing' }
Utf8 >> at: index sizeBytes: anInt stringSize: anIntOrNil [
"Disallowed for a Utf8. All arguments are ignored.
 Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #at:sizeBytes:stringSize:

]

{ #category : 'Accessing' }
Utf8 >> atOrNil: anIndex [
"Disallowed for a Utf8. anIndex is ignored.
 Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #atOrNil:

]

{ #category : 'Converting' }
Utf8 >> bytesIntoUnicode [
 "Disallowed, use decodeToUnicode instead.
  Answers the result of shouldNotImplement:."
 ^ self shouldNotImplement: #bytesIntoUnicode

]

{ #category : 'Accessing' }
Utf8 >> charAt: index [

"Disallowed, convert with decodeFromUTF8 first.
 index is ignored. Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #charAt:

]

{ #category : 'Comparing' }
Utf8 >> compareStringAt: startIndex to: aString startingAt: stringIndex sizeBytes: numSizeBytes useCase: aBool [
"Disallowed for a Utf8. All arguments are ignored.
 Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #compareStringAt:to:startingAt:sizeBytes:useCase:

]

{ #category : 'Comparing' }
Utf8 >> compareTo: aString collator: anIcuCollator [

"Returns -1, 0 or 1,  when receiver is less than,
 equal to, or greater than aString .
 aString must be a String, MultiByteString, or a Utf8 .
 anIcuCollator == nil is interpreted as   IcuCollator default .
 Delegates to compareTo:collator:useMinSize: with useMinSize false."

^ self compareTo: aString collator: anIcuCollator useMinSize: false

]

{ #category : 'Comparing' }
Utf8 >> compareTo: argString collator: anIcuCollator useMinSize: aMinSize [

"Returns -1, 0 or 1,  when receiver is less than,
 equal to, or greater than argString .
 argString must be a String, MultiByteString, or a Utf8.
 The implementation uses libicu comparison code which can operate
 on a UTF-8 encoded string directly.
 anIcuCollator == nil is interpreted as   IcuCollator default .

 If aMinSize == false, normal comparison is done.

 If aMinSize==true, compare stops at (self size min: argString size),
 which is Squeak semantics for comparison.

 If aMinSize is a SmallInteger >= 1, compare stops at
   aMinSize min: (self size min: argString size) .

 Primitive 931 performs the comparison; the Smalltalk code below is the
 fallback that runs when the primitive fails."
<primitive: 931>
"With a nil collator and nothing stored in session state slot 20, retry with
 IcuCollator default. A non-nil collator must be an IcuCollator.
 NOTE(review): slot 20 presumably caches the default collator - confirm."
anIcuCollator ifNil:[
  (System __sessionStateAt: 20) ifNil:[
    ^ self compareTo: argString collator: IcuCollator default useMinSize: aMinSize ]
] ifNotNil:[ anIcuCollator _validateClass: IcuCollator ].
"Validate the remaining arguments, then report the primitive failure."
aMinSize _validateClasses: { Boolean . SmallInteger }.
argString _validateClasses: { String . Utf8 }.
^ self _primitiveFailed: #compareTo:collator:useMinSize:
        args: { argString . anIcuCollator . aMinSize }

]

{ #category : 'Accessing' }
Utf8 >> copyFrom: startIndex to: stopIndex [

"Disallowed for a Utf8. Both arguments are ignored.
 Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #copyFrom:to:

]

{ #category : 'Accessing' }
Utf8 >> dateTimeAt: startIndex put: aDateTime width: anInt [
"Disallowed for a Utf8. All arguments are ignored.
 Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #dateTimeAt:put:width:

]

{ #category : 'Accessing' }
Utf8 >> dateTimeAt: startIndex width: anInt [
"Disallowed for a Utf8. Both arguments are ignored.
 Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #dateTimeAt:width:

]

{ #category : 'Converting' }
Utf8 >> decodeToString [
  "Decode the receiver returning an instance of String , DoubleByteString
   or QuadByteString.
   Implemented as _decodeFromUtf8: false maxSize: nil; decodeToUnicode passes
   true as the first argument instead."

  ^ self _decodeFromUtf8: false maxSize: nil

]

{ #category : 'Converting' }
Utf8 >> decodeToUnicode [
  "Decode the receiver returning an instance of Unicode7 , Unicode16 or Unicode32.
   Implemented as _decodeFromUtf8: true maxSize: nil; decodeToString passes
   false as the first argument instead."

  ^ self _decodeFromUtf8: true maxSize: nil

]

{ #category : 'Accessing' }
Utf8 >> deleteIndexKeyAt: anIndex [
"Disallowed for a Utf8. anIndex is ignored.
 Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #deleteIndexKeyAt:

]

{ #category : 'Accessing' }
Utf8 >> doubleByteCharAt: index [
"Disallowed, convert with decodeFromUTF8 first.
 index is ignored. Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #doubleByteCharAt:

]

{ #category : 'Converting' }
Utf8 >> encodeAsUTF16 [
  "Decodes the receiver with decodeToUnicode, then answers the result of
   sending encodeAsUTF16 to the decoded string."
  | decoded |
  decoded := self decodeToUnicode.
  ^ decoded encodeAsUTF16

]

{ #category : 'Converting' }
Utf8 >> encodeAsUTF8 [
  "The receiver is already a Utf8, so the receiver itself is answered, not a copy."
  ^ self

]

{ #category : 'Accessing' }
Utf8 >> getObjectWithOldOopValueAt: anOffset [
"Disallowed for a Utf8. anOffset is ignored.
 Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #getObjectWithOldOopValueAt:

]

{ #category : 'Accessing' }
Utf8 >> getObjectWithOopValueAt: anOffset [
"Disallowed for a Utf8. anOffset is ignored.
 Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #getObjectWithOopValueAt:

]

{ #category : 'Hashing' }
Utf8 >> hash [
  "Answers the hash of the receiver decoded with decodeToUnicode.
   Inefficient, not intended for frequent use: every call decodes the receiver."
  | decoded |
  decoded := self decodeToUnicode.
  ^ decoded hash

]

{ #category : 'Accessing' }
Utf8 >> insertAll: aByteArray at: anIndex [

"Appending is the only insertion allowed on a Utf8.
 When anIndex is exactly one past the current size, the request is passed
 to the superclass implementation and its result is answered.
 Any other anIndex signals an ArgumentError."

anIndex == (self size + 1) ifTrue:[ ^ super insertAll: aByteArray at: anIndex].

"Insertion in the middle of a Utf8 is disallowed"
ArgumentError signal:'a Utf8 may only be created by encoding, or appended to'

]

{ #category : 'Accessing' }
Utf8 >> int32LittleEndianAt: startIndex [

"Disallowed for a Utf8. startIndex is ignored.
 Answers the result of shouldNotImplement:, passing the full selector of
 this method including its trailing colon."
^ self shouldNotImplement: #int32LittleEndianAt:

]

{ #category : 'Accessing' }
Utf8 >> quadByteCharAt: index [
"Disallowed, convert with decodeFromUTF8 first.
 index is ignored. Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #quadByteCharAt:

]

{ #category : 'Streams' }
Utf8 >> readStream [
  "Disallowed, you need to decode the Utf8 and then create a stream
   on the decoded result. Answers the result of shouldNotImplement:."
  ^ self shouldNotImplement: #readStream
]

{ #category : 'Accessing' }
Utf8 >> removeFrom: startIndex to: stopIndex [

"Disallowed for a Utf8. Both arguments are ignored.
 Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #removeFrom:to:

]

{ #category : 'Accessing' }
Utf8 >> replaceFrom: startIndex to: stopIndex with: aSeqCollection startingAt: repIndex [
"Disallowed for a Utf8. All arguments are ignored.
 Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #replaceFrom:to:with:startingAt:

]

{ #category : 'Accessing' }
Utf8 >> shortStringAt: anIndex compareWith: aByteObject startingAt: stringOffset opCode: anOpCode [
"Disallowed for a Utf8. All arguments are ignored.
 Answers the result of shouldNotImplement:, passing the full selector of
 this method including its trailing colon."
^ self shouldNotImplement: #shortStringAt:compareWith:startingAt:opCode:

]

{ #category : 'Converting' }
Utf8 >> sizeForEncodeAsUTF8 [

"Answers self size. The receiver is already a Utf8, and encodeAsUTF8 on a
 Utf8 answers the receiver unchanged."

^ self size .

]

{ #category : 'Accessing' }
Utf8 >> squeakBasicAt: anIndex put: aValue [
"Disallowed. Build a complete Unicode7, Unicode16 or Unicode32
 and then convert it with encodeAsUTF8.
 Both arguments are ignored. Answers the result of shouldNotImplement:."
^ self shouldNotImplement: #squeakBasicAt:put:

]

{ #category : 'Private' }
Utf8 >> unsigned8At: index put: aValue [
  "For use in testing.
   Stores aValue at index by calling the superclass at:put:signed:width: with
   signed false and width 1. The send goes to super because this class
   overrides at:put:signed:width: as disallowed."
  ^ super at: index put: aValue signed: false width: 1

]
