You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

231 regels
6.6 KiB

  1. unit utsUtils;
  2. {$IFDEF FPC}
  3. {$mode delphi}{$H+}
  4. {$ENDIF}
  5. interface
  6. uses
  7. Classes, SysUtils, utsTypes;
  8. function tsStrAlloc(aSize: Cardinal): PWideChar;
  9. function tsStrNew(const aText: PWideChar): PWideChar;
  10. procedure tsStrDispose(const aText: PWideChar);
  11. function tsStrLength(aText: PWideChar): Cardinal;
  12. function tsStrCopy(aDst, aSrc: PWideChar): PWideChar;
  13. function tsAnsiToWide(aDst: PWideChar; const aSize: Integer; aSrc: PAnsiChar; const aCodePage: TtsCodePage; const aDefaultChar: WideChar): Integer;
  14. function tsISO_8859_1ToWide(aDst: PWideChar; const aSize: Integer; aSrc: PAnsiChar): Integer;
  15. function tsUTF8ToWide(aDst: PWideChar; const aSize: Integer; const aSrc: PAnsiChar; const aDefaultChar: WideChar): Integer;
  16. function tsUTFBE16ToWide(aDst: PWideChar; const aDstSize: Integer; aSrc: PByte; aSrcSize: Integer; const aDefaultChar: WideChar): Integer;
  17. function tsAnsiSBCDToWide(aDst: PWideChar; const aSize: Integer; aSrc: PAnsiChar; const aCodePage: TtsCodePage; const aDefaultChar: WideChar): Integer;
  18. implementation
  19. uses
  20. utsCodePages;
  21. ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  22. function tsStrAlloc(aSize: Cardinal): PWideChar;
  23. begin
  24. aSize := (aSize + 1) shl 1;
  25. GetMem(result, aSize);
  26. FillChar(result^, aSize, 0);
  27. end;
  28. ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  29. function tsStrNew(const aText: PWideChar): PWideChar;
  30. begin
  31. result := tsStrAlloc(tsStrLength(aText));
  32. tsStrCopy(result, aText);
  33. end;
  34. ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  35. procedure tsStrDispose(const aText: PWideChar);
  36. begin
  37. FreeMem(aText);
  38. end;
  39. ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  40. function tsStrLength(aText: PWideChar): Cardinal;
  41. begin
  42. result := 0;
  43. if Assigned(aText) then
  44. while (ord(aText^) <> 0) do begin
  45. inc(result);
  46. inc(aText);
  47. end;
  48. end;
  49. ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  50. function tsStrCopy(aDst, aSrc: PWideChar): PWideChar;
  51. begin
  52. result := aDst;
  53. if Assigned(aDst) and Assigned(aSrc) then
  54. while ord(aSrc^) <> 0 do begin
  55. aDst^ := aSrc^;
  56. inc(aDst);
  57. inc(aSrc);
  58. end;
  59. end;
  60. ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  61. function tsAnsiToWide(aDst: PWideChar; const aSize: Integer; aSrc: PAnsiChar;
  62. const aCodePage: TtsCodePage; const aDefaultChar: WideChar): Integer;
  63. begin
  64. case aCodePage of
  65. tsUTF8:
  66. result := tsUTF8ToWide(aDst, aSize, aSrc, aDefaultChar);
  67. tsISO_8859_1:
  68. result := tsISO_8859_1ToWide(aDst, aSize, aSrc);
  69. else
  70. result := tsAnsiSBCDToWide(aDst, aSize, aSrc, aCodePage, aDefaultChar);
  71. end;
  72. end;
  73. ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  74. function tsISO_8859_1ToWide(aDst: PWideChar; const aSize: Integer; aSrc: PAnsiChar): Integer;
  75. begin
  76. result := 0;
  77. if Assigned(aDst) and Assigned(aSrc) then
  78. while (ord(aSrc^) <> 0) and (result < aSize) do begin
  79. aDst^ := WideChar(aSrc^);
  80. inc(aDst);
  81. inc(aSrc);
  82. inc(result);
  83. end;
  84. end;
  85. ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  86. function tsUTF8ToWide(aDst: PWideChar; const aSize: Integer; const aSrc: PAnsiChar; const aDefaultChar: WideChar): Integer;
  87. procedure AddToDest(aCharCode: UInt64);
  88. begin
  89. if (aCharCode > $FFFF) then
  90. aCharCode := ord(aDefaultChar);
  91. PWord(aDst)^ := aCharCode;
  92. inc(aDst);
  93. result := result + 1;
  94. end;
  95. const
  96. STATE_STARTBYTE = 0;
  97. STATE_FOLLOWBYTE = 1;
  98. var
  99. cc: UInt64;
  100. len, state, c: Integer;
  101. p: PByte;
  102. tmp: Byte;
  103. begin
  104. result := 0;
  105. if not Assigned(aDst) or not Assigned(aSrc) or (aSize <= 0) then
  106. exit;
  107. c := 0;
  108. cc := 0;
  109. p := PByte(aSrc);
  110. len := Length(aSrc);
  111. state := STATE_STARTBYTE;
  112. while (len > 0) do begin
  113. case state of
  114. STATE_STARTBYTE: begin
  115. if (p^ and $80 = 0) then begin
  116. AddToDest(p^);
  117. end else if (p^ and $40 > 0) then begin
  118. tmp := p^;
  119. c := 0;
  120. while (tmp and $80) > 0 do begin
  121. inc(c);
  122. tmp := tmp shl 1;
  123. end;
  124. cc := p^ and ((1 shl (7 - c)) - 1);
  125. state := STATE_FOLLOWBYTE;
  126. c := c - 1;
  127. end;
  128. end;
  129. STATE_FOLLOWBYTE: begin
  130. if ((p^ and $C0) = $80) then begin
  131. cc := (cc shl 6) or (p^ and $3F);
  132. c := c - 1;
  133. if (c = 0) then begin
  134. AddToDest(cc);
  135. state := STATE_STARTBYTE;
  136. end;
  137. end else
  138. state := STATE_STARTBYTE;
  139. end;
  140. end;
  141. if (result >= aSize) then
  142. exit;
  143. inc(p);
  144. end;
  145. end;
  146. ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  147. function tsUTFBE16ToWide(aDst: PWideChar; const aDstSize: Integer; aSrc: PByte; aSrcSize: Integer;
  148. const aDefaultChar: WideChar): Integer;
  149. var
  150. tmp: Word;
  151. procedure AddToDest(aCharCode: Word);
  152. begin
  153. if ((aCharCode and $D800) = $D800) or
  154. ((aCharCode and $DC00) = $DC00) then
  155. aCharCode := Ord(aDefaultChar);
  156. aDst^ := WideChar(aCharCode);
  157. inc(aDst, 1);
  158. result := result + 1;
  159. end;
  160. begin
  161. result := 0;
  162. while (aSrcSize > 1) and (aDstSize > 0) do begin
  163. {$IFDEF FPC}
  164. tmp := (aSrc^ shl 8) or (aSrc + 1)^;
  165. {$ELSE}
  166. tmp := (PByteArray(aSrc)[0] shl 8) or PByteArray(aSrc)[1];
  167. {$ENDIF}
  168. inc(aSrc, 2);
  169. dec(aSrcSize, 2);
  170. AddToDest(tmp);
  171. end;
  172. end;
  173. ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  174. function tsAnsiSBCDToWide(aDst: PWideChar; const aSize: Integer; aSrc: PAnsiChar;
  175. const aCodePage: TtsCodePage; const aDefaultChar: WideChar): Integer;
  176. var
  177. tmp: WideChar;
  178. cp: PtsCodePageValues;
  179. begin
  180. result := 0;
  181. cp := ANSI_TO_WIDE_CODE_PAGE_LUT[aCodePage];
  182. if not Assigned(aDst) or
  183. not Assigned(aSrc) or
  184. not Assigned(cp) or
  185. (aSize < 0) then exit;
  186. while (Ord(aSrc^) <> 0) and (result < aSize) do begin
  187. tmp := WideChar(cp^[aSrc^]);
  188. if (ord(tmp) = 0) then begin
  189. if (ord(aDefaultChar) <> 0) then begin
  190. aDst^ := aDefaultChar;
  191. inc(aDst);
  192. end;
  193. end else begin
  194. aDst^ := tmp;
  195. inc(aDst);
  196. end;
  197. inc(aSrc);
  198. end;
  199. end;
  200. end.