1: 2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26: 27: 28: 29: 30: 31: 32: 33: 34: 35: 36: 37: 38: 39: 40: 41: 42: 43: 44: 45: 46: 47: 48: 49: 50: 51: 52: 53: 54: 55: 56: 57: 58: 59: 60: 61: 62: 63: 64: 65: 66: 67: 68: 69: 70: 71: 72: 73: 74: 75: 76: 77: 78: 79: 80: 81: 82: 83: 84: 85: 86: 87: 88: 89: 90: 91: 92: 93: 94: 95: 96: 97: 98: 99: 100: 101: 102: 103: 104: 105: 106: 107: 108: 109: 110: 111: 112: 113: 114: 115: 116: 117: 118: 119: 120: 121: 122: 123: 124: 125:
type
  { Line-break convention of a text file, as reported by
    GetFileContentsAsString. }
  TScribaFileFormat = (
    ffPC,      // CR+LF (#13#10) line breaks; also the fallback when the text has no line break
    ffUnix,    // LF (#10) line breaks
    ffMac,     // CR (#13) line breaks
    ffUnknown  // not assigned by GetFileContentsAsString in the code visible here
  );

  { Character set of a text file, as reported by GetFileContentsAsString. }
  TScribaCharacterSet = (
    csAnsi,       // default for files without a byte order mark
    csAscii,      // OEM (DOS code page) text; converted with OemToChar on load
    csUnicodeLE,  // file starts with the byte order mark FF FE
    csUnicodeBE,  // file starts with the byte order mark FE FF
    csUTF8,       // file starts with the byte order mark EF BB BF
    csUnknown     // not assigned by GetFileContentsAsString in the code visible here
  );
uses cUnicodeReader, cUnicodeCodecs;
{ Loads the text file uFilename into a string and reports what was detected
  about it.

  Parameters:
    uFilename   - path of the file to read.
    uFileFormat - out: line-break convention found in the file (ffPC, ffUnix
                  or ffMac; ffPC when the file contains no line break).
    uCharSet    - out: csUnicodeLE / csUnicodeBE when the file starts with a
                  UTF-16 byte order mark, csUTF8 when it starts with the
                  UTF-8 byte order mark, csAscii when it looks like OEM
                  (DOS) text, otherwise csAnsi.

  Returns the file text. UTF-16 files are returned as produced by
  TUnicodeMemoryReader.ReadUTF8Str; all other files are returned with the
  line breaks normalised by TStringList, with a UTF-8 byte order mark removed
  and OEM text converted via OemToChar.

  Raises whatever the stream classes raise when the file cannot be opened or
  read (e.g. EFOpenError, EReadError).

  The code assumes a compiler where String is a single-byte AnsiString. }
function GetFileContentsAsString(uFilename: String; var uFileFormat: TScribaFileFormat; var uCharSet: TScribaCharacterSet): String;

  { Checks the first two bytes of the file for a UTF-16 byte order mark.
    uSize receives the file size in bytes; uBigEndian is True only for the
    big-endian mark FE FF. Both outputs are always initialised. }
  function DetectUnicode(var uSize: Integer; var uBigEndian: Boolean): Boolean;
  var
    tmp: TFileStream;
    signature: array[0..1] of Byte;
  begin
    Result := False;
    uBigEndian := False;
    uSize := 0;
    // fmShareDenyWrite: plain fmOpenRead requests exclusive access and fails
    // if any other process already has the file open.
    tmp := TFileStream.Create(uFilename, fmOpenRead or fmShareDenyWrite);
    try
      uSize := tmp.Size;
      if uSize < 2 then
        Exit; // too short to hold a byte order mark
      tmp.ReadBuffer(signature, SizeOf(signature));
      uBigEndian := (signature[0] = $FE) and (signature[1] = $FF);
      Result := uBigEndian or ((signature[0] = $FF) and (signature[1] = $FE));
    finally
      tmp.Free;
    end;
  end;

  { True when the text starts with the UTF-8 byte order mark EF BB BF. }
  function DetectUTF8(const uText: String): Boolean;
  begin
    Result := (Length(uText) > 2)
      and (Ord(uText[1]) = $EF)
      and (Ord(uText[2]) = $BB)
      and (Ord(uText[3]) = $BF);
  end;

  { True when the text contains a lower-case German umlaut in Windows ANSI
    encoding. Numeric character codes are used so the test does not depend on
    the encoding of this source file. }
  function DetectAnsi(const uText: String): Boolean;
  begin
    Result := (Pos(#$E4, uText) > 0)   // a-umlaut
      or (Pos(#$F6, uText) > 0)        // o-umlaut
      or (Pos(#$FC, uText) > 0);       // u-umlaut
  end;

  { True when the text contains a lower-case German umlaut in OEM (code page
    437) encoding and no ANSI-encoded umlaut.
    The exclusion is parenthesised to cover all three OEM tests: $84 and $94
    are also the ANSI quotation marks, so ANSI text using those quotes
    together with umlauts must not be taken for OEM text. }
  function DetectAscii(const uText: String): Boolean;
  begin
    Result := ((Pos(#$84, uText) > 0)    // a-umlaut in code page 437
      or (Pos(#$94, uText) > 0)          // o-umlaut in code page 437
      or (Pos(#$81, uText) > 0))         // u-umlaut in code page 437
      and not DetectAnsi(uText);
  end;

  { Determines the line-break convention: CR+LF wins over a lone LF, which
    wins over a lone CR. Text without any line break is reported as ffPC. }
  function DetectFileFormat(const uText: String): TScribaFileFormat;
  begin
    if Pos(#13#10, uText) > 0 then
      Result := ffPC
    else if Pos(#10, uText) > 0 then
      Result := ffUnix
    else if Pos(#13, uText) > 0 then
      Result := ffMac
    else
      Result := ffPC;
  end;

var
  Reader: TUnicodeMemoryReader;
  Buffer: TMemoryStream;
  Lines: TStringList;
  Size: Integer;
  ByteCount: Integer;
  BigEndian: Boolean;
  IsUnicode: Boolean;
  Bytes: PAnsiChar;
  Swap: AnsiChar;
  Raw: String;
  i: Integer;
begin
  IsUnicode := DetectUnicode(Size, BigEndian);

  Buffer := TMemoryStream.Create;
  try
    Buffer.LoadFromFile(uFilename);
    ByteCount := Buffer.Size;

    if IsUnicode then
    begin
      if BigEndian then
      begin
        // Swap every byte pair in memory so the reader sees little-endian
        // data. An odd trailing byte is ignored.
        ByteCount := ByteCount and not 1;
        Bytes := Buffer.Memory;
        i := 0;
        while i < ByteCount do
        begin
          Swap := Bytes[i];
          Bytes[i] := Bytes[i + 1];
          Bytes[i + 1] := Swap;
          Inc(i, 2);
        end;
        uCharSet := csUnicodeBE;
      end
      else
        uCharSet := csUnicodeLE;

      Reader := TUnicodeMemoryReader.Create(Buffer.Memory, ByteCount);
      try
        Reader.Skip(1); // skip the byte order mark
        // NOTE(review): Size is the file size in bytes, so Size - 1 is larger
        // than the number of characters left after the byte order mark. Kept
        // as in the original; presumably the reader stops at the end of its
        // data - confirm against cUnicodeReader.
        Result := Reader.ReadUTF8Str(Size - 1);
      finally
        Reader.Free;
      end;

      uFileFormat := DetectFileFormat(Result);
    end
    else
    begin
      uCharSet := csAnsi;

      // Detect the line breaks on the raw bytes: TStringList.Text rejoins all
      // lines with CR+LF, which would make every file look like ffPC.
      SetString(Raw, PAnsiChar(Buffer.Memory), ByteCount);
      uFileFormat := DetectFileFormat(Raw);

      Lines := TStringList.Create;
      try
        Lines.Text := Raw;
        Result := Lines.Text;
      finally
        Lines.Free;
      end;

      if DetectUTF8(Result) then
      begin
        Delete(Result, 1, 3); // strip the byte order mark
        uCharSet := csUTF8;
      end
      else if DetectAscii(Result) then
      begin
        // OemToChar converts in place, so make sure the buffer is not shared
        // with another string reference.
        UniqueString(Result);
        OEMToChar(PChar(Result), PChar(Result));
        uCharSet := csAscii;
      end;
    end;
  finally
    Buffer.Free;
  end;
end;