Attribute VB_Name = "ReadFileContents" Option Explicit Option Private Module 'Patrick O’Beirne, Systems Modelling Ltd 'pob at sysmod.com 'http://www.sysmod.com 'http://ie.linkedin.com/in/patrickobeirne 'http://sysmod.wordpress.com '@ExcelAnalytics 'Open file for Binary raises err 52 for Unicode file or folder names 'Dir(), Filelen(), FileDateTime() cannot handle them either Function ReadBinaryText(ByVal FileName As String, ByVal lMaxChars As Long) As String Dim handle As Long, Text As String On Error GoTo OnError handle = FreeFile() 'If the file specified by pathname doesn't exist, it is created when a file is opened for Append, Binary, Output, or Random modes. If Len(Dir$(FileName)) > 0 Then Open FileName For Binary Access Read As #handle If lMaxChars = 0 Then lMaxChars = LOF(handle) ' 0 = all Text = Space(lMaxChars) ' enough spaces for entire file contents Get #handle, , Text Close #handle ReadBinaryText = Text End If ExitProc: Exit Function OnError: MsgBox "ReadBinaryText " & FileName & vbLf & " Error " & Err.Number & " " & Err.Description Resume ExitProc End Function 'The File System Object can read only ASCII or Unicode text files. 'You cannot use it to read or write UTF-8 encoded text files. ' returns 0=SUCCESS if it works, else err number Function ReadFSOStreamText(ByVal FileName As String, ByRef Text As String, Optional ByVal lBytes As Long = 0) As Long 'return err code. fread, FileRead, ReadFile Const forReading As Long = 1 ' iomode parameter 'Constants for optional format parameter: Const format As Long = -2 ' TristateUseDefault As Long = -2 'Opens the file by using the system default. 'Const TristateTrue As Long = -1 ' Opens the file as Unicode. 'Const TristateFalse As Long = 0 ' Opens the file as ASCII. Const create As Boolean = False ' whether a new file can be created if the specified filename doesn't exist. Text = vbNullString ' in case of err On Error Resume Next Dim fso As Object, fsoFile As Object, fsoStream As Object Set fso = CreateObject("Scripting.FileSystemObject") Set fsoFile = fso.GetFile(FileName) ' to get the filesize If Err = 0 Then If lBytes = 0 Then lBytes = fsoFile.Size 'FileSystemObject.OpenTextFile (filename, [ iomode, [ create, [ format ]]]) 'returns a TextStream object object.Read (characters) / object.ReadAll Set fsoStream = fso.OpenTextFile(FileName, forReading, create, format) Text = fsoStream.Read(lBytes) ' Read the contents IN and Close the file fsoStream.Close Else MsgBox "ReadFSOStreamText " & FileName & vbLf & "Error " & Err & " " & Err.Description End If ReadFSOStreamText = Err End Function ' ADO can handle Unicode file and folder names and UTF-8 content Function ReadADOStreamText(ByVal FileName As String, ByRef Text As String, _ Optional ByRef numchars As Long, Optional ByRef charset As String) As Long ' initialise default parameters If numchars = 0 Then numchars = -1 'numchars Optional. The number of characters to read from the file, _ or a StreamReadEnum value. Default is adReadAll=-1 If Len(charset) = 0 Then charset = "utf-8" With CreateObject("ADODB.Stream") .Type = 2 'adTypeText = 2 Specify stream type - text/string data. .charset = charset .Open On Error Resume Next .LoadFromFile FileName 'Err 3002 File could not be opened. Text = vbNullString ' in case of err Text = .ReadText(numchars) ReadADOStreamText = Err End With End Function 'https://en.wikipedia.org/wiki/Byte_order_mark 'Byte Order Mark (BOM) - FEFF - EFBBBF 'Encoding (hexadecimal) (decimal) Bytes as CP1252 characters 'UTF-8 BOM EF BB BF 239 187 191  'UTF-16 (BE) FE FF 254 255 þÿ 'UTF-16 (LE) FF FE 255 254 ÿþ ' Reads the entire contents and tries to GUESS the character encoding Function ReadFileContentsAndCharset(ByVal FileName As String, ByRef Text As String, _ ByRef charset As String) As Long charset = "UTF-8" ' first guess. If it's not UTF-8 it may still be readable as UTF-8 ReadFileContentsAndCharset = ReadADOStreamText(FileName, Text, -1, charset) If ReadFileContentsAndCharset = 0 Then ' it succeeded without error Debug.Print FileName ' comment these out in production Dim lUTF8Len As Long, sBOM As String ' UTF-8 files do not have to have a BOM so no point in testing for that sBOM = Left$(Text, 2) lUTF8Len = Len(Text) If sBOM = Chr$(254) & Chr$(255) Or sBOM = Chr$(255) & Chr$(254) _ Or InStr(1, Text, Chr(0)) > 0 _ Then ' UTF-16 or nulls? Might be double-byte character set charset = "UTF-16" ReadFileContentsAndCharset = ReadADOStreamText(FileName, Text, -1, charset) Debug.Print charset & " len=" & Len(Text) & ", utf-8 len=" & lUTF8Len If lUTF8Len = Len(Text) * 2 Then 'ReadFSOStreamText(FileName, Text) would work here too for UCS2 Debug.Print "As expected, double byte character set" End If ElseIf InStr(1, Text, ChrW$(65533), vbBinaryCompare) > 0 Then ' &HFFFD ' Check for U+FFFD replacement character ' used to replace an unknown, unrecognized or unrepresentable character 'https://en.wikipedia.org/wiki/Specials_(Unicode_block) charset = "Windows-1252" ' second guess; superset of ISO-8859-1 ReadFileContentsAndCharset = ReadADOStreamText(FileName, Text, -1, charset) Debug.Print charset & " len=" & Len(Text) & ", utf-8 len=" & lUTF8Len If Len(Text) > lUTF8Len Then Debug.Print charset & " converted some characters into multiple characters" ' so it must be some other encoding. No idea. End If End If Debug.Print Len(Text) & " chars, charset inferred=" & charset & " " & FileName Else MsgBox FileName & vbLf & "Error " & Err & " " & Err.Description End If End Function Function FileSize(ByVal FileName As String) As Long 'Dir() and FileLen() fail on Unicode & double byte file names On Error Resume Next ' simply return 0 for not found With CreateObject("Scripting.FileSystemObject") FileSize = .GetFile(FileName).Size End With End Function