Attribute VB_Name = "QueryTables" 'Patrick O'Beirne, Systems Modelling Ltd 'pob at sysmod.com 'http://www.sysmod.com 'http://ie.linkedin.com/in/patrickobeirne 'http://sysmod.wordpress.com 't: @ExcelAnalytics 'https://sysmod.wordpress.com/2021/01/13/querying-text-csv-files-method-1-querytable/ Option Explicit 'Option Private Module Private Const SUCCESS As Long = 0 ' The legacy Get Data Worksheet.QueryTables method. ' The QueryTextDelimited function here can read anything except UTF-16 with no BOM. 'https://docs.microsoft.com/en-us/dotnet/api/microsoft.office.interop.excel.querytable?view=excel-pia 'XlQueryType ENUMERATION(Excel) 'Specifies the type of query used by Microsoft Excel to populate the query table or PivotTable cache. 'Name Value Description 'xlADORecordset 7 Based on an ADO recordset query 'xlDAORecordset 2 Based on a DAO recordset query, for query tables only 'xlODBCQuery 1 Based on an ODBC data source 'xlOLEDBQuery 5 Based on an OLE DB query, including OLAP data sources 'xlTextImport 6 Based on a text file, for query tables only 'xlWebQuery 4 Based on a webpage, for query tables only 'XlCmdType ENUMERATION(Excel) 'Specifies the value of the CommandText property. 'Name Value Description 'xlCmdCube 1 Contains a cube name for an OLAP data source. 'xlCmdDAX 8 Contains a Data Analysis Expressions (DAX) formula. 'xlCmdDefault 4 Contains command text that the OLE DB provider understands. 'xlCmdExcel 7 Contains an Excel formula. 'xlCmdList 5 Contains a pointer to list data. 'xlCmdSql 2 Contains an SQL statement. 'xlCmdTable 3 Contains a table name for accessing OLE DB data sources. 'xlCmdTableCollection 6 Contains the name of a table collection. 'A simple example is QueryTableConnect("TEXT;" & Fullname, "", wsData, xlTextFormat, ",", 65001) ' Querytables can not handle quoted newlines. This function changes them to spaces in a temporary file before loading the data Function QueryTextDelimited(FullName As String, wsData As Worksheet, _ ColumnDataTypes As Variant, DelimiterChar As String) As Long ' return 0=success Dim Tempname As String, charset As String, lChanged As Long QueryTextDelimited = GetTempFullName(Tempname) ' FileSystem.GetTempFullName If Not QueryTextDelimited = SUCCESS Then Exit Function End If Dim byteArr() As Byte, DelimiterByte As Long, CountColumns As Long, CountLines As Long, LastHeaderCharIndex As Long QueryTextDelimited = ReadADOStreamBytes(FullName, byteArr) If Not QueryTextDelimited = SUCCESS Then Exit Function End If charset = CharsetFromBytes(byteArr) ' Infer the character set lChanged = ReplaceBytes(byteArr(), CountLines, 9, 32, 32) ' Replace any quoted newlines with spaces, no need to change tab delimiters CountColumns = CountColumnsFromBytes(byteArr, DelimiterByte, LastHeaderCharIndex) ' determine the fields delimiter, return columns count ' could use LastHeaderCharIndex to read the first line of headers from the file if desired DelimiterChar = Chr$(DelimiterByte) WriteADOStreamBytes Tempname, byteArr ' write the output even if unchanged If Debug_Mode() Then MessageBox "Charset=" & charset _ & ", Delimiter=" & DelimiterName(DelimiterByte) _ & ", " & CountColumns & " columns" _ & ", " & CountLines & " rows" _ & ", " & lChanged & " embedded newlines in " & FullName _ & vbLf & "Work file name = " & Tempname End If If wsData Is Nothing Then Set wsData = ActiveWorkbook.Worksheets.Add End If Const TextFilePlatform As Long = 65001 ' works for everything 'undocumented in https://docs.microsoft.com/en-us/office/vba/api/excel.querytable.textfileplatform QueryTextDelimited = QueryTableConnect("TEXT;" & Tempname, vbNullString, wsData, ColumnDataTypes, DelimiterChar, TextFilePlatform) DeleteFile Tempname ' assume not wanted any more End Function Function QueryTableConnect(strConnection As String, strCommand As String, wsData As Worksheet, ColumnDataTypes As Variant, _ FieldDelimiters As String, TextFilePlatform As Long) As Long ' return Err.number code Dim qt As QueryTable, lTries As Long, lErr As Long Do ' retry until success or out of tries On Error Resume Next lTries = lTries + 1 'https://docs.microsoft.com/en-us/office/vba/api/excel.querytables.add Set qt = wsData.QueryTables.Add(Connection:=strConnection, Destination:=wsData.Range("$A$1")) lErr = Err.Number ' Sometimes get Error -2147417848: Automation error; or -2147221080 Automation error ' The object invoked has disconnected from its clients. ' may get occasional Querytable error, just retry and it works Loop Until lErr = 0 Or lTries > 2 If Not lErr = 0 Then GoTo ExitProc ' still error, give up End If On Error GoTo OnError With qt '.Name = "QueryTableConnect" ' ExternalData_1 .AdjustColumnWidth = True .FieldNames = True 'FieldNames True if field names from the data source appear as column headings _ for the returned data. The default value is True. .RowNumbers = False .FillAdjacentFormulas = False 'Mac does not have .PreserveFormatting = True or .RefreshPeriod = 0 .BackgroundQuery = False ' True .RefreshOnFileOpen = False .RefreshStyle = xlInsertDeleteCells ' or xlOverwriteCells .SavePassword = False .SaveData = True .MaintainConnection = False If qt.QueryType = xlOLEDBQuery Then ' 5 Debug.Print "qt.CommandType "; qt.CommandType, strCommand .CommandText = strCommand Else ' xlTextImport=6 .TextFilePromptOnRefresh = False 'TextFilePlatform Use this property only when your query table is based on data from a text file _ (with the QueryType property set to xlTextImport). 'TextFilePlatform Returns or sets the origin of the text file you’re importing into the query table. This property determines which code page is used during the data import. Read/write XlPlatform. 'xlMacintosh=1 xlWindows=2 xlMSDOS=3 If Not TextFilePlatform = 0 Then 'invalid encoding gets Err.number 5 Invalid procedure call or argument .TextFilePlatform = TextFilePlatform ' 65001= charset UTF-8, also works with Unicode UTF-16 as long as there is a BOM End If .TextFileStartRow = 1 .TextFileParseType = xlDelimited .TextFileTextQualifier = xlTextQualifierDoubleQuote 'TextFileConsecutiveDelimiter True if consecutive delimiters are treated as a single delimiter _ when you import a text file into a query table. The default value is False .TextFileConsecutiveDelimiter = False 'FieldDelimiters can be a string of more than one delimiter .TextFileTabDelimiter = InStr(1, FieldDelimiters, vbTab) > 0 .TextFileSemicolonDelimiter = InStr(1, FieldDelimiters, ";") > 0 .TextFileCommaDelimiter = InStr(1, FieldDelimiters, ",") > 0 Or Len(FieldDelimiters) = 0 ' default comma if unspecified .TextFileSpaceDelimiter = InStr(1, FieldDelimiters, " ") > 0 If IsArray(ColumnDataTypes) Then .TextFileColumnDataTypes = ColumnDataTypes ' pass in an XlColumnDataType number to coerce all fields to that type, eg xlTextFormat=2 ElseIf IsNumeric(ColumnDataTypes) And Not IsEmpty(ColumnDataTypes) Then ' IsNumeric(Empty)=True Dim lColumnDataType As XlColumnDataType, lCol As Long lColumnDataType = ColumnDataTypes ' coerce variant to XlColumnDataType ReDim ColumnDataTypes(0 To wsData.Columns.Count - 1) ' maximum possible, no harm if excess For lCol = 0 To UBound(ColumnDataTypes) ColumnDataTypes(lCol) = lColumnDataType Next lCol 'err 5 Invalid procedure call or argument .TextFileColumnDataTypes = ColumnDataTypes ' Else default General End If End If 'err 1004 Excel cannot find the text file to refresh this external data range.... 'err 1004 The file could not be accessed. Try one of the following:... .Refresh BackgroundQuery:=False '.MaintainConnection = False End With GoTo ExitProc OnError: If MessageBox(strConnection & vbLf & "Error " & Err.Number & " " & Err.Description _ & vbLf & "Retry?", vbYesNo, "QueryTableConnect") = vbYes Then If Debug_Mode() Then Stop ' to debug Resume Else Resume ExitProc End If ExitProc: QueryTableConnect = Err.Number On Error Resume Next qt.WorkbookConnection.Delete ' .Type=1=OLEDB qt.Delete ' Assuming we do not want the query retained once we have the data Set qt = Nothing End Function 'Set the format for each column (Default = General) 'For example Array(1, 9, 1) to skip the second column '.TextFileColumnDataTypes = Array(1, 9, 1) 'Unfortunately there is no date+time format, have to import as text and parse afterwards 'xlGeneralFormat General 1 'xlTextFormat Text 2 'xlMDYFormat Month-Day-Year 3 'xlDMYFormat Day-Month-Year 4 'xlYMDFormat Year-Month-Day 5 'xlMYDFormat Month-Year-Day 6 'xlDYMFormat Day-Year-Month 7 'xlYDMFormat Year-Day-Month 8 'xlSkipColumn Skip 9 'XlConnectionType ENUMERATION(Excel) 'Specifies the type of database connection. 'Name Value Description 'xlConnectionTypeDATAFEED 6 Data Feed 'xlConnectionTypeMODEL 7 PowerPivot Model 'xlConnectionTypeNOSOURCE 9 No source 'xlConnectionTypeODBC 2 ODBC 'xlConnectionTypeOLEDB 1 OLEDB 'xlConnectionTypeTEXT 4 Text 'xlConnectionTypeWEB 5 Web 'xlConnectionTypeWORKSHEET 8 Worksheet 'xlConnectionTypeXMLMAP 3 XML MAP ' Clean up a CSV/text file and return some useful information about it Function RemoveEmbeddedNewlines(ByVal sInputFile As String, ByVal sOutputFile As String, _ ByRef lChanged As Long) As Long ' return success code Dim byteArr() As Byte, CountNewLines As Long 'Simple VB statements cannot handle Unicode file names RemoveEmbeddedNewlines = ReadADOStreamBytes(sInputFile, byteArr) If RemoveEmbeddedNewlines = SUCCESS Then lChanged = ReplaceBytes(byteArr(), CountNewLines, 9, 32, 32) ' Don't change tab, replace quoted newlines with spaces WriteADOStreamBytes sOutputFile, byteArr ' write the output even if unchanged End If End Function Function BOMLen(bytes() As Byte) As Long If bytes(0) = 239 And bytes(1) = 187 And bytes(2) = 191 Then ' UTF-8 BOMLen = 3 ' &HEFBBBF Else If bytes(0) = 255 And bytes(1) = 254 Then ' UTF-16LE BOMLen = 2 ' &HFFEE ElseIf bytes(0) = 254 And bytes(1) = 255 Then ' UTF-16BE BOMLen = 2 ' &HFEFF Else ' no BOM, no idea what charset BOMLen = 0 End If End If End Function ' this helps to clean up a file by optionally replacing quoted newlines with spaces, _ and optionally tab delimiters with comma. Returns number of bytes changed,CountNewLines byref Function ReplaceBytes(ByRef byteArr() As Byte, ByRef CountNewLines As Long, ByVal ReplaceTabDelimiter As Byte, _ ByVal ReplaceQuotedLF As Byte, ByVal ReplaceQuotedCR As Byte) As Long Dim bInQuote As Boolean Dim lIndex As Long, lFirstIndex As Long, lLastIndex As Long, lBytesPerChar As Long Dim lChar As Long, lPrevChar As Long lFirstIndex = BOMLen(byteArr) ' start at offset no.of bytes in the BOM if any If lFirstIndex = 2 Then ' UTF-16 lBytesPerChar = 2 Else lBytesPerChar = 1 End If CountNewLines = 0 ReplaceBytes = 0 bInQuote = False lLastIndex = UBound(byteArr) For lIndex = lFirstIndex To lLastIndex Step lBytesPerChar ' 0-based '384MB only takes 8 seconds, so this may be fast enough to not need DoEvents 'If lIndex Mod 10000000 = 0 Then ' once every 10M chars is enough ' DoEvents ' allow ctrl-break. 'End If lChar = byteArr(lIndex) If lChar = 34 Then ' double quote bInQuote = Not bInQuote ElseIf bInQuote Then ' replace any quoted special characters ' CR only=Mac, LF only= Windows, CRLF=MSDOS If lChar = 10 And ReplaceQuotedLF > 0 And Not ReplaceQuotedLF = 10 Then byteArr(lIndex) = ReplaceQuotedLF ' eg space=32 ReplaceBytes = ReplaceBytes + 1 End If If lChar = 13 And ReplaceQuotedCR > 0 And Not ReplaceQuotedCR = 13 Then byteArr(lIndex) = ReplaceQuotedCR ' eg space=32 ReplaceBytes = ReplaceBytes + 1 End If 'outside quotes, do field delimiter replacement if reqd ElseIf lChar = 9 Then ' tab outside of quotes If ReplaceTabDelimiter > 0 And Not ReplaceTabDelimiter = 9 Then byteArr(lIndex) = ReplaceTabDelimiter ' eg 44=comma ReplaceBytes = ReplaceBytes + 1 End If ElseIf lChar = 10 Or lChar = 13 Then ' newline outside of quotes, count another line If lPrevChar = 10 Or lPrevChar = 13 Then ' LFCR or CRLF, don't double count lPrevChar = 0 ' allow for blank lines CRLFCRLF Else lPrevChar = lChar CountNewLines = CountNewLines + 1 End If End If Next lIndex ' if file ends WITHOUT CR or LF, count the final line as well lChar = byteArr(lLastIndex - lBytesPerChar + 1) If Not (lChar = 10 Or lChar = 13) Then ' LFCR or CRLF, don't double count CountNewLines = CountNewLines + 1 End If End Function ' From a file allowing for quoted newlines, return no. columns and byref the delimiter byte code Function CountColumnsFromBytes(byteArr() As Byte, ByRef DelimiterByte As Long, LastHeaderCharIndex As Long) As Long Dim bInQuote As Boolean Dim lIndex As Long, FirstIndex As Long, lLastIndex As Long, LastHeaderByteIndex As Long Dim lChar As Long, lTabCount As Long, lCommaCount As Long, lSemicoloncount As Long Dim lBytesPerChar As Long FirstIndex = BOMLen(byteArr) ' start at offset no.of bytes in the BOM if any If FirstIndex = 2 Then ' UTF-16 lBytesPerChar = 2 Else lBytesPerChar = 1 End If lLastIndex = UBound(byteArr) For lIndex = FirstIndex To lLastIndex Step lBytesPerChar ' 0-based lChar = byteArr(lIndex) If lChar = 34 Then ' double quote bInQuote = Not bInQuote ' only test for special characters outside quoted strings ElseIf Not bInQuote Then If lChar = 10 Or lChar = 13 Then LastHeaderByteIndex = lIndex - 1 Exit For ' end of first line ElseIf lChar = 9 Then ' tab lTabCount = lTabCount + 1 ElseIf lChar = 44 Then ' comma lCommaCount = lCommaCount + 1 ElseIf lChar = 59 Then ' semicolon lSemicoloncount = lSemicoloncount + 1 End If ' special character tests End If Next lIndex If LastHeaderByteIndex = 0 Then ' no line endings found If Debug_Mode() Then MessageBox "No line endings found in text", vbExclamation End If LastHeaderByteIndex = lLastIndex End If LastHeaderCharIndex = LastHeaderByteIndex / lBytesPerChar ' pass back header length in characters If lTabCount > 0 Then DelimiterByte = 9 ' vbTab CountColumnsFromBytes = lTabCount ElseIf lSemicoloncount > lCommaCount Then DelimiterByte = 59 ' ";" CountColumnsFromBytes = lSemicoloncount Else If Debug_Mode() Then Debug.Assert lCommaCount > 0 End If DelimiterByte = 44 ' "," ' default CountColumnsFromBytes = lCommaCount End If End Function Function DelimiterName(DelimiterByte As Long) As String ' friendly name from byte code Select Case DelimiterByte Case 9 DelimiterName = "tab" Case 44 DelimiterName = "comma" Case 59 DelimiterName = "semicolon" Case 87 ' W means whitespace DelimiterName = "space" Case Else DelimiterName = "CHAR(" & CStr(DelimiterByte) & ")" End Select End Function ' Copy of ADOStream functions here so this module can be standalone ' reading bytes with no charset, just 1 char per byte Private Function ReadADOStreamBytes(ByVal filename As String, ByRef bytes() As Byte, _ Optional ByRef numchars As Long = -1) As Long With CreateObject("ADODB.Stream") .Type = 1 'Specify stream type, adTypeBinary=1 adTypeText=2 On Error GoTo OnError ' uncomment this for local error handling .Open .LoadFromFile filename ' Err.number 3219 if adModeRead specified bytes = .Read(numchars) .Close End With ExitProc: ReadADOStreamBytes = Err.Number Exit Function OnError: MessageBox Err.Number & " " & Err.Description & vbLf & filename, vbOKOnly, "ReadADOStreamBytes" Resume ExitProc End Function Private Function WriteADOStreamBytes(ByVal filename As String, ByRef bytes() As Byte) As Long With CreateObject("ADODB.Stream") .Type = adTypeBinary '.charset not allowed in binary context .Open .Write bytes .SaveToFile filename, 2 ' adSaveCreateNotExist=1 = no overwrite, adSaveCreateOverWrite=2 = overwrite .Close WriteADOStreamBytes = Err.Number End With End Function