Attribute VB_Name = "ADOQueryText" 'Patrick O'Beirne, Systems Modelling Ltd 'pob at sysmod.com 'http://www.sysmod.com 'http://ie.linkedin.com/in/patrickobeirne 'http://sysmod.wordpress.com 't: @ExcelAnalytics 'https://sysmod.wordpress.com/2021/01/18/querying-text-csv-files-method-2-ado-recordset/ ' Functions to query record-oriented data in text files ' whether CSV or Tab delimited, handling Unicode file names and contents. ' ADODB early binding needs a reference to Microsoft ActiveX Data Objects Option Explicit Option Private Module Private Const SUCCESS As Long = 0 ' This function to load text/csv or text/tab-separated-values into a worksheet infers the character set and ' whether the file is comma or tab delimited and writes a schema.ini if the latter ' DelimiterChar and Codepage changed byref if not specified Function ADOLoadWSFromDelimitedTextFile(ByVal OriginalFullname As String, ByRef wsData As Worksheet, _ Optional ByRef DelimiterChar As String, Optional ByRef CodePage As Long) As Long Dim charset As String, sMsg As String, FMT As String, rs As ADODB.Recordset Dim OriginalFolderName As String, OriginalFilename As String Dim WorkFolderName As String, WorkFullname As String, WorkFileName As String Const SAFECOPY As Long = 1 ' 0=read original file, only used for testing; ' 1=copy file to safe temp name; needed for Unicode folder or file names ' Split passed path & Name into folder and file names If Not GetPathAndName(OriginalFullname, OriginalFolderName, OriginalFilename) = SUCCESS Then GoTo ExitProc End If If Debug_Mode() Then On Error GoTo OnError End If If SAFECOPY = 1 Then '1=xlCopy, handle possibility that FOLDER name is Unicode too GetTempFullName WorkFullname CopyFile OriginalFullname, WorkFullname Else ' default, just use the original name WorkFullname = OriginalFullname End If GetPathAndName WorkFullname, WorkFolderName, WorkFileName If Debug_Mode() Then Debug.Print WorkFullname, WorkFolderName, WorkFileName MessageBox WorkFullname & vbLf & WorkFolderName & vbLf & WorkFileName, vbOKOnly, "Debug: File, path, name" End If Dim byteArr() As Byte, lChanged As Long, DelimiterByte As Long, CountLines As Long, CountColumns As Long, LastHeaderCharIndex As Long ' read ALL the text and infer the charset if possible ADOLoadWSFromDelimitedTextFile = ReadADOStreamBytes(WorkFullname, byteArr) If Not ADOLoadWSFromDelimitedTextFile = SUCCESS Then Exit Function End If charset = CharsetFromBytes(byteArr) ' Infer the character set ' the CharacterSet parameter for ADO queries uses a Windows codepage, not the Stream charset ' If not passed in, derive from the charset If CodePage = 0 Then CodePage = CodepageFromCharset(charset) End If 'Determine the DelimiterChar from the first line. CountColumns is only needed for the messagebox statistics, not essential CountColumns = CountColumnsFromBytes(byteArr, DelimiterByte, LastHeaderCharIndex) ' determine the fields DelimiterChar, return columns count DelimiterChar = Chr$(DelimiterByte) If DelimiterChar = vbTab Then FMT = "TabDelimited" Else FMT = "Delimited(" & DelimiterChar & ")" End If ' As an alternative to writing a schema.ini file, you could use a function in the QueryTables module 'ReplaceBytes(byteArr(), CountLines, 44, 10, 13) 'to replace tab delimiters with commas 'or ReplaceBytes(byteArr(), CountLines, 44, 32, 32) ' to replace tabs and also change all quoted newlines to spaces. If DelimiterChar = vbTab Then If Debug_Mode() Then If MessageBox("Tab delimited files require a schema.ini file to be written." _ & vbLf & "Continue?", vbOKCancel, "Tab delimited") = vbCancel Then GoTo ExitProc End If End If WriteSchemaIniEntry WorkFolderName, WorkFileName, "Format=Tabdelimited" End If ' Safety check of recordcount vs sheet capacity ADORecordsetFromTextfile WorkFullname, rs, FMT, CodePage, "COUNT(*)" CountLines = rs.Fields(0).Value + 1 ' add 1 for the fieldnames header If CheckLinesVsRows(CountLines, wsData) < 0 Then ' Uses Messagebox, returns -1 if error GoTo ExitProc End If ' final confirmation to go ahead and read it If Debug_Mode() Then If MessageBox(WorkFileName & vbLf & "Fields: " & CountColumns _ & ", Delimiter=" & DelimiterName(DelimiterByte) _ & " code page=" & CodePage _ & vbLf & "Proceed?", _ vbOKCancel + IIf(DelimiterChar = vbTab, vbDefaultButton2, vbDefaultButton1), _ "Confirm") = vbCancel Then GoTo ExitProc End If End If ADOLoadWSFromDelimitedTextFile = ADORecordsetFromTextfile(WorkFullname, rs, FMT, CodePage, "*") If Not ADOLoadWSFromDelimitedTextFile = SUCCESS Then wsData.Cells(1, 1).Value = "Error " & Err & " " & Err.Description If Debug_Mode() Then Stop GoTo ExitProc End If ' Write the recordset to the sheet CopyRecordsetToSheet rs, wsData, OriginalFilename ExitProc: If Not rs Is Nothing Then If rs.State = ADODB.adStateOpen Then rs.Close End If Set rs = Nothing End If If SAFECOPY = 1 Then DeleteFile WorkFullname End If Exit Function OnError: sMsg = IIf(Erl = 0, vbNullString, "Line " & Erl & " ") & "Error " & Err & " " & Err.Description Debug.Print sMsg MessageBox sMsg, , "testQueryTextFile" ADOLoadWSFromDelimitedTextFile = Err.Number Stop Resume Resume ExitProc End Function ' data control function to alert user if there are more rows of data than will fit on the sheet Function CheckLinesVsRows(CountLines As Long, wsData As Worksheet) As Long Dim lMaxRecords As Long If wsData Is Nothing Then lMaxRecords = 1048576 Else lMaxRecords = wsData.Rows.Count - wsData.UsedRange.Rows.Count End If If CountLines > lMaxRecords Then If MessageBox("There are " & CountLines & " lines in the file, but only " _ & lMaxRecords & " rows on the worksheet" _ & vbLf & "Continue?", vbOKCancel, "Too many records") = vbOK Then CheckLinesVsRows = SUCCESS Else CheckLinesVsRows = -1 ' error value End If End If End Function Function ADORecordsetFromTextfile(ByVal FullName As String, ByRef rs As ADODB.Recordset, _ ByVal FMT As String, ByVal CodePage As Long, Optional ByVal strSelect As String) As Long ' returns err.number Dim Foldername As String, filename As String ' DataSource is the folder Dim strConnection As String, strCommand As String '"Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" & sDBFileName ' 64 bit safe '"Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" & sDBFileName ' not in 64-bit Excel ADORecordsetFromTextfile = GetPathAndName(FullName, Foldername, filename) If Not ADORecordsetFromTextfile = SUCCESS Then Exit Function End If ' TabDelimited is ignored, need schema.ini, set that up in caller 'FMT=Delimited(" & Chr$(9) & ")" won't work. Can only specify Tabdelimited in a schema.ini ' see http://msdn.microsoft.com/en-us/library/ms709353(VS.85).aspx for schema.ini ' default charset is windows-1252 ' note the single quotes in Extended Properties 'data type inference could cause problems - may need schema.ini 'the “IMEX=1” extended property for Excel connection string is used when there are mixed data types in one column. 'http://dailydoseofexcel.com/archives/2004/06/03/external-data-mixed-data-types/ '"IMEX=1 is the only way to ensure ImportMixedTypes=Text is honored" 'BTW for reading Excel .xl* files, see 'http://excellerando.blogspot.com/2014/09/from-time-to-time-it-necessary-to.html ' Set the Dbq portion of the string to the physical folder where your files reside strConnection = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" & "'" & Foldername & "'" _ & ";Extended Properties='text;HDR=Yes;FMT=" & FMT & ";" _ & "Characterset=" & CodePage & ";IMEX=1;ImportMixedTypes=Text;Readonly=True'" ' "UTF8" and "UTF-8" give "invalid argument" on conn.Open. '"Unicode" is allowed to mean UTF-16 LE same as in schema.ini CharacterSet 'Dim Conn As ADODB.Connection 'Set Conn = New ADODB.Connection 'Conn.Open strConnection If Len(strSelect) = 0 Then ' may have passed in the Select expression, eg COUNT(*), or a list of field names strSelect = "*" ' select all End If strCommand = "SELECT " & strSelect & " FROM `" & filename & "`;" ' need square brackets or backticks when space in file name ' only backticks work with Unicode file names, square brackets get: '-2147217865 The Microsoft Access database engine could not find the object 'S?????.csv'. ' Quotename gives syntax error in FROM clause whether ' or " used as string delimiters 'Set rs = Conn.Execute(strCommand) 'same as adOpenForwardOnly ' we cannot get an OpenStatic recordset from rs.Open, get error 'On Error Resume Next ' uncomment for local error trapping Set rs = New ADODB.Recordset rs.Open Source:=strCommand, ActiveConnection:=strConnection, Cursortype:=adOpenStatic, LockType:=adLockReadOnly ' adOpenForwardOnly ' adLockUnspecified ' Possible Err codes: '-2147467259 Automation error Unspecified error '-2147217887 Multiple-step OLE DB operation generated errors. Check each OLE DB status value, if available. No work was done. '-2147467259 Could not use ''; file already in use. '-2147217865 Method 'Open' of object '_Recordset' failed ' The Microsoft Access database engine could not find the object 'S?????.csv'. ' means that an unrecognisable file name has been used, such as Unicode names in Greek, Russian, Arabic, etc. '-2147217911 Cannot update. Database or object is read-only. means can't recognise .tsv file, need .tab extension 'see allowed file extensions in registry HKEY_CLASSES_ROOT\MIME\Database\Charset If Debug_Mode() And Err <> 0 Then Debug.Print Err; Err.Description Stop End If ADORecordsetFromTextfile = Err.Number End Function 'A forward-only query result cursor doesn't support getting a record count. 'Scrollable query cursors are the only kind of cursors that can return accurate record counts. _ These cursors are more expensive (in memory, mostly). Getting the record count can be an expensive operation. 'The RecordCount property doesn't indicate how many records are contained in a dynaset–, snapshot–, _ or forward–only–type Recordset object until all records have been accessed. _ Once the last record has been accessed, the RecordCount property indicates the total number of _ undeleted records in the Recordset or TableDef object. To force the last record to be accessed, _ use the MoveLast method on the Recordset object. 'Using the Requery method on a Recordset object resets the RecordCount property just as if the query were re-executed. 'adUseClient,adOpenStatic,adOpenKeyset 'https://social.msdn.microsoft.com/Forums/en-US/4478b233-51df-42e8-8938-85c8f6c75f82/read-unicode-character-using-openrowset 'Windows code page Name System.Text.Encoding schema.ini CharacterSet '20127 ASCII (US) ASCII 20127 '1252 ANSI Latin I Default ANSI '65001 UTF-8 UTF8 65001 '1200 UTF-16LE Unicode Unicode '1201 UTF-16BE BigEndianUnicode 1201 Function CodepageFromCharset(ByVal charset As String) As Long Select Case charset Case "UTF-8" CodepageFromCharset = 65001 Case "UTF-16", "UTF-16LE", "Unicode" CodepageFromCharset = 1200 Case "UTF-16BE" CodepageFromCharset = 1201 Case Else ' blank, unrecognised, use 65001 in case it's UTF-8 with No BOM, rather than Windows-1252 default CodepageFromCharset = 65001 End Select End Function 'copy a recordset to a sheet, auto add a workbook if necessary, pass back wsTarget byref if is nothing, auto populate headers Function CopyRecordsetToSheet(rs As ADODB.Recordset, Optional ByRef wsTarget As Worksheet, Optional ByVal RecordSource As String) As Long Dim wbTarget As Workbook, iCols As Long, lTargetRow As Long If rs.State = ADODB.adStateClosed Then MessageBox TypeName(rs), vbExclamation, "Recordset closed" Else On Error GoTo OnError ' create worksheet even if empty recordset If wsTarget Is Nothing Then Set wbTarget = Workbooks.Add(xlWBATWorksheet) ' new book 1 worksheet Set wsTarget = wbTarget.Worksheets(1) lTargetRow = 1 ElseIf Application.WorksheetFunction.CountA(wsTarget.UsedRange.Cells) = 0 Then ' blank existing sheet lTargetRow = 1 Else ' existing sheet with some data already. Put in below it lTargetRow = wsTarget.UsedRange.Rows.Count + 1 End If If IsEmpty(wsTarget.Cells(lTargetRow, 1)) Then ' if no headings, add them For iCols = 0 To rs.Fields.Count - 1 wsTarget.Cells(lTargetRow, iCols + 1).Value = rs.Fields(iCols).name Next wsTarget.Range(wsTarget.Cells(lTargetRow, 1), wsTarget.Cells(lTargetRow, rs.Fields.Count)).Font.Bold = True End If If rs.BOF And rs.EOF Then CopyRecordsetToSheet = 0 If Len(RecordSource) = 0 Then RecordSource = "recordset" End If If Debug_Mode() Then MessageBox "No records in " & RecordSource & vbLf & rs.Source, vbExclamation, "DEBUG: Recordset empty" ' not shown normally End If Else ' 64-bit RecordCount ([out, retval] ADO_LONGPTR *pl) CopyRecordsetToSheet = CLng(rs.RecordCount) ' will return -1 for forward-only If CopyRecordsetToSheet > 0 Then If Debug_Mode() Then Debug.Print RecordSource, CopyRecordsetToSheet End If ' preformat text columns so phone numbers arrive correctly For iCols = 0 To rs.Fields.Count - 1 'Debug.Print iCols, rs.Fields(iCols).Name, rs.Fields(iCols).Type, ColumnTypeName(rs.Fields(iCols).Type) If rs.Fields(iCols).Type = ADODB.adVarWChar Then ' nVarChar, 202 wsTarget.Cells(lTargetRow, 1 + iCols).Resize(CopyRecordsetToSheet).NumberFormat = "@" End If Next End If On Error Resume Next ' in case Crosstab query type, ignore error ' this works with ordinary queries where rs.CursorType = adOpenForwardOnly rs.MoveFirst ' does not work with TRANSFORM ... PIVOT rs ' get 'Operation is not supported for this type of object.' ' next lines then gives 'row handle is invalid' On Error GoTo OnError wsTarget.Cells(lTargetRow + 1, 1).CopyFromRecordset rs ' don't do autofit yet, let caller do that End If End If GoTo ExitProc OnError: Dim sErr As String sErr = Erl & " Error " & Err & " " & Err.Description & " Source:" & RecordSource Debug.Print sErr If Debug_Mode() Then Stop ' messagebox sErr, vbExclamation, "CopyRecordsetToSheet" Resume ExitProc Resume ExitProc: 'Debug.Print "CopyRecordsetToSheet " & CopyRecordsetToSheet & " rows, " & rs.Fields.Count & " fields, " & (Timer() - nTimer) & " seconds" End Function Function ReadSchemaIniEntry(sDataSource As String, filename As String, sEntries As String) As Long Dim SchemaIniFile As String sEntries = vbNullString SchemaIniFile = sDataSource & Application.PathSeparator & "schema.ini" If FileExists(SchemaIniFile) Then Dim SchemaIniContents As String, charset As String, lPos As Long, sFileEntry As String sFileEntry = "[" & filename & "]" ReadSchemaIniEntry = ReadADOStreamText(SchemaIniFile, SchemaIniContents, charset, -1) If ReadSchemaIniEntry = SUCCESS Then lPos = InStr(1, SchemaIniContents, sFileEntry) If lPos > 0 Then sEntries = Mid$(SchemaIniContents, lPos, InStr(lPos + 1, SchemaIniContents & "[", "[") - lPos) End If Else MessageBox SchemaIniFile & " error" If Debug_Mode() Then Stop End If End If End Function Function WriteSchemaIniEntry(sDataSource As String, filename As String, sEntries As String) As Long Dim SchemaIniFile As String, sFileEntry As String sFileEntry = "[" & filename & "]" SchemaIniFile = sDataSource & Application.PathSeparator & "schema.ini" If FileExists(SchemaIniFile) Then Dim SchemaIniContents As String, charset As String, lPos As Long WriteSchemaIniEntry = ReadADOStreamText(SchemaIniFile, SchemaIniContents, charset, -1) If WriteSchemaIniEntry = SUCCESS Then lPos = InStr(1, SchemaIniContents, sFileEntry) If lPos > 0 Then ' replace previous file entry entirely SchemaIniContents = Left$(SchemaIniContents, lPos - 1) & Mid$(SchemaIniContents, lPos + Len(sFileEntry)) End If WriteSchemaIniEntry = WriteADOStreamText(SchemaIniFile, SchemaIniContents _ & sFileEntry & vbLf _ & sEntries & vbLf, charset) Else MessageBox SchemaIniFile & " error" If Debug_Mode() Then Stop End If Else WriteSchemaIniEntry = WriteADOStreamText(SchemaIniFile, sFileEntry & vbLf _ & sEntries & vbLf, charset) End If End Function