Attribute VB_Name = "PowerQueryText" 'Patrick O'Beirne, Systems Modelling Ltd 'pob at sysmod.com 'http://www.sysmod.com 'http://ie.linkedin.com/in/patrickobeirne 'http://sysmod.wordpress.com 't: @ExcelAnalytics ' Functions to create tables of PowerQuery results from text / csv files 'https://sysmod.wordpress.com/2021/01/19/querying-text-csv-files-in-vba-method-3-power-query/ Option Explicit Option Private Module Sub testPQcsv() Dim FullName As String, wsInferContentType As Worksheet, wsPotentialDelimiters As Worksheet FullName = Worksheets("Info").Range("testFullName").Value ' put a Unicode file name in this cell ' or prompt for the file name: FullName = GetOpenFilenameStr("All files, *.*", , "Populate a worksheet from Delimited Text") If FileExists(FullName) Then Worksheets("Info").Range("testFullName").Value = FullName Dim Encoding As Long, DelimiterChar As String InferContentType FullName, Encoding, DelimiterChar ' creates two sheets with Content Type and Potential Delimiters 'the max of column 4 "NonEmptyColumns" is the max number of columns to be returned 'the max of column 5 "MaxRows" is the max number of rows to be returned Dim MFormula As String, wsProfile As Worksheet MFormula = PQMCsvFormula(FullName, Encoding, DelimiterChar) TableProfile MFormula, wsProfile ' Creates a profile of field count, max, min ' the number of rows in wsProfile is 1 more than the number of columns 'the max of column 6 "Count" is the max number of rows to be returned ' exclude the last two rows with PotentialDelimiter "W", no idea what that is. WriteRowArray Worksheets("Info").Range("testFullName").Offset(0, 3), _ Array(wsProfile.UsedRange.Rows.Count - 1, _ Application.Max(wsProfile.UsedRange.Resize(wsProfile.UsedRange.Rows.Count - 2).Columns(6)), _ DelimiterName(Asc(DelimiterChar)), Encoding) Dim wsData As Worksheet 'MFormula = PQMCsvFormula(FullName, Encoding, DelimiterChar) ' always ends with PQCsvTable PQDelimitedText MFormula & vbCr & vbLf & "in PQCsvTable", wsData End If End Sub Sub InferContentType(ByVal FullName As String, ByRef Encoding As Long, ByRef DelimiterChar As String) Dim wsInferContentType As Worksheet Dim QueryName As String, MFormula As String, WBQ As WorkbookQuery 'This gives Name and Value where Name is Content.Type, Content.Encoding, Csv.PotentialDelimiters 'Content.Encoding is returned only when detected, eg UTF-8 BOM (65001) and UTF-16 (1200) 'https://blog.crossjoin.co.uk/2018/04/08/the-binary-infercontenttype-m-function/ MFormula = "let Source = File.Contents(""" & FullName & """), info = Binary.InferContentType(Source) in info" QueryName = "InferContentType" Set wsInferContentType = ActiveWorkbook.Worksheets.Add wsInferContentType.name = Left$(QueryName & Int(Time() * 10000000000#), 31) Set WBQ = wsInferContentType.Parent.Queries.Add(name:=QueryName, Formula:=MFormula) ' name and formula required, Description optional WBQueryToListTable WBQ, wsInferContentType WBQ.Delete ' not needed any more Set WBQ = Nothing Dim lMaxNonEmptyColumns As Long ' row 2 should always be "Content.Type" but it may not have been able to infer the encoding 'text/csv or text/tab-separated-values If wsInferContentType.Cells(2, 1).Value = "Content.Type" Then Select Case wsInferContentType.Cells(2, 2).Value Case "text/tab-separated-values" DelimiterChar = vbTab Case Else ' eg "text/csv". There is no text/semicolon result. DelimiterChar = "," End Select End If If wsInferContentType.Cells(3, 1).Value = "Content.Encoding" Then Encoding = wsInferContentType.Cells(3, 2) ' eg 65001 or 1200 End If Dim PotentialDelimiter As String, wsPotentialDelimiters As Worksheet ' Check that decided above vs this list ' This returns Csv.PotentialDelimiters as a table PotentialDelimiter, QuoteStyle, MaxColumns, NonEmptyColumns, MaxRows, NonEmptyRows MFormula = "let Source = File.Contents(""" & FullName & """), info = Binary.InferContentType(Source)[Csv.PotentialDelimiters] in info" QueryName = "PotentialDelimiters" ' period is not allowed in Query names Set wsPotentialDelimiters = ActiveWorkbook.Worksheets.Add wsPotentialDelimiters.name = Left$(QueryName & Int(Time() * 10000000000#), 31) Set WBQ = wsPotentialDelimiters.Parent.Queries.Add(name:=QueryName, Formula:=MFormula) ' name and formula required, Description optional WBQueryToListTable WBQ, wsPotentialDelimiters If wsPotentialDelimiters.Cells(1, 4) = "NonEmptyColumns" Then ' it worked ' the DelimiterChar inference could probably be done in M code 'PotentialDelimiters col. D is NonEmptyColumns With wsPotentialDelimiters.UsedRange lMaxNonEmptyColumns = Application.Max(.Columns(4)) PotentialDelimiter = .Cells(Application.Match(lMaxNonEmptyColumns, .Columns(4), 0), 1).Value If Not DelimiterChar = PotentialDelimiter Then If MessageBox("Content.Type is " & wsInferContentType.Cells(2, 2).Value _ & ", delimiter is " & DelimiterName(Asc(DelimiterChar)) _ & vbLf & "but the potential delimiter identified is " _ & DelimiterName(Asc(PotentialDelimiter)) _ & vbLf & "Click Yes to use the " & DelimiterName(Asc(PotentialDelimiter)) _ & vbLf & "No to use the " & DelimiterName(Asc(DelimiterChar)) _ , vbYesNo, "Which delimiter to use?") = vbYes Then DelimiterChar = PotentialDelimiter End If End If End With ' add a column to show "PotentialDelimiter" code for info wsPotentialDelimiters.UsedRange.Columns(7).FormulaR1C1 = "=CODE(RC1)" wsPotentialDelimiters.Cells(1, 7) = "Code" wsPotentialDelimiters.UsedRange.AutoFilter Field:=2, Criteria1:="1" ' only for QuoteStyle=1 End If WBQ.Delete ' not needed any more Set WBQ = Nothing End Sub 'https://excel.uservoice.com/forums/274580-excel-for-the-web/suggestions/42402955-power-query-metadata-in-vba Sub TableProfile(ByVal MFormula As String, wsProfile As Worksheet) 'https://www.myonlinetraininghub.com/table-statistics-from-table-profile-in-power-query 'https://docs.microsoft.com/en-us/powerquery-m/table-profile ' MFormula ends in ", PQCsvTable = the table", so need to add " in " Dim QueryName As String, WBQ As WorkbookQuery QueryName = "TableProfile" MFormula = MFormula & vbCr & vbLf & " ,TProfile=Table.Profile(PQCsvTable) " _ & vbCr & vbLf & " in TProfile" If wsProfile Is Nothing Then Set wsProfile = ActiveWorkbook.Worksheets.Add wsProfile.name = Left$(QueryName & Int(Time() * 10000000000#), 31) End If Set WBQ = wsProfile.Parent.Queries.Add(name:=QueryName, Formula:=MFormula) ' name and formula required, Description optional WBQueryToListTable WBQ, wsProfile WBQ.Delete ' not needed any more Set WBQ = Nothing End Sub Function PQMCsvFormula(ByVal FullName As String, Optional ByVal Encoding As Long, Optional ByVal DelimiterChar As String, _ Optional ByVal HasHeaders As Boolean = True, Optional ByVal ColumnsType As String) Dim MFormula As String, ResultName As String 'https://docs.microsoft.com/en-us/powerquery-m/csv-document 'Columns: Can be null, the number of columns, a list of column names, or a table type. 'When not specified, the number of columns will be determined by what is found in the input. ' I assume All Columns are wanted 'QuoteStyle.None (default): All line breaks are treated as the end of the current row, even when they occur inside a quoted value. 'QuoteStyle.Csv: Quoted line breaks are treated as part of the data, not as the end of the current row. ' I assume quoted line breaks 'The text encoding of the file. Default: 65001 UTF-8 will only be inferred if it starts with a UTF-8 BOM. 'https://blog.crossjoin.co.uk/2018/03/09/an-in-depth-look-at-the-csv-document-m-function/ ' tab-separated-values extension does not have to be .tab, .tsv works too MFormula = "let Source = Csv.Document(File.Contents(""" & FullName & """), [QuoteStyle=QuoteStyle.Csv" 'Csv.Document QuoteStyle.None (default) so always specify .Csv here to handle line breaks within quotes. If Not Encoding = 0 Then MFormula = MFormula & ", Encoding=" & Encoding ' CSV.Document default is 65001 (TextEncoding.UTF8) End If If Not DelimiterChar = vbNullString Then 'to use a tab character as DelimiterChar you need to use the text “#(tab)” If DelimiterChar = vbTab Then DelimiterChar = "#(tab)" End If MFormula = MFormula & ", Delimiter=""" & DelimiterChar & """" ' CSV.Document default DelimiterChar is comma End If MFormula = MFormula & "])" ResultName = "Source" If HasHeaders Then MFormula = MFormula & vbCr & vbLf & " ,PromotedHeaders = Table.PromoteHeaders(Source, [PromoteAllScalars=true])" ResultName = "PromotedHeaders" End If If Not ColumnsType = vbNullString Then ' eg "text", "number" etc. Default is all text MFormula = MFormula & vbCr & vbLf & " ,ChangedTypes = Table.TransformColumnTypes(PromotedHeaders, " _ & "List.Transform( Table.ColumnNames(PromotedHeaders), each {_, type " & ColumnsType & "}) )" ResultName = "ChangedTypes" End If PQMCsvFormula = MFormula & " ,PQCsvTable = " & ResultName ' standard name to finish with End Function Sub PQDelimitedText(ByVal MFormula As String, wsData As Worksheet) Dim QueryName As String, WBQ As WorkbookQuery QueryName = "Q" & Int(Now() * 10000000000#) ' Q441937377430556 If wsData Is Nothing Then Set wsData = ActiveWorkbook.Worksheets.Add wsData.name = QueryName End If Set WBQ = wsData.Parent.Queries.Add(name:=QueryName, Formula:=MFormula) ' name and formula required, Description optional ' that launches the process Microsoft.Mashup.Container.Loader.exe WBQueryToSheet WBQ, wsData 'WBQueryToListTable WBQ, wsData ' alternative style of output wsData.Rows(1).RowHeight = 120 Dim shp As Shape Set shp = wsData.Shapes.AddTextbox(Orientation:=msoTextOrientationHorizontal, Left:=2, Top:=2, Width:=900, Height:=100) shp.TextFrame.Characters.text = WBQ.Formula WBQ.Delete ' not needed any more Set WBQ = Nothing ' but Microsoft.Mashup.Container.Loader.exe remains running End Sub Sub WBQueryToSheet(WBQ As WorkbookQuery, wsData As Worksheet) Dim strConnection As String, strCommand As String strConnection = "OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=""" & WBQ.name & """" strCommand = "SELECT * FROM [" & WBQ.name & "]" ' need [square brackets], not `backticks` QueryTableConnect strConnection, strCommand, wsData, Empty, vbNullString, 0 End Sub Sub WBQueryToListTable(WBQ As WorkbookQuery, wsData As Worksheet) Dim ListTable As ListObject, qt As QueryTable Set ListTable = wsData.ListObjects.Add(SourceType:=0, Source:= _ "OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=""" & WBQ.name & """" _ , Destination:=wsData.Range("$A$1")) 'why bother with empty: ;Extended Properties=""""" ' .Name = "Table_ExternalData_1" ' QueryTable object provides a link for the ListObject object to the list server. Set qt = ListTable.QueryTable 'https://docs.microsoft.com/en-us/office/vba/api/excel.querytable 'If you import data by using the user interface, data from a web query or a text query is imported _ as a QueryTable object, while all other external data is imported as a ListObject object. 'If you import data by using the object model, data from a web query or a text query must be imported as a QueryTable, _ while all other external data can be imported as either a ListObject or a QueryTable. With qt 'Name undefined for PQ .AdjustColumnWidth = True '.FieldNames undefined for PQ ListTable .CommandType = xlCmdSql ' The default value is xlCmdSQL=2 .CommandText = Array("SELECT * FROM [" & WBQ.name & "]") ' need [square brackets], not `backticks` .RowNumbers = False .FillAdjacentFormulas = False .PreserveFormatting = True .RefreshOnFileOpen = False .BackgroundQuery = False ' True 'BackgroundQuery True if queries for the query table are performed asynchronously (in the background) '.Refreshing=True if there is a background query in progress for the specified query table. .RefreshStyle = xlInsertDeleteCells .SavePassword = False .SaveData = True 'SaveData True if data for the QueryTable report is saved with the workbook. _ False if only the report definition is saved. .RefreshPeriod = 0 .PreserveColumnInfo = True 'PreserveColumnInfo True if column sorting, filtering, and layout information is preserved _ whenever a query table is refreshed. The default value is True. '.ListObject.DisplayName = QueryName 'Default Table_ExternalData_1 can't use numerics 441937399421296 as name 'The QueryTable whose parent in a ListObject doesn't have a Name property. .Refresh BackgroundQuery:=False .MaintainConnection = False ' will this remove connection at end? No. 'MaintainConnection: True if the connection to the specified data source is maintained after the refresh _ and until the workbook is closed. The default value is True. 'You can set the MaintainConnection property only if the QueryType _ property of the query table or PivotTable cache is set to xlOLEDBQuery. 'Setting the property to False causes an open connection to be closed. ' .Parameters is empty End With ' the order of these deletions matters, otherwise the file can be left in use. qt.WorkbookConnection.Delete ' not needed any more qt.Delete ' not needed any more Set qt = Nothing ListTable.Unlist ' convert to plain range, no autofilter Set ListTable = Nothing 'Debug.Print ThisWorkbook.Queries.Count, ThisWorkbook.Connections.Count, wsData.ListObjects.Count End Sub