Attribute VB_Name = "PowerQueryText"
'Patrick O'Beirne, Systems Modelling Ltd
'pob at sysmod.com
'http://www.sysmod.com
'http://ie.linkedin.com/in/patrickobeirne
'http://sysmod.wordpress.com
't: @ExcelAnalytics
' Functions to create tables of PowerQuery results from text / csv files
'https://sysmod.wordpress.com/2021/01/19/querying-text-csv-files-in-vba-method-3-power-query/

Option Explicit
Option Private Module

Sub testPQcsv()
   Dim FullName As String, wsInferContentType As Worksheet, wsPotentialDelimiters As Worksheet
   FullName = Worksheets("Info").Range("testFullName").Value ' put a Unicode file name in this cell
   ' or prompt for the file name:
   FullName = GetOpenFilenameStr("All files, *.*", , "Populate a worksheet from Delimited Text")
   If FileExists(FullName) Then
      Worksheets("Info").Range("testFullName").Value = FullName
      Dim Encoding As Long, DelimiterChar As String
      InferContentType FullName, Encoding, DelimiterChar ' creates two sheets with Content Type and Potential Delimiters
      'the max of column 4 "NonEmptyColumns" is the max number of columns to be returned
      'the max of column 5 "MaxRows" is the max number of rows to be returned
      
      Dim MFormula As String, wsProfile As Worksheet
      MFormula = PQMCsvFormula(FullName, Encoding, DelimiterChar)
      TableProfile MFormula, wsProfile ' Creates a profile of field count, max, min
      ' the number of rows in wsProfile is 1 more than the number of columns
      'the max of column 6 "Count" is the max number of rows to be returned
      ' exclude the last two rows with PotentialDelimiter "W", no idea what that is.
      WriteRowArray Worksheets("Info").Range("testFullName").Offset(0, 3), _
         Array(wsProfile.UsedRange.Rows.Count - 1, _
         Application.Max(wsProfile.UsedRange.Resize(wsProfile.UsedRange.Rows.Count - 2).Columns(6)), _
         DelimiterName(Asc(DelimiterChar)), Encoding)
      Dim wsData As Worksheet
      'MFormula = PQMCsvFormula(FullName, Encoding, DelimiterChar)    ' always ends with PQCsvTable
      PQDelimitedText MFormula & vbCr & vbLf & "in PQCsvTable", wsData
   
   End If
End Sub

Sub InferContentType(ByVal FullName As String, ByRef Encoding As Long, ByRef DelimiterChar As String)
   Dim wsInferContentType As Worksheet
   Dim QueryName As String, MFormula As String, WBQ As WorkbookQuery
  
   'This gives Name and Value where Name is Content.Type, Content.Encoding, Csv.PotentialDelimiters
   'Content.Encoding is returned only when detected, eg UTF-8 BOM (65001) and UTF-16 (1200)
   'https://blog.crossjoin.co.uk/2018/04/08/the-binary-infercontenttype-m-function/

   MFormula = "let Source = File.Contents(""" & FullName & """), info = Binary.InferContentType(Source) in info"
   QueryName = "InferContentType"
   Set wsInferContentType = ActiveWorkbook.Worksheets.Add
   wsInferContentType.name = Left$(QueryName & Int(Time() * 10000000000#), 31)
   
   Set WBQ = wsInferContentType.Parent.Queries.Add(name:=QueryName, Formula:=MFormula) ' name and formula required, Description optional
   WBQueryToListTable WBQ, wsInferContentType
   
   WBQ.Delete ' not needed any more
   Set WBQ = Nothing
   Dim lMaxNonEmptyColumns As Long
   ' row 2 should always be "Content.Type" but it may not have been able to infer the encoding
   'text/csv or text/tab-separated-values
   If wsInferContentType.Cells(2, 1).Value = "Content.Type" Then
      Select Case wsInferContentType.Cells(2, 2).Value
         Case "text/tab-separated-values"
            DelimiterChar = vbTab
         Case Else ' eg "text/csv". There is no text/semicolon result.
            DelimiterChar = ","
      End Select
      
   End If

   If wsInferContentType.Cells(3, 1).Value = "Content.Encoding" Then
      Encoding = wsInferContentType.Cells(3, 2) ' eg 65001 or 1200
   End If
   
   Dim PotentialDelimiter As String, wsPotentialDelimiters As Worksheet
   ' Check that decided above vs this list
   ' This returns Csv.PotentialDelimiters as a table PotentialDelimiter, QuoteStyle, MaxColumns, NonEmptyColumns, MaxRows, NonEmptyRows
   MFormula = "let Source = File.Contents(""" & FullName & """), info = Binary.InferContentType(Source)[Csv.PotentialDelimiters]  in info"
   QueryName = "PotentialDelimiters" ' period is not allowed in Query names
   Set wsPotentialDelimiters = ActiveWorkbook.Worksheets.Add
   wsPotentialDelimiters.name = Left$(QueryName & Int(Time() * 10000000000#), 31)
   
   Set WBQ = wsPotentialDelimiters.Parent.Queries.Add(name:=QueryName, Formula:=MFormula) ' name and formula required, Description optional
   WBQueryToListTable WBQ, wsPotentialDelimiters
   If wsPotentialDelimiters.Cells(1, 4) = "NonEmptyColumns" Then ' it worked
      ' the DelimiterChar inference could probably be done in M code
      'PotentialDelimiters col. D is NonEmptyColumns
      With wsPotentialDelimiters.UsedRange
         lMaxNonEmptyColumns = Application.Max(.Columns(4))
         PotentialDelimiter = .Cells(Application.Match(lMaxNonEmptyColumns, .Columns(4), 0), 1).Value
         If Not DelimiterChar = PotentialDelimiter Then
            If MessageBox("Content.Type is " & wsInferContentType.Cells(2, 2).Value _
                  & ", delimiter is " & DelimiterName(Asc(DelimiterChar)) _
               & vbLf & "but the potential delimiter identified is " _
                  & DelimiterName(Asc(PotentialDelimiter)) _
               & vbLf & "Click Yes to use the " & DelimiterName(Asc(PotentialDelimiter)) _
               & vbLf & "No to use the " & DelimiterName(Asc(DelimiterChar)) _
               , vbYesNo, "Which delimiter to use?") = vbYes Then
               DelimiterChar = PotentialDelimiter
            End If
         End If
         
      End With
      ' add a column to show "PotentialDelimiter" code for info
      wsPotentialDelimiters.UsedRange.Columns(7).FormulaR1C1 = "=CODE(RC1)"
      wsPotentialDelimiters.Cells(1, 7) = "Code"
      wsPotentialDelimiters.UsedRange.AutoFilter Field:=2, Criteria1:="1" ' only for QuoteStyle=1
   End If

   WBQ.Delete ' not needed any more
   Set WBQ = Nothing

End Sub


'https://excel.uservoice.com/forums/274580-excel-for-the-web/suggestions/42402955-power-query-metadata-in-vba
Sub TableProfile(ByVal MFormula As String, wsProfile As Worksheet)
   'https://www.myonlinetraininghub.com/table-statistics-from-table-profile-in-power-query
   'https://docs.microsoft.com/en-us/powerquery-m/table-profile
   
   ' MFormula ends in ", PQCsvTable = the table", so need to add " in <the table you return>"
   
   Dim QueryName As String, WBQ As WorkbookQuery
   QueryName = "TableProfile"
   MFormula = MFormula & vbCr & vbLf & "   ,TProfile=Table.Profile(PQCsvTable) " _
      & vbCr & vbLf & " in TProfile"
   If wsProfile Is Nothing Then
      Set wsProfile = ActiveWorkbook.Worksheets.Add
      wsProfile.name = Left$(QueryName & Int(Time() * 10000000000#), 31)
   End If
   Set WBQ = wsProfile.Parent.Queries.Add(name:=QueryName, Formula:=MFormula) ' name and formula required, Description optional
   WBQueryToListTable WBQ, wsProfile
   
   WBQ.Delete ' not needed any more
   Set WBQ = Nothing
      
End Sub

Function PQMCsvFormula(ByVal FullName As String, Optional ByVal Encoding As Long, Optional ByVal DelimiterChar As String, _
      Optional ByVal HasHeaders As Boolean = True, Optional ByVal ColumnsType As String)
   Dim MFormula As String, ResultName As String
 
   'https://docs.microsoft.com/en-us/powerquery-m/csv-document
   
   'Columns: Can be null, the number of columns, a list of column names, or a table type.
   'When not specified, the number of columns will be determined by what is found in the input.
   ' I assume All Columns are wanted
   
   'QuoteStyle.None (default): All line breaks are treated as the end of the current row, even when they occur inside a quoted value.
   'QuoteStyle.Csv: Quoted line breaks are treated as part of the data, not as the end of the current row.
   ' I assume quoted line breaks
   
   'The text encoding of the file. Default: 65001 UTF-8 will only be inferred if it starts with a UTF-8 BOM.
   'https://blog.crossjoin.co.uk/2018/03/09/an-in-depth-look-at-the-csv-document-m-function/
   ' tab-separated-values extension does not have to be .tab, .tsv works too
   MFormula = "let Source = Csv.Document(File.Contents(""" & FullName & """), [QuoteStyle=QuoteStyle.Csv"
   'Csv.Document QuoteStyle.None (default) so always specify .Csv here to handle line breaks within quotes.
   
   If Not Encoding = 0 Then
      MFormula = MFormula & ", Encoding=" & Encoding ' CSV.Document default is 65001 (TextEncoding.UTF8)
   End If
   
   If Not DelimiterChar = vbNullString Then
      'to use a tab character as DelimiterChar you need to use the text “#(tab)”
      If DelimiterChar = vbTab Then
         DelimiterChar = "#(tab)"
      End If
      MFormula = MFormula & ", Delimiter=""" & DelimiterChar & """" ' CSV.Document default DelimiterChar is comma
   End If
   MFormula = MFormula & "])"
    
   ResultName = "Source"
   If HasHeaders Then
      MFormula = MFormula & vbCr & vbLf & "   ,PromotedHeaders = Table.PromoteHeaders(Source, [PromoteAllScalars=true])"
      ResultName = "PromotedHeaders"
   End If
   
   If Not ColumnsType = vbNullString Then ' eg "text", "number" etc. Default is all text
      MFormula = MFormula & vbCr & vbLf & "   ,ChangedTypes = Table.TransformColumnTypes(PromotedHeaders, " _
         & "List.Transform( Table.ColumnNames(PromotedHeaders), each {_, type " & ColumnsType & "})  )"
      ResultName = "ChangedTypes"
   End If
   
   PQMCsvFormula = MFormula & "   ,PQCsvTable = " & ResultName ' standard name to finish with

End Function

Sub PQDelimitedText(ByVal MFormula As String, wsData As Worksheet)

   Dim QueryName As String, WBQ As WorkbookQuery
   QueryName = "Q" & Int(Now() * 10000000000#) ' Q441937377430556
   If wsData Is Nothing Then
      Set wsData = ActiveWorkbook.Worksheets.Add
      wsData.name = QueryName
   End If
   Set WBQ = wsData.Parent.Queries.Add(name:=QueryName, Formula:=MFormula)  ' name and formula required, Description optional
   ' that launches the process Microsoft.Mashup.Container.Loader.exe
   WBQueryToSheet WBQ, wsData
   'WBQueryToListTable WBQ, wsData ' alternative style of output
   
   wsData.Rows(1).RowHeight = 120
   Dim shp As Shape
   Set shp = wsData.Shapes.AddTextbox(Orientation:=msoTextOrientationHorizontal, Left:=2, Top:=2, Width:=900, Height:=100)
   shp.TextFrame.Characters.text = WBQ.Formula
   
   WBQ.Delete ' not needed any more
   Set WBQ = Nothing
   ' but Microsoft.Mashup.Container.Loader.exe remains running
End Sub

Sub WBQueryToSheet(WBQ As WorkbookQuery, wsData As Worksheet)
   Dim strConnection As String, strCommand As String
   strConnection = "OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=""" & WBQ.name & """"
   strCommand = "SELECT * FROM [" & WBQ.name & "]"  ' need [square brackets], not `backticks`
   QueryTableConnect strConnection, strCommand, wsData, Empty, vbNullString, 0
End Sub

Sub WBQueryToListTable(WBQ As WorkbookQuery, wsData As Worksheet)
   Dim ListTable As ListObject, qt As QueryTable
   Set ListTable = wsData.ListObjects.Add(SourceType:=0, Source:= _
      "OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=""" & WBQ.name & """" _
      , Destination:=wsData.Range("$A$1"))  'why bother with empty: ;Extended Properties="""""
   ' .Name = "Table_ExternalData_1"
   ' QueryTable object provides a link for the ListObject object to the list server.
   Set qt = ListTable.QueryTable

   'https://docs.microsoft.com/en-us/office/vba/api/excel.querytable
   'If you import data by using the user interface, data from a web query or a text query is imported _
    as a QueryTable object, while all other external data is imported as a ListObject object.
   'If you import data by using the object model, data from a web query or a text query must be imported as a QueryTable, _
    while all other external data can be imported as either a ListObject or a QueryTable.
   With qt
      'Name undefined for PQ
      .AdjustColumnWidth = True
      '.FieldNames undefined for PQ ListTable
      .CommandType = xlCmdSql ' The default value is xlCmdSQL=2
      .CommandText = Array("SELECT * FROM [" & WBQ.name & "]") ' need [square brackets], not `backticks`
      .RowNumbers = False
      .FillAdjacentFormulas = False
      .PreserveFormatting = True
      .RefreshOnFileOpen = False
      .BackgroundQuery = False ' True
      'BackgroundQuery True if queries for the query table are performed asynchronously (in the background)
      '.Refreshing=True if there is a background query in progress for the specified query table.
      .RefreshStyle = xlInsertDeleteCells
      .SavePassword = False
      .SaveData = True
      'SaveData True if data for the QueryTable report is saved with the workbook. _
       False if only the report definition is saved.
      .RefreshPeriod = 0
      .PreserveColumnInfo = True
      'PreserveColumnInfo  True if column sorting, filtering, and layout information is preserved _
       whenever a query table is refreshed. The default value is True.
      '.ListObject.DisplayName = QueryName  'Default Table_ExternalData_1 can't use numerics 441937399421296 as name
      'The QueryTable whose parent in a ListObject doesn't have a Name property.
      .Refresh BackgroundQuery:=False
      .MaintainConnection = False ' will this remove connection at end? No.
      'MaintainConnection: True if the connection to the specified data source is maintained after the refresh _
       and until the workbook is closed. The default value is True.
      'You can set the MaintainConnection property only if the QueryType _
       property of the query table or PivotTable cache is set to xlOLEDBQuery.
      'Setting the property to False causes an open connection to be closed.
      ' .Parameters is empty
   End With
   ' the order of these deletions matters, otherwise the file can be left in use.
   qt.WorkbookConnection.Delete ' not needed any more
   qt.Delete ' not needed any more
   Set qt = Nothing
   ListTable.Unlist ' convert to plain range, no autofilter
   Set ListTable = Nothing
   
   'Debug.Print ThisWorkbook.Queries.Count, ThisWorkbook.Connections.Count, wsData.ListObjects.Count

End Sub