Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
864 views
in Technique[技术] by (71.8m points)

json - Extracting fields from a javascript section of an HTML document into tables? Geographic coordinates

I have an HTML document that contains geographic information within a block of Javascript. It is the source code from this web page: https://energy.ehawaii.gov/epd/public/energy-projects-map.html

This can be viewed as a map and also as a list.

What I want to achieve is to have that list in Excel, but with a field for "Latitude" and a field for "Longitude". The Google Maps marker specifies the LatLng in the Javascript.

How can I use something like VB to process the source code of the HTML file, and organize into a table that has the following fields/columns:

  • Description (from the <a ... title="such and such">)
  • Technology (from <p>Technology: Solar</p> for example)
  • Latitude (from google.maps.LatLng(latitude, longitude);
  • Longtitude (from the same code line as latitude, but using the second variable)?

All help appreciated!

See Question&Answers more detail:os

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Reply

0 votes
by (71.8m points)

Try this VBScript solution based on XMLHTTP requests. Just copy the code below, paste to text file, save it as .vbs and run it. Script hasn't been optimized, all requests are not async, so it takes about 40 seconds on my PC to get all data.

Option Explicit
Dim arrCells(), arrList, arrTmp, sRespHeaders, sRespText, arrSetHeaders, i, j, iTotal, oApp, oWB, oWS, oOutput

' Create output window
Output oOutput

' Get cookies
oOutput.write "Get cookies"
XmlHttpGet "https://energy.ehawaii.gov/epd/public/energy-projects-map.html", Array(), sRespHeaders, sRespText

ParseResponse "^Set-(Cookie): ([S]*?=[S]*?);[sS]*?$", sRespHeaders, arrSetHeaders

' Get project list
oOutput.write "Get project list"
arrList = Array()
XmlHttpGet "https://energy.ehawaii.gov/epd/public/energy-projects-list.json?sEcho=2&iColumns=5&sColumns=&iDisplayStart=1&iDisplayLength=0&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&mDataProp_4=4&sSearch=&bRegex=false&sSearch_0=&bRegex_0=false&bSearchable_0=true&sSearch_1=&bRegex_1=false&bSearchable_1=true&sSearch_2=&bRegex_2=false&bSearchable_2=true&sSearch_3=&bRegex_3=false&bSearchable_3=true&sSearch_4=&bRegex_4=false&bSearchable_4=true&iSortCol_0=0&sSortDir_0=asc&iSortingCols=1&bSortable_0=true&bSortable_1=true&bSortable_2=true&bSortable_3=true&bSortable_4=true", arrSetHeaders, "", sRespText
ParseProjects sRespText, arrList, iTotal
oOutput.write "Get project list: " & (UBound(arrList) + 1) & " of " & iTotal

' Rearrange to 2-dimensional array, get LatLng
ReDim arrCells(UBound(arrList), 8) ' Name, Technology, Island, Capacity, Location, RID, Type, Lat, Lng
For i = 0 To UBound(arrList)
    For j = 0 To 6
        arrCells(i, j) = arrList(i)(j)
    Next
    oOutput.write "Get LatLng: " & (i + 1) & " of " & iTotal
    arrTmp = RequestLatLng(arrList(i)(5))
    arrCells(i, 7) = arrTmp(0)
    arrCells(i, 8) = arrTmp(1)
Next

' Create Excel worksheet, output data
oOutput.write "Export to Excel"
Set oApp = CreateObject("Excel.Application")
oApp.Visible = True
Set oWB = oApp.Workbooks.Add(-4167) ' xlWBATWorksheet
Set oWS = oWB.Worksheets(1)
oWS.Range(oWS.Cells(1, 1), oWS.Cells(UBound(arrCells) + 1, 9)).Value = arrCells
oWS.Columns.AutoFit
oWB.Saved = True
oOutput.write "Completed"

Sub XmlHttpGet(sQuery, arrSetHeaders, sRespHeaders, sRespText)
    Dim arrHeader
    With CreateObject("MSXML2.ServerXMLHTTP")
        .SetOption 2, 13056 ' SXH_SERVER_CERT_IGNORE_ALL_SERVER_ERRORS
        .Open "GET", sQuery, False
        For Each arrHeader In arrSetHeaders
            .SetRequestHeader arrHeader(0), arrHeader(1)
        Next
        .Send ""
        sRespHeaders = .GetAllResponseHeaders
        sRespText = .ResponseText
    End With
End Sub

Sub ParseResponse(sPattern, sResponse, aData)
    Dim oMatch, aTmp, sSubMatch
    aData = Array()
    With CreateObject("VBScript.RegExp")
        .Global = True
        .MultiLine = True
        .Pattern = sPattern
        For Each oMatch In .Execute(sResponse)
            If oMatch.SubMatches.Count = 1 Then
                PushItem aData, oMatch.SubMatches(0)
            Else
                aTmp = Array()
                For Each sSubMatch In oMatch.SubMatches
                    PushItem aTmp, sSubMatch
                Next
                PushItem aData, aTmp
            End If
        Next
    End With
End Sub

Sub PushItem(aList, vItem)
    ReDim Preserve aList(UBound(aList) + 1)
    aList(UBound(aList)) = vItem
End Sub

Sub ParseProjects(sJson, arrProj, iTotalRecords)
    Dim i, q
    With CreateObject("htmlfile")
        With .parentwindow
            .execscript ";", "jscript"
            .eval ("json = " & sJson & ";")
            iTotalRecords = CInt(.json.iTotalRecords)
            Do While .json.aaData.Length
                ReDim Preserve arrProj(UBound(arrProj) + 1)
                With .json.aaData.Shift()
                    arrProj(UBound(arrProj)) = Array(.Shift(), .Shift(), .Shift(), .Shift(), .Shift(), .Shift(), .Shift())
                End With
            Loop
        End With
    End With
End Sub

Function RequestLatLng(sRid)
    Dim sRespText, arrTmp, sTmp
    XmlHttpGet "https://energy.ehawaii.gov/epd/public/energy-project-details.html?rid=" & sRid, Array(), "", sRespText
    arrTmp = Split(sRespText, "google.maps.LatLng(")
    If UBound(arrTmp) >= 1 Then
        sTmp = arrTmp(1)
        arrTmp = Split(sTmp, "),")
        If UBound(arrTmp) >= 1 Then
            RequestLatLng = Split(arrTmp(0), ", ")
            Exit Function
        End If
    End If
    RequestLatLng = Array("#", "#")
End Function

Sub Output(oWnd)
    Set oWnd = ShowWindow("energy.ehawaii.gov", "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAWIAAAB2CAYAAADybJlDAAAACXBIWXMAAC4jAAAuIwF4pT92AAAAIGNIUk0AAHolAACAgwAA+f8AAIDpAAB1MAAA6mAAADqYAAAXb5JfxUYAAAUjSURBVHja7N05ztxGEIBRjqHciZ34Bn3/w9QNnPsE49QwtPwc9lLd9V4kAdKwyQE+Fjnb6/1+XwCs85tDACDEAEIMgBADCDEAQgwgxAAIMYAQAyDEAEIMgBADCDEAQgwgxACM980hgD28Xq/Td7Hd/PdxzHPri+FBiBPH9hPbBVqIQYh3D+/2YRZiEOIT47tVmIUYhLhCfFMHWYhBiKsFOF2UhRiEuHKAUwRZiEGIBXhxkIUYhFiAFwdZiEGIBXhxjIUYhDhLhCP5OocFWYhBiGeHLaxdiEGI54YsNljj0n0SYhDiEYGLhGtKG2MhBiHOFuCdXiDsEmMhhtoh7hW9SLKOLWMsxFA3xD3iVznA3Y6FEEPNED8NoAB3PC5CDPVCvDLCFT4gcvv4CDHUCvGTEFYKcMw8Vn48FOpYEeG28RQ87cRjIoYaE/GnMdxpCo7O2336eH9f1/WPEIMQr4jwyo8Zt4HbaqOOoRCDEGePcCzc9pQYCzGcHeJZER45ia6ewmP0MfViHZxrxwjHle9XltvoYyTEIMIZIvwkwDNvvfT6/0IMpInw0wn4SYTj4Xa63T4RYjANr4hwj1sQKybhIc+NEIMIz4xwr3vAvSI8eyo2EQNLAphhGo1Ej92EGESyW0AWhbD3fkaHbT86lkIMNc2KcO+3ox3500lCDPWm4ZkRXjnx31lDTDgeP3wcIYZaEZ712Bk+lBG7bFOIoZbYdBsz3g0SC/a/CTGw6lZAlok/xZqEGGrFsg163FW3AnqvY8lULMQg3pkj3JKsYyghhjrT8KhL66oRbr3WI8Qg5OUm0Gz7JMQgxhmDdcoJ4UvrEmKoF9AfPVbbdN+2n8qFGNg5fkfcGhFiIFP82mb72Ho8lhDD+dOrqdxEDCyc6mLAY54+lQsxkDrspvwBx06IQUDtj4kYGDA5xnf+Hpvv07GEGEzHd4LZO5qVXqALIQZ6xaxtuu602xNiYNV03A46FkIM7BuhpNPwVEIM50VvxddBjvqC+hKEGAQ9++PG6U+IEAOzp+M7wS7xHmghBnaajk3EgEA+nI4rT8NCDNy+hO4VQtPxL3xzCIAvTKWz3zOcYRr+a9ZaTcSA2wTf9/usDQkxcCfGMWk7pQgx1PGHUAoxsNafHUM7ajouGXkhBjKFc8RXbQoxUCLGgizEgCB33+60KwIhBgR5ceB9oAPYNWTHvLBnIoa9Js2el9UnRPmIfRViYNol+CZBnh53tyaAE+O/1ScAhRgQ5cWEGBgZwa9OlW3SeqLj2rsdF/eIQQxXif/9OSYdlx73lLuuVYhhL9kvs3sELiau9b9RXnbyEmIg6wln5kln6U84CTGcK/Ptibjx72ZHefpz4cU6YJfwx6Tt3NnPLicIIYY9L9vb5us/Icg/W8OtfRRiOH9KPfXL1mPiND706kCIgenhKRblX/JiHdS5vM/wpUGzPnq81Qt8Qgym1dNPWOmjLMRQayquHPq0URZiMBVXO3GYiIEtYnjy7Yxs+xZCDDXCOipAuwW7fXB8h0/y3r4GZDthZIrwz9bfeh0XIYYzIvfVKNyJx0kfBmmDjnuXbbk1AZzukwjHhG2FEINL/4xTZpUIm4hBjKfGdtXtjZZ4WyHEwOk+jfCSk4YQg6k402S5Y4Tb0+0IMTAyPpF0XWkiLMRgKj5lKn7yy8yx+pgIMYhxlel8dYRDiEGMT5uK23VAhIUYqDgFp4rwdV3X6/1+e0phA6/Xa7dJ9rT3NQ+7B/0vAAAA//8DAERsQ7O6796eAAAAAElFTkSuQmCC", 354, 118)
End Sub

Function ShowWindow(sTitle, sBG, iWidth, iHeight)
    Set ShowWindow = CreateWindow()
    With ShowWindow
        With .document
            .title = sTitle
            .getElementsByTagName("head")(0).appendChild .createElement("style")
            .styleSheets(0).cssText = "* {font: 8pt tahoma; margin: 5px;}"
            .body.style.background = "buttonface"
            .body.style.backgroundRepeat = "no-repeat"
            .body.style.backgroundImage = "url(" & sBG & ")"
            .body.innerHTML = ""
        End With
        .resizeTo .screen.availWidth, .screen.availHeight
        .resizeTo iWidth + .screen.availWidth - .document.body.offsetWidth, iHeight + .screen.availHeight - .document.body.offsetHeight
        .moveTo CInt((.screen.availWidth - iWidth) / 2), CInt((.screen.availHeight - iHeight) / 2)
        .execScript "var handlers, thunks = {body_onunload: function() {handlers.WSHQuit()}};"
        Execute "Class clsHandlers: Public Sub WSHQuit(): WScript.Quit: End Sub: End Class"
        Set .handlers = New clsHandlers
        Set .document.body.onunload = .thunks.body_onunload
        .execScript "var write = function(t) {document.body.innerHTML = t};"
    End With
End Function

Function CreateWindow()
    ' source http://forum.script-coding.com/viewtopic.php?pid=75356#p75356
    Dim sSignature, oShellWnd, oProc
    On Error Resume Next
    sSignature = Left(CreateObject("Scriptlet.TypeLib").Guid, 38)
    Do
        Set oProc = CreateObject("WScript.Shell").Exec("mshta ""about:<head><script>moveTo(-32000,-32000);document.title=' '</script><hta:application id=app border=thick minimizebutton=no maximizebutton=no scroll=no showintaskbar=yes contextmenu=no selection=no innerborder=no icon=""""/><object id='shell' classid='clsid:8856F961-340A-11D0-A96B-00C04FD705A2'><param name=RegisterAsBrowser value=1></object><script>shell.putproperty('" & sSignature & "',document.parentWindow);</script></head>""")
        Do
            If oProc.Status > 0 Then Exit Do
            For Each oShellWnd In CreateObject("Shell.Application").Windows
                Set CreateWindow = oShellWnd.GetProperty(sSignature)
                If Err.Number = 0 Then Exit Function
                Err.Clear
            Next
        Loop
    Loop
End Function

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
OGeek|极客中国-欢迎来到极客的世界,一个免费开放的程序员编程交流平台!开放,进步,分享!让技术改变生活,让极客改变未来! Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...