fix html
First, I would point out that you are misusing `dl`. From the MDN docs:
> The HTML `<dl>` element represents a description list. The element encloses a list of groups of terms (specified using the `<dt>` element) and descriptions (provided by `<dd>` elements) ...
Here's what the correct use of `dl`, `dt`, and `dd` would look like:
<dl>
<dt>Title 1</dt>
<dd>
<dl>
<dt>Title 1.1</dt>
<dd><a href="#">Item 1.1.1</a></dd>
<dd><a href="#">Item 1.1.2</a></dd>
</dl>
</dd>
<dd><a href="#">Item 1.2</a></dd>
<dd><a href="#">Item 1.3</a></dd>
<dd><a href="#">Item 1.4</a></dd>
<dd><a href="#">Item 1.5</a></dd>
<dd>
<dl>
<dt>Title 1.6</dt>
<dd><a href="#">Item 1.6.1</a></dd>
<dd><a href="#">Item 1.6.2</a></dd>
</dl>
</dd>
<dd><a href="#">Item 1.7</a></dd>
</dl>
Notice it matches the expected shape of your output -
{
"title": "Title 1",
"children": [
{
"title": "Title 1.1",
"children": [
{"title": "Item 1.1.1"},
{"title": "Item 1.1.2"}
]
},
{"title": "Item 1.2"},
{"title": "Item 1.3"},
{"title": "Item 1.4"},
{"title": "Item 1.5"},
{
"title": "Title 1.6",
"children": [
{"title": "Item 1.6.1"},
{"title": "Item 1.6.2"}
]
},
{"title": "Item 1.7"}
]
}
fromHtml
If you are not willing (or able) to change the input HTML as described above, please see Scott's wonderful answer. To write a program for the proposed HTML, I would break it into two parts. First, we write `fromHtml` in a simple recursive form:
/**
 * Recursively converts a description-list element into nested arrays.
 *
 * - `DT` and `A` elements yield their `textContent` string.
 * - A `DL` yields the results of its child nodes, flattened one level.
 * - A `DD` yields the same flattened list, wrapped in one extra array so
 *   that it survives the parent `DL`'s flattening as a single entry.
 * - Anything else (text nodes, other tags, null/undefined) yields `[]`,
 *   which disappears when the parent flattens its children.
 *
 * @param {?{tagName?: string, childNodes?: Iterable<object>, textContent?: string}} e
 *   The node to convert.
 * @returns {string | Array} The text of a leaf, or a nested array.
 */
function fromHtml (e) {
  const tag = e?.tagName;

  if (tag === "DT" || tag === "A") {
    return e.textContent;
  }

  if (tag !== "DL" && tag !== "DD") {
    return [];
  }

  // flatMap flattens exactly one level, matching map(...).flat().
  const entries = Array.from(e.childNodes).flatMap((node) => fromHtml(node));
  return tag === "DD" ? [entries] : entries;
}
// Convert the first <dl> element in the document into the nested-array format.
fromHtml(document.querySelector('dl'))
Which gives us this intermediate format -
[
"Title 1",
[
"Title 1.1",
[ "Item 1.1.1" ],
[ "Item 1.1.2" ]
],
[ "Item 1.2" ],
[ "Item 1.3" ],
[ "Item 1.4" ],
[ "Item 1.5" ],
[
"Title 1.6",
[ "Item 1.6.1" ],
[ "Item 1.6.2" ]
],
[ "Item 1.7" ]
]
applyLabels
Following that, I would write a separate `applyLabels` function, which adds the `title` and `children` labels you require:
/**
 * Turns a nested `[title, ...children]` array into a labelled object tree.
 *
 * Entries without children become `{ title }`; entries with children become
 * `{ title, children }`, with every child converted recursively.
 *
 * @param {Array} entry A `[title, ...children]` array.
 * @returns {{title: *, children?: Array<object>}} The labelled node.
 */
const applyLabels = ([ title, ...children ]) => {
  if (children.length === 0) {
    return { title };
  }

  const labelled = children.map((child) => applyLabels(child));
  return { title, children: labelled };
};
// Parse the first <dl> in the document, then label the nested arrays.
const result =
applyLabels(fromHtml(document.querySelector('dl')))
{
"title": "Title 1",
"children": [
{
"title": "Title 1.1",
"children": [
{"title": "Item 1.1.1"},
{"title": "Item 1.1.2"}
]
},
{"title": "Item 1.2"},
{"title": "Item 1.3"},
{"title": "Item 1.4"},
{"title": "Item 1.5"},
{
"title": "Title 1.6",
"children": [
{"title": "Item 1.6.1"},
{"title": "Item 1.6.2"}
]
},
{"title": "Item 1.7"}
]
}
I might suggest one final change, which guarantees that all nodes in the output have a uniform shape, `{ title, children }`. It's a change worth noting because in this case `applyLabels` is easier to write and it behaves better:
/**
 * Turns a nested `[title, ...children]` array into a labelled object tree
 * in which every node has the same `{ title, children }` shape.
 *
 * Leaves receive an empty `children` array rather than omitting the key.
 *
 * @param {Array} entry A `[title, ...children]` array.
 * @returns {{title: *, children: Array<object>}} The labelled node.
 */
const applyLabels = ([ title, ...children ]) => {
  const labelled = children.map((child) => applyLabels(child));
  return { title, children: labelled };
};
Yes, this means that the deepest descendants will have an empty `children: []` property, but it makes consuming the data much easier, as we don't have to check whether `children` exists before using it.
demo
Expand the snippet below to verify the results of `fromHtml` and `applyLabels` in your own browser:
/**
 * Recursively converts a description-list element into nested arrays.
 *
 * - `DT` and `A` elements yield their `textContent` string.
 * - A `DL` yields the results of its child nodes, flattened one level.
 * - A `DD` yields the same flattened list, wrapped in one extra array so
 *   that it survives the parent `DL`'s flattening as a single entry.
 * - Anything else (text nodes, other tags, null/undefined) yields `[]`,
 *   which disappears when the parent flattens its children.
 *
 * @param {?{tagName?: string, childNodes?: Iterable<object>, textContent?: string}} e
 *   The node to convert.
 * @returns {string | Array} The text of a leaf, or a nested array.
 */
function fromHtml (e) {
  const tag = e?.tagName;

  if (tag === "DT" || tag === "A") {
    return e.textContent;
  }

  if (tag !== "DL" && tag !== "DD") {
    return [];
  }

  // flatMap flattens exactly one level, matching map(...).flat().
  const entries = Array.from(e.childNodes).flatMap((node) => fromHtml(node));
  return tag === "DD" ? [entries] : entries;
}
/**
 * Turns a nested `[title, ...children]` array into a labelled object tree.
 *
 * Entries without children become `{ title }`; entries with children become
 * `{ title, children }`, with every child converted recursively.
 *
 * @param {Array} entry A `[title, ...children]` array.
 * @returns {{title: *, children?: Array<object>}} The labelled node.
 */
const applyLabels = ([ title, ...children ]) => {
  if (children.length === 0) {
    return { title };
  }

  const labelled = children.map((child) => applyLabels(child));
  return { title, children: labelled };
};
// Parse the first <dl> in the document, label the nested arrays, and log the tree.
const result =
applyLabels(fromHtml(document.querySelector('dl')))
console.log(result)
<dl>
<dt>Title 1</dt>
<dd>
<dl>
<dt>Title 1.1</dt>
<dd><a href="#">Item 1.1.1</a></dd>
<dd><a href="#">Item 1.1.2</a></dd>
</dl>
</dd>
<dd><a href="#">Item 1.2</a></dd>
<dd><a href="#">Item 1.3</a></dd>
<dd><a href="#">Item 1.4</a></dd>
<dd><a href="#">Item 1.5</a></dd>
<dd>
<dl>
<dt>Title 1.6</dt>
<dd><a href="#">Item 1.6.1</a></dd>
<dd><a href="#">Item 1.6.2</a></dd>
</dl>
</dd>
<dd><a href="#">Item 1.7</a></dd>
</dl>