javascript - extract all css classes from html text - Stack Overflow

admin2025-04-20 1

I have an html as a text string for example

// Sample input: note the irregular whitespace inside the class attributes.
// The literal is single-quoted so the double quotes in the markup do not end it.
var html = 'bla<p class="  c1 c2">blabla<button></button><div id="bla" class="c1   c3 "></div>';

I want to extract all unique classes into an array. So I want a result such as:

classes=['c1','c2','c3']

I tried to use a regexp but could not figure out how to group and collect all the individual classes, or how to then extract only the unique ones into the array.

Note: I need a plain JavaScript (no jQuery) solution, please!

I have an html as a text string for example

// Sample input: note the irregular whitespace inside the class attributes.
// The literal is single-quoted so the double quotes in the markup do not end it.
var html = 'bla<p class="  c1 c2">blabla<button></button><div id="bla" class="c1   c3 "></div>';

I want to extract all unique classes into an array. So I want a result such as:

classes=['c1','c2','c3']

I tried to use a regexp but could not figure out how to group and collect all the individual classes, or how to then extract only the unique ones into the array.

Note: I need a plain JavaScript (no jQuery) solution, please!

Share Improve this question edited Oct 8, 2015 at 23:03 asked Oct 8, 2015 at 22:57 kofifus 19.4k23 gold badges118 silver badges186 bronze badges

Add a comment |

7 Answers 7

Sorted by: Reset to default 3

You can do this way:

var html = "bla<p class='c1 c2'>blabla<button></button><div id='bla' class='c1 c3'></div>";

/**
 * Collects the distinct CSS class names used in quoted class attributes of
 * an HTML string.
 *
 * @param {string} markup - HTML source text to scan.
 * @returns {string[]} Unique class names, in order of first appearance.
 */
function extractClasses(markup) {
    // One alternative per quote style, so the value ends at the SAME kind of
    // quote that opened it. Group 1 = double-quoted value, group 2 = single-quoted.
    var attrPattern = /class\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
    var seen = [];
    var match;

    while ((match = attrPattern.exec(markup)) !== null) {
        var value = match[1] !== undefined ? match[1] : match[2];
        // Split on any run of whitespace (spaces, tabs, newlines). Leading or
        // trailing whitespace still produces an empty entry, skipped below.
        var names = value.split(/\s+/);
        for (var i = 0; i < names.length; i++) {
            if (names[i] !== '' && seen.indexOf(names[i]) === -1) {
                seen.push(names[i]);
            }
        }
    }

    return seen;
}

var classes = extractClasses(html); // unique classes only

console.log(classes); // Run code snippet -> then press f12 to see the array

Hope it helps.

This isn't the prettiest solution, but here goes.

First split the string into parts

// `html` is expected to hold the markup string before this snippet runs.
// Splitting at every `class=` leaves each part after the first starting with
// one quoted class attribute value.
var parts = html.split(/class=/);
var classes = [];
for (var i = 1; i < parts.length; i++) {
    // Take the text between the opening quote and the matching closing quote.
    // The earlier version called replace(/\'/) with no replacement argument,
    // which inserted the literal text "undefined" in place of the quote.
    var quoted = /^\s*(['"])([\s\S]*?)\1/.exec(parts[i]);
    if (quoted) {
        classes.push(quoted[2].trim());
    }
}

I created a regex for you to play with; it works in both JavaScript and PHP. Hope it helps: https://regex101.com/r/hR5mM0/4. Here is how to use it in JavaScript:

// Collect the unique class names from every double-quoted class attribute in `str`.
var re = /class="(.*?)"/g;
var str = 'bla<p class="c1 c2">blabla<button></button><div id="bla" class="c1 c3"></div>';
var m;
var arr = [];
while ((m = re.exec(str)) !== null) {
    // Append only this match's names. Passing `arr` to concat as well would
    // re-append the whole accumulator on every match and double its size each time.
    arr = arr.concat(m[1].split(/\s+/));
}
// Keep the first occurrence of each name; drop the empty strings that
// leading or trailing whitespace in an attribute value leaves behind.
arr = arr.filter(function(item, pos, self) {
    return item !== '' && self.indexOf(item) === pos;
});

console.log(arr); //["c1", "c2", "c3"]

Here is the fiddle: https://jsfiddle.net/jnyym0ye/7/

By no means a pretty solution but an alternative to using RegEx would be to create an element. Give it the HTML as content and then run a query selector against it for anything with a class. You can then iterate over all elements populating an array with only unique classes.

/*
 * Browser-only alternative to a regex: let the HTML parser read the markup,
 * then collect the unique class names of every element that has a class
 * attribute. The IIFE evaluates to that array, in order of first appearance.
 */
(function () {
  var html = "bla<p class='c1 c2'>blabla<button></button><div id='bla' class='c1   c3 '></div>";
  // Parse into a separate document rather than assigning innerHTML on an
  // element owned by the page's document, where inline handlers in the
  // markup (e.g. <img onerror=...>) can still fire.
  var doc = new DOMParser().parseFromString(html, 'text/html');
  var classed = doc.querySelectorAll('[class]');
  var classes = [];

  for (var i = 0; i < classed.length; i++) {
    // Read the attribute, not className: on SVG elements className is an
    // SVGAnimatedString, which has no split().
    var names = classed[i].getAttribute('class').split(/\s+/);
    for (var c = 0; c < names.length; c++) {
      if (names[c] !== '' && classes.indexOf(names[c]) === -1) {
        classes.push(names[c]);
      }
    }
  }

  return classes;
}());

/* classes == ['c1', 'c2', 'c3'] */

As disclaimed, it's not pretty but was just an alternative route I came up with.

Or in Java if it helps your cause:

import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.HashSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Reads the HTML file named by the first command-line argument (or
 * "index.html" when no argument is given) and prints every distinct CSS class
 * name found in its class attributes, one per line, in sorted order.
 */
public class CSSApp {

    // Matches a quoted class attribute. Group 1 is the opening quote and the
    // back-reference \1 requires the same quote to close it; group 2 is the
    // attribute value. The look-behind keeps attributes such as "data-class"
    // from matching, and DOTALL lets a value span several lines.
    private static final Pattern CLASS_ATTRIBUTE =
            Pattern.compile("(?<![\\w-])class\\s*=\\s*(['\"])(.*?)\\1", Pattern.DOTALL);

    public static void main(String[] args) throws Exception {
        byte[] encoded = Files.readAllBytes(Paths.get((args.length > 0) ? args[0] : "index.html"));
        String html = new String(encoded, "UTF-8");

        HashSet<String> noDupes = new HashSet<String>();
        Matcher m = CLASS_ATTRIBUTE.matcher(html);
        while (m.find()) {
            // Trim first: String.split keeps a leading empty token, which would
            // otherwise be recorded as a class name when the value starts with spaces.
            for (String name : m.group(2).trim().split("\\s+")) {
                if (!name.isEmpty()) {
                    noDupes.add(name);
                }
            }
        }

        String[] classes = noDupes.toArray(new String[0]);
        Arrays.sort(classes);

        for (String name : classes) {
            System.out.println(name);
        }
    }
}

Try

/**
 * Extracts the unique CSS class names used in an HTML string.
 *
 * @param {string} html - HTML source text to scan.
 * @returns {string[]} Distinct class names, in order of first appearance.
 */
function extract(html) {
  // Every quoted class attribute, e.g. `class="a b"` or `class='a b'`.
  const attrs = html.match(/class=("[^"]*"|'[^']*')/g) || [];
  const names = attrs.flatMap((attr) =>
    attr
      .slice('class='.length + 1, -1) // drop `class=` plus both quotes
      .split(/\s+/)                   // any whitespace run separates names
      .filter(Boolean));              // leading/trailing whitespace leaves ''
  return [...new Set(names)];
}


// TEST
/**
 * Writes one empty CSS rule per class found in the given HTML text into
 * `output.value`.
 *
 * @param {string} classList - HTML source text; passed straight to extract().
 */
function print(classList) {
  const rules = extract(classList).map((name) => `.${name} {\n}\n\n`);
  output.value = rules.join('');
}

<!-- Demo UI: paste HTML into the first textarea and click the button; the
     inline handler passes that text to print(). The handler refers to the
     first textarea by its id (inp); a second textarea has id "output". -->
<textarea id="inp" cols="70" rows="4" placeholder="Paste html"></textarea>
<br><button onclick="print(inp.value)">Extract CSS!</button><br><br>
<textarea id="output" cols="70" rows="4"></textarea>

Building on John Diaz answer above, here's my solution:

var str = 'bla<p class="c1 c2">blabla<button></button><div id="bla" class="  c1    c3 "></div>';
var classes=getHTMLclasses(str);
console.log(classes);


/**
 * Returns the unique CSS class names used in quoted class attributes of an
 * HTML string.
 *
 * @param {string} html - HTML source text to scan.
 * @returns {string[]} Distinct class names, in order of first appearance.
 */
function getHTMLclasses(html) {
    // One alternative per quote style so a value closes on the same kind of
    // quote that opened it. Group 1 = double-quoted, group 2 = single-quoted.
    var classRegexp = /class=(?:"([^"]*)"|'([^']*)')/g;
    // Prototype-less object used as a set: names such as "constructor" or
    // "length" cannot collide with built-in properties.
    var seen = Object.create(null);
    var arr = [];
    var m;
    while ((m = classRegexp.exec(html)) !== null)
    {
        var value = m[1] !== undefined ? m[1] : m[2];
        value.split(/\s+/).forEach(function(item) {
            // An empty or whitespace-padded attribute value yields '' entries; skip them.
            if (item !== '' && !seen[item]) {
                seen[item] = true;
                arr.push(item);
            }
        });
    }

    return arr;
}

see jsfiddle

转载请注明原文地址:http://conceptsofalgorithm.com/Algorithm/1745085268a284101.html

javascriptextract all css classes from html textStack Overflow

最新回复(0)