Simple HTML DOM Parser: как пропустить tr, вложенные в td, в цикле foreach

0

У меня есть html-контент, подобный этому...

$html = <<<EOF
<table  id="specialTbl">
   <tbody>
      <tr>
         <td> row-1-td-1</td>
         <td> row-1-td-2</td>
         <td> row-1-td-3</td>
         <td>
            <table class="runsOn">  // Problem starts here
               <tbody>
                  <tr>
                    <td>row-1-td-4-Child-1</td> 
                    <td>row-1-td-4-Child-2</td>               
                  </tr>               
               </tbody>
            </table>
         </td>
         <td> row-1-td-5</td>
         <td> row-1-td-6</td>
      </tr>
      <tr>
         <td> row-2-td-1</td>
         <td> row-2-td-2</td>
         <td> row-2-td-3</td>
         <td>
            <table class="runsOn">
               <tbody>
                  <tr>
                    <td>row-2-td-4-Child-1</td>
                    <td>row-2-td-4-Child-2</td>               
                  </tr>               
               </tbody>
            </table>
         </td>
         <td> row-2-td-5</td>
         <td> row-2-td-6</td>
      </tr>
      <tr>
         <td> row-3-td-1</td>
         <td> row-3-td-2</td>
         <td> row-3-td-3</td>
         <td>
            <table class="runsOn">
               <tbody>
                  <tr>
                    <td>row-3-td-4-Child-1</td>
                    <td>row-3-td-4-Child-2</td>               
                  </tr>               
               </tbody>
            </table>
         </td>
         <td> row-3-td-5</td>
         <td> row-3-td-6</td>
      </tr>
      <tr>
         <td> row-4-td-1</td>
         <td> row-4-td-2</td>
         <td> row-4-td-3</td>
         <td>
            <table class="runsOn">
               <tbody>
                  <tr>
                    <td>row-4-td-4-Child-1</td>
                    <td>row-4-td-4-Child-2</td>               
                  </tr>               
               </tbody>
            </table>
         </td>
         <td> row-4-td-5</td>
         <td> row-4-td-6</td>
      </tr>
      <tr>
         <td> row-5-td-1</td>
         <td> row-5-td-2</td>
         <td> row-5-td-3</td>
         <td>
            <table class="runsOn">
               <tbody>
                  <tr>
                    <td>row-5-td-4-Child-1</td>
                    <td>row-5-td-4-Child-2</td>               
                  </tr>               
               </tbody>
            </table>
         </td>
         <td> row-5-td-5</td>
         <td> row-5-td-6</td>
      </tr>
      <tr>
         <td> row-6-td-1</td>
         <td> row-6-td-2</td>
         <td> row-6-td-3</td>
         <td>
            <table class="runsOn">
               <tbody>
                  <tr>
                    <td>row-6-td-4-Child-1</td>
                    <td>row-6-td-4-Child-2</td>               
                  </tr>               
               </tbody>
            </table>
         </td>
         <td> row-6-td-5</td>
         <td> row-6-td-6</td>
      </tr>
      <tr>
         <td> row-7-td-1</td>
         <td> row-7-td-2</td>
         <td> row-7-td-3</td>
         <td>
            <table class="runsOn">
               <tbody>
                  <tr>
                    <td>row-7-td-4-Child-1</td>
                    <td>row-7-td-4-Child-2</td>               
                  </tr>               
               </tbody>
            </table>
         </td>
         <td> row-7-td-5</td>
         <td> row-7-td-6</td>
      </tr>  
   </tbody>
</table>
EOF;



// Parse the markup with Simple HTML DOM and locate the outer table by its id.
$html = str_get_html($html);
$table = $html->find('table#specialTbl', 0);

$response["response_code"] = 200;
$response["rows"] = array();

// NOTE: find('tr') matches <tr> descendants at any depth, so the rows of the
// nested "runsOn" tables are visited in addition to the rows of #specialTbl.
// This is the behaviour the question is asking about, so it is left as is.
foreach ($table->find('tr') as $key => $value) {
    // Cells are looked up by position among all <td> descendants of the row
    // and stored under the keys "td1" .. "td6".
    for ($i = 0; $i < 6; $i++) {
        $post["td" . ($i + 1)] = trim(strip_tags($value->find('td', $i)->plaintext));
    }
    array_push($response["rows"], $post);
}

// Print the variable that actually holds the encoded response; the original
// echoed $json_content, which is never assigned in this snippet.
$json = json_encode($response);
echo $json;

А JSON-ответ получается таким:

{
"response_code":200,
"rows":[
{
"td1":"row-1-td-1",
"td2":"row-1-td-2",
"td3":"row-1-td-3",
"td4":"row-1-td-4-Child-1 row-1-td-4-Child-2",
"td5":"row-1-td-4-Child-1",
"td6":"row-1-td-4-Child-2"
},
{
"td1":"row-1-td-4-Child-1",
"td2":"row-1-td-4-Child-2",
"td3":"",
"td4":"",
"td5":"",
"td6":""
},
{
"td1":"row-2-td-1",
"td2":"row-2-td-2",
"td3":"row-2-td-3",
"td4":"row-2-td-4-Child-1 row-2-td-4-Child-2",
"td5":"row-2-td-4-Child-1",
"td6":"row-2-td-4-Child-2"
},
{
"td1":"row-2-td-4-Child-1",
"td2":"row-2-td-4-Child-2",
"td3":"",
"td4":"",
"td5":"",
"td6":""
},
{
"td1":"row-3-td-1",
"td2":"row-3-td-2",
"td3":"row-3-td-3",
"td4":"row-3-td-4-Child-1 row-3-td-4-Child-2",
"td5":"row-3-td-4-Child-1",
"td6":"row-3-td-4-Child-2"
},
{
"td1":"row-3-td-4-Child-1",
"td2":"row-3-td-4-Child-2",
"td3":"",
"td4":"",
"td5":"",
"td6":""
},
{
"td1":"row-4-td-1",
"td2":"row-4-td-2",
"td3":"row-4-td-3",
"td4":"row-4-td-4-Child-1 row-4-td-4-Child-2",
"td5":"row-4-td-4-Child-1",
"td6":"row-4-td-4-Child-2"
},
{
"td1":"row-4-td-4-Child-1",
"td2":"row-4-td-4-Child-2",
"td3":"",
"td4":"",
"td5":"",
"td6":""
},
{
"td1":"row-5-td-1",
"td2":"row-5-td-2",
"td3":"row-5-td-3",
"td4":"row-5-td-4-Child-1 row-5-td-4-Child-2",
"td5":"row-5-td-4-Child-1",
"td6":"row-5-td-4-Child-2"
},
{
"td1":"row-5-td-4-Child-1",
"td2":"row-5-td-4-Child-2",
"td3":"",
"td4":"",
"td5":"",
"td6":""
},
{
"td1":"row-6-td-1",
"td2":"row-6-td-2",
"td3":"row-6-td-3",
"td4":"row-6-td-4-Child-1 row-6-td-4-Child-2",
"td5":"row-6-td-4-Child-1",
"td6":"row-6-td-4-Child-2"
},
{
"td1":"row-6-td-4-Child-1",
"td2":"row-6-td-4-Child-2",
"td3":"",
"td4":"",
"td5":"",
"td6":""
},
{
"td1":"row-7-td-1",
"td2":"row-7-td-2",
"td3":"row-7-td-3",
"td4":"row-7-td-4-Child-1 row-7-td-4-Child-2",
"td5":"row-7-td-4-Child-1",
"td6":"row-7-td-4-Child-2"
},
{
"td1":"row-7-td-4-Child-1",
"td2":"row-7-td-4-Child-2",
"td3":"",
"td4":"",
"td5":"",
"td6":""
}
]
}

Проблема в цикле foreach. Как пропустить tr, находящиеся внутри td? В таблице с идентификатором "specialTbl" 7 строк, но foreach возвращает в JSON 14 строк, поскольку он проходит и по вложенным таблицам с классом runsOn.

Как избежать обхода таблицы, вложенной в четвёртый td?

  • 0
    Что такое функция str_get_html() ? Какой парсер вы используете?
  • 0
    Я использую этот парсер: simplehtmldom.sourceforge.net/manual.htm
Теги:
simple-html-dom

2 ответа

2
Лучший ответ

Проще использовать DOMDocument вместе с XPath, как показано ниже. DOMDocument уже входит в состав PHP 5 и даст вам желаемый результат.

// Parse the markup with PHP's DOM extension and query it through XPath.
$doc = new DOMDocument();
$doc->loadHTML($html);
$xpath = new DOMXpath($doc);

$response["response_code"] = 200;
$response["rows"] = array();

// Select only <tr> elements that are direct children of a <tbody> sitting
// directly under table#specialTbl; rows of nested tables do not match.
$trs = $xpath->query("//table[@id='specialTbl']/tbody/tr");
foreach ($trs as $tr) {
    $post = array();

    // Relative query: only the <td> elements that are direct children of $tr.
    $tds = $xpath->query("td", $tr);
    foreach ($tds as $index => $td) {
        // textContent carries the source indentation and line breaks (most
        // visibly for the cell that wraps a nested table), so collapse runs
        // of whitespace and trim before storing. Keys are "td1", "td2", ...
        $text = preg_replace('/\s+/u', ' ', $td->textContent);
        $post["td" . ($index + 1)] = trim($text);
    }

    array_push($response["rows"], $post);
}

$json_content = json_encode($response);
echo $json_content;

Но вы также можете продолжать пользоваться http://simplehtmldom.sourceforge.net/manual.htm и применить CSS-подобный селектор (непроверенный код, у меня нет simplehtmldom):

// Parse the markup with Simple HTML DOM.
$html = str_get_html($html);
if ($html === false) {
    // Stop here with a clear message rather than failing on ->find() below.
    throw new RuntimeException('str_get_html() could not parse the given markup');
}

$response["response_code"] = 200;
$response["rows"] = array();

// NOTE(review): this relies on the parser honouring the '>' child combinator
// and the tbody step; the snippet was published as untested, so confirm it
// against the installed Simple HTML DOM version.
$trs = $html->find("table#specialTbl>tbody>tr");
foreach ($trs as $tr) {
    $post = array();

    // children() yields only the direct child nodes of the row.
    $tds = $tr->children();
    foreach ($tds as $index => $td) {
        // Use plaintext rather than innertext: for the cell that wraps a
        // nested table, innertext would be the nested table's HTML markup.
        // Whitespace runs are collapsed and the value trimmed.
        $text = preg_replace('/\s+/u', ' ', $td->plaintext);
        $post["td" . ($index + 1)] = trim($text);
    }

    array_push($response["rows"], $post);
}

$json_content = json_encode($response);
echo $json_content;
  • 0
    Попробую и отпишусь... @nl-x, спасибо
  • 0
    @IndraKumarS Я только что отредактировал свой ответ: я забыл `$key =>`, а также добавил способ, который, возможно, позволит и дальше использовать simplehtmldom.sourceforge.net/manual.htm
Показать ещё 2 комментария
0

Вы могли бы использовать парсер PHP DOM и перед поиском элементов tr удалить из структуры HTML строки всех вложенных таблиц:

// Parse the HTML into a DOM object & find the table by ID
$doc = new DOMDocument();
$doc->loadHTML($html);
$table = $doc->getElementById('specialTbl');

// Remove all nested TRs from the DOM table object
$nested = $table->getElementsByTagName('table');
foreach ($nested as $element)
{
    // Remove the TRs from thi nested table
    foreach ($element->getElementsByTagName('tr') as $tr)
        $tr->parentNode->removeChild($tr);
} 

// Now when we search through TRs, we only get the top level ones
$rows = $table->getElementsByTagName('tr');
$response = array();
foreach ($rows as $row)
{
    // Collect the values of this row TDs
    $tds = array();
    foreach ($row->getElementsByTagName('td') as $td)
    {
        $tds[] = trim($td->nodeValue);
    }

    // Add this row to the response
    $response['rows'][] = $tds;
}

// Add extra response details
$response['response_code'] = 200; // You shouldn't need to explicitly send this
$json = json_encode($response);

// Output JSON
header('Content-type: application/json'); // Use the correct MIME type
echo $json;
  • 0
    Это приводит к фатальной ошибке: вызов метода getElementsByTagName() у значения, не являющегося объектом (Call to a member function getElementsByTagName() on a non-object), в строке `$nested = $table->getElementsByTagName('table');`
  • 0
    Можете ли вы помочь мне

Ещё вопросы

Сообщество Overcoder
Наверх
Меню