2010-05-26 179 views
1

我有一個非常大的CSV文件。確切地說51427行。在PHP中讀取大型CSV文件

難道沒有辦法只能將所需的行讀入數組嗎?這會顯着加快速度。

+1

你有沒有嘗試過用 `ini_set("max_execution_time", 0)` 這樣的方式調整最大執行時間? – robjmills 2010-05-26 15:46:06

+0

幾個問題: - 你如何將文件導入數據庫? - 您是在導入之前上傳文件還是實時讀取文件? – allnightgrocery 2010-05-26 15:48:49

回答

2

您可能想看看以流的方式讀取CSV文件的做法。將文件位置、起始位置和字節數作爲GET參數發送給ProgressiveReader.php:

/**
 * Signals that a file path does not exist.
 *
 * The exception message is expected to carry the offending path; casting the
 * exception to a string renders an HTML error banner that embeds it.
 */
class NoFileFoundException extends Exception {
    /**
     * Render the error as an HTML heading.
     *
     * The message is HTML-escaped before being embedded, because the path may
     * originate from request parameters and must not be able to inject markup.
     *
     * @return string HTML fragment describing the missing file.
     */
    public function __toString() {
        $path = htmlspecialchars($this->getMessage(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

        return '<h1><b>ERROR:</b> could not find ('
            . $path .
            ') please check your settings.</h1>';
    }
}

/**
 * Signals that a file exists but could not be opened.
 *
 * The exception message is expected to carry the offending path; casting the
 * exception to a string renders an HTML error banner that embeds it.
 */
class NoFileOpenException extends Exception {
    /**
     * Render the error as an HTML heading.
     *
     * The message is HTML-escaped before being embedded, because the path may
     * originate from request parameters and must not be able to inject markup.
     *
     * @return string HTML fragment describing the file that failed to open.
     */
    public function __toString() {
        $path = htmlspecialchars($this->getMessage(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

        return '<h1><b>ERROR:</b> could not open ('
            . $path .
            ') please check your settings.</h1>';
    }
}

/**
 * Contract for a reader that consumes a file in packets, optionally starting
 * from a byte offset.
 */
interface Reader {
    /** Select the file to read. */
    public function setFileName($fName);

    /** Open the selected file for reading. */
    public function open();

    /** Set the byte offset at which reading should start. */
    public function setBufferOffset($offset);

    /** Report the size of the readable buffer. */
    public function bufferSize();

    /** Tell whether a start offset has been configured. */
    public function isOffset();

    /** Set how many bytes a single read() should fetch. */
    public function setPacketSize($size);

    /** Read the next packet. */
    public function read();

    /** Tell whether the end of the file has been reached. */
    public function isEOF();

    /** Release the underlying file. */
    public function close();

    /** Read everything in one call. */
    public function readAll();
}

/**
 * Reads a file in fixed-size packets, optionally starting at a byte offset.
 *
 * Intended call order: setFileName(), then setBufferOffset() and
 * setPacketSize() as needed, then open(), read()/isEOF() or readAll(), and
 * finally close().
 */
class ProgressiveReader implements Reader {
    /** @var string Path of the file being read. */
    private $fName;

    /** @var resource|null Open stream handle, or null when nothing is open. */
    private $fileHandler;

    /** @var int Byte position the stream is moved to when it is opened. */
    private $offset = 0;

    /** @var int Number of bytes requested by each read() call. */
    private $packetSize = 0;

    /**
     * Remember the file to read and verify that it exists.
     *
     * @param string $fName Path to the file.
     * @throws NoFileFoundException When the path does not exist.
     */
    public function setFileName($fName) {
        $this->fName = $fName;
        if (!file_exists($this->fName)) {
            throw new NoFileFoundException($this->fName);
        }
    }

    /**
     * Open the file in binary read mode and move to the configured offset.
     *
     * @throws NoFileOpenException When the file cannot be opened.
     */
    public function open() {
        // Release any handle from an earlier open() so it is not leaked.
        $this->close();

        try {
            $handle = fopen($this->fName, 'rb');
        }
        catch (Exception $e) {
            // Reached only when an error handler converts the fopen() warning
            // into an exception (as PHPUnit does).
            throw new NoFileOpenException($this->fName);
        }

        // fopen() itself does not throw: it reports failure by returning false.
        if ($handle === false) {
            throw new NoFileOpenException($this->fName);
        }

        $this->fileHandler = $handle;
        fseek($this->fileHandler, $this->offset);
    }

    /**
     * Set the byte offset applied by the next open().
     *
     * @param int $offset Byte position to start reading from.
     */
    public function setBufferOffset($offset) {
        $this->offset = $offset;
    }

    /**
     * Report the buffer size derived from the file size and the offset.
     *
     * NOTE(review): for a positive offset this subtracts offset + 1, i.e. one
     * byte fewer than what is actually left to read from that offset. The
     * behavior is kept unchanged because existing callers and tests may rely
     * on it - confirm the intent before altering.
     *
     * @return int File size, reduced by offset + 1 when an offset is set.
     */
    public function bufferSize() {
        $skipped = ($this->offset > 0) ? ($this->offset + 1) : 0;

        return filesize($this->fName) - $skipped;
    }

    /**
     * Tell whether an offset has been configured.
     *
     * @return bool False only when the offset is exactly integer 0.
     */
    public function isOffset() {
        return $this->offset !== 0;
    }

    /**
     * Set how many bytes each read() call requests.
     *
     * @param int $size Packet size in bytes.
     */
    public function setPacketSize($size) {
        $this->packetSize = $size;
    }

    /**
     * Read the next packet from the current stream position.
     *
     * setPacketSize() must have been called with a positive size first;
     * fread() rejects a zero length.
     *
     * @return string|false Up to packetSize bytes, or false on failure.
     */
    public function read() {
        return fread($this->fileHandler, $this->packetSize);
    }

    /**
     * Tell whether the stream has hit end-of-file.
     *
     * @return bool
     */
    public function isEOF() {
        return feof($this->fileHandler);
    }

    /**
     * Close the stream if one is open. Safe to call more than once.
     */
    public function close() {
        if (is_resource($this->fileHandler)) {
            fclose($this->fileHandler);
        }
        // Drop the reference so a later close() never touches a closed handle.
        $this->fileHandler = null;
    }

    /**
     * Read everything from the current stream position to the end of file.
     *
     * Uses stream_get_contents() rather than fread() with the file size, so
     * an empty file yields an empty string instead of an invalid zero-length
     * read.
     *
     * @return string|false Remaining contents, or false on failure.
     */
    public function readAll() {
        return stream_get_contents($this->fileHandler);
    }
}

下面是單元測試:

require_once 'PHPUnit/Framework.php'; 

require_once dirname(__FILE__).'/../ProgressiveReader.php'; 

/**
 * Unit tests for ProgressiveReader.
 *
 * Every test runs against a freshly written 12-byte fixture file
 * ("TEST1\nTEST2\n") that is removed again in tearDown().
 */
class ProgressiveReaderTest extends PHPUnit_Framework_TestCase {

    protected $reader;
    private $fp;
    private $fname = "Test.txt";

    /** Write the fixture file and build a fresh reader. */
    protected function setUp() {
        $this->createTestFile();
        $this->reader = new ProgressiveReader();
    }

    /** Close the reader first, then remove the fixture so no file is left behind. */
    protected function tearDown() {
        $this->reader->close();
        $this->deleteTestFile();
    }

    /** Passes when setFileName() accepts an existing file without throwing. */
    public function test_isValidFile() {
        $this->reader->setFileName($this->fname);
    }

    /** A missing file must be rejected with NoFileFoundException. */
    public function test_isNotValidFile() {
        try {
            $this->reader->setFileName("nothing.tada");
        }
        catch (NoFileFoundException $e) {
            return;
        }

        $this->fail();
    }

    /** Passes when open() succeeds on an existing file without throwing. */
    public function test_isFileOpen() {
        $this->reader->setFileName($this->fname);
        $this->reader->open();
    }

    /** Opening a file that vanished after setFileName() must raise NoFileOpenException. */
    public function test_couldNotOpenFile() {
        $this->reader->setFileName($this->fname);
        try {
            $this->deleteTestFile();
            $this->reader->open();
        }
        catch (NoFileOpenException $e) {
            return;
        }

        $this->fail();
    }

    /** Without an offset the buffer size equals the file size. */
    public function test_bufferSizeZeroOffset() {
        $this->reader->setFileName($this->fname);
        $this->reader->open();
        $this->assertEquals(12, $this->reader->bufferSize());
    }

    /** With offset 2 the reader reports 12 - (2 + 1) = 9. */
    public function test_bufferSizeTwoOffset() {
        $this->reader->setFileName($this->fname);
        $this->reader->setBufferOffset(2);
        $this->reader->open();
        $this->assertEquals(9, $this->reader->bufferSize());
    }

    /** A one-byte packet from the start yields the first character. */
    public function test_readBuffer() {
        $this->reader->setFileName($this->fname);
        $this->reader->setBufferOffset(0);
        $this->reader->setPacketSize(1);
        $this->reader->open();
        $this->assertEquals("T", $this->reader->read());
    }

    /** A one-byte packet at offset 2 yields the third character. */
    public function test_readBufferWithOffset() {
        $this->reader->setFileName($this->fname);
        $this->reader->setBufferOffset(2);
        $this->reader->setPacketSize(1);
        $this->reader->open();
        $this->assertEquals("S", $this->reader->read());
    }

    /** Consecutive reads continue where the previous one stopped. */
    public function test_readSuccesive() {
        $this->reader->setFileName($this->fname);
        $this->reader->setBufferOffset(0);
        $this->reader->setPacketSize(6);
        $this->reader->open();
        $this->assertEquals("TEST1\n", $this->reader->read());
        $this->assertEquals("TEST2\n", $this->reader->read());
    }

    /** readAll() returns the complete file contents. */
    public function test_readEntireBuffer() {
        $this->reader->setFileName($this->fname);
        $this->reader->open();
        $this->assertEquals("TEST1\nTEST2\n", $this->reader->readAll());
    }

    /** A freshly opened stream positioned mid-file is not at EOF. */
    public function test_isNotEOF() {
        $this->reader->setFileName($this->fname);
        $this->reader->setBufferOffset(2);
        $this->reader->setPacketSize(1);
        $this->reader->open();
        $this->assertFalse($this->reader->isEOF());
    }

    /** Requesting more bytes than the file holds drives the stream to EOF. */
    public function test_isEOF() {
        $this->reader->setFileName($this->fname);
        $this->reader->setBufferOffset(0);
        $this->reader->setPacketSize(15);
        $this->reader->open();
        $this->reader->read();
        $this->assertTrue($this->reader->isEOF());
    }

    /** A non-zero offset is reported by isOffset(). */
    public function test_isOffset() {
        $this->reader->setFileName($this->fname);
        $this->reader->setBufferOffset(2);
        $this->assertTrue($this->reader->isOffset());
    }

    /** The default offset of 0 is not reported as an offset. */
    public function test_isNotOffset() {
        $this->reader->setFileName($this->fname);
        $this->assertFalse($this->reader->isOffset());
    }

    /** Write the two-line fixture file used by every test. */
    private function createTestFile() {
        $this->fp = fopen($this->fname, "wb");
        fwrite($this->fp, "TEST1\n");
        fwrite($this->fp, "TEST2\n");
        // fflush() flushes this file's buffer; flush() would only flush PHP's output buffer.
        fflush($this->fp);
        fclose($this->fp);
    }

    /** Remove the fixture file if it is still present. */
    private function deleteTestFile() {
        if (file_exists($this->fname)) {
            unlink($this->fname);
        }
    }
}
+1

Lotsa代碼可能是一次(或罕見)導入過程。 上傳csv並使用mysql控制檯加載數據。 – racerror 2010-05-26 20:02:44

+0

是的,你說得對,只不過他不想等整個文件上傳完再存儲——*'我不願意在處理之前先把CSV文件的全部51427行都讀進數組'*。另外,這段代碼還附帶了測試。哦,一個人的**一次性**工作,往往是另一個人不斷重複、直到煩了才去自動化的工作。 – Gutzofter 2010-05-26 20:16:38

2

您可以直接連接到數據庫服務器嗎?

如果是這樣,我會考慮使用像SQLyog第三方程序來導入您的csv。

你也可以上傳文件,並使用mysql外殼直接導入數據:

-- Bulk-load the CSV at the given path into table_name, splitting fields on commas.
-- The path and the table name are placeholders to be replaced with real values.
LOAD DATA INFILE '/path/to/your_file.csv' INTO TABLE table_name FIELDS TERMINATED BY ','; 
1

您的腳本可能花費的時間太長,它被終止。

您應該在php.ini中查找max_execution_time指令並將其設置爲適合您的值。

默認的max_execution_time設置爲30秒,所以你的腳本可能會被終止。

如果您還有其他腳本需要保留執行時間限制,可以只在這個腳本中調用set_time_limit()來單獨設置;

1

您是否嘗試過使用bash/shell(如果您在linux上)將您的csv導入到mysql中?你也可以使用ruby或者perl或者whatnot,因爲我認爲你應該使用它來代替php(或任何web應用程序)來導入文件。

2

此讀取整個CSV文件到一個數組

所有50000+行?

通過逐行讀取(fgets()),然後將每個(需要的)行添加到數組,從PHP開始讀取文件的所需塊;你可以用fgetcsv()獲得該行的數組。

編輯:我不知道確切的細節,但我覺得將所有內容讀入數據結構的成本比讀取我們需要的更多。

0

呸!忽略這個答案。是重複的。見Scorchio上面提到的fgetcsv()。