2013-10-18 26 views
9

我正在尋找使用C#和FileHelpers庫將大型SQL Server表導出爲CSV文件的方法。 我也可以考慮C#配合bcp,但我認爲FileHelpers比bcp更靈活。速度不是特別的要求。 運行下面的代碼時,storage.ExtractRecords()會拋出OutOfMemoryException(一些不太重要的代碼已被略去):如何使用FileHelpers庫將大型SQL Server表導出爲CSV文件?

// Point the FileHelpers SQL Server storage at the Orders table.
SqlServerStorage storage = new SqlServerStorage(typeof(Order))
{
    ServerName = "SqlServer",
    DatabaseName = "SqlDataBase",
    SelectSql = "select * from Orders",
    FillRecordCallback = new FillRecordHandler(FillRecordOrder)
};

// ExtractRecords() hands back the whole result set as one array; this is the
// call that is reported to throw OutOfMemoryException on the large table.
Order[] output = storage.ExtractRecords() as Order[];

當運行下面的代碼時,link.ExtractToFile()會拋出「超時過期」(Timeout expired)異常:

// Same source table, this time configured through a full connection string.
SqlServerStorage storage = new SqlServerStorage(typeof(Order))
{
    ConnectionString = "Server=SqlServer;Database=SqlDataBase;Trusted_Connection=True",
    SelectSql = "select * from Orders",
    FillRecordCallback = new FillRecordHandler(FillRecordOrder)
};

// Link the storage to a file engine and write the records to disk.
FileDataLink link = new FileDataLink(storage);
link.FileHelperEngine.HeaderText = headerLine;

// This is the call that is reported to fail with "Timeout expired".
link.ExtractToFile("file.csv");

SQL查詢運行的時間超過默認值30秒,因此超時異常。不幸的是,我無法在FileHelpers文檔中找到如何將SQL命令超時設置爲更高的值。

我可以考慮用SQL查詢分批讀取較小的數據集並循環執行,直到整個表被導出爲止,但是這個過程太複雜了。 是否有一種簡單的方法使用FileHelpers導出大型數據庫表?

+0

你試過不使用*** FileHelpers ***的做法嗎? – Kiquenet

回答

0

FileHelpers有一個異步引擎(async engine),它更適合處理大文件。不幸的是,FileDataLink類不使用它,所以沒有簡單的方法將它與SqlStorage一起使用。

修改SQL超時也不是很容易。最簡單的方法是複製SqlServerStorage的代碼以創建您自己的替代存儲提供程序,並提供ExecuteAndClose()和ExecuteAndLeaveOpen()的替代實現,在其中設置IDbCommand上的超時值。 (SqlServerStorage是一個密封類,所以你不能直接繼承它)。

您可能想要查看ReactiveETL,它使用FileHelpers異步引擎來處理文件,同時它是用ReactiveExtensions對Ayende的RhinoETL的重寫,用於處理大型數據集。

+0

這就是我所害怕的。 FileHelpers團隊應考慮將Timeout屬性添加到SqlServerStorage類。 FileHelpers涵蓋更多的用例,IMO會很好。 – mircea

1

試試這個:

/// <summary>
/// Asks the user for a destination file with a SaveFileDialog and, if the dialog is
/// confirmed, passes the chosen file name to exportToCSVfile.
/// </summary>
private void exportToCSV()
{
    string chosenFile = null;

    // The dialog is disposable, so release it as soon as the choice has been read.
    using (SaveFileDialog saveFileDialogCSV = new SaveFileDialog())
    {
        // InitialDirectory expects a folder. Application.ExecutablePath includes the
        // executable's file name, so use the folder the application was started from.
        saveFileDialogCSV.InitialDirectory = Application.StartupPath;

        saveFileDialogCSV.Filter = "CSV files (*.csv)|*.csv|All files (*.*)|*.*";
        saveFileDialogCSV.FilterIndex = 1;
        saveFileDialogCSV.RestoreDirectory = true;

        if (saveFileDialogCSV.ShowDialog() == DialogResult.OK)
        {
            chosenFile = saveFileDialogCSV.FileName;
        }
    }

    // Runs the export only when the user confirmed a file name.
    if (chosenFile != null)
    {
        exportToCSVfile(chosenFile);
    }
}


/// <summary>
/// Exports the table selected in lbxTables to a CSV file, reading and writing one row
/// at a time.
/// </summary>
/// <remarks>
/// Database NULLs are written as empty fields, byte arrays as hexadecimal text and every
/// other value through Convert.ToString. A field that contains the separator, a double
/// quote or a line break is wrapped in double quotes with embedded quotes doubled.
/// </remarks>
/// <param name="fileOut">Path of the CSV file to create; an existing file is overwritten.</param>
private void exportToCSVfile(string fileOut)
{
    // NOTE(review): the table name is concatenated into the SQL text. Confirm that the
    // list box is only ever filled with names read from the database catalog.
    string sqlQuery = "select * from dbo." + this.lbxTables.SelectedItem.ToString();

    // Convert.ToString accepts the separator whether the field is a char or a string.
    string fieldSeparator = Convert.ToString(this.separator);

    // The using blocks release the command, reader and file even when a row fails.
    using (SqlCommand command = new SqlCommand(sqlQuery, objConnDB_Auto))
    using (SqlDataReader dr = command.ExecuteReader())
    using (StreamWriter sw = new StreamWriter(fileOut, false, this.encodingCSV))
    {
        // Writes the column headers if the user previously asked for that.
        if (this.chkFirstRowColumnNames.Checked)
        {
            sw.WriteLine(columnNames(dr.GetSchemaTable(), this.separator));
        }

        // One buffer is reused for every row; only the current row is held in memory.
        string[] fields = new string[dr.FieldCount];
        while (dr.Read())
        {
            for (int i = 0; i < fields.Length; i++)
            {
                fields[i] = formatCsvField(dr.GetValue(i), fieldSeparator);
            }
            sw.WriteLine(string.Join(fieldSeparator, fields));
        }
    }

    // Notifies the user.
    MessageBox.Show("ready");
}

// Characters that force a field to be quoted regardless of the separator in use.
private static readonly char[] csvQuoteTriggers = { '"', '\r', '\n' };

/// <summary>
/// Converts one column value to its CSV text, quoting it only when required.
/// </summary>
private static string formatCsvField(object value, string fieldSeparator)
{
    // GetValue returns DBNull.Value for NULL columns; write those as empty fields.
    if (value == null || value == DBNull.Value)
    {
        return "";
    }

    byte[] bytes = value as byte[];
    string text = bytes != null
        ? BitConverter.ToString(bytes).Replace("-", "")
        : (Convert.ToString(value) ?? "");

    bool needsQuoting =
        text.IndexOfAny(csvQuoteTriggers) >= 0 ||
        (fieldSeparator.Length > 0 && text.Contains(fieldSeparator));

    return needsQuoting
        ? "\"" + text.Replace("\"", "\"\"") + "\""
        : text;
}
+0

我最終使用了實體框架,你的示例代碼也可以工作,但我們有點不在話題中。 – mircea

12

睿斯萬的答案方向是正確的,它能很好地擴展到大文件,因爲它避免了把整個表讀入內存。但是,代碼可以再整理一下。

shamp00的解決方案需要外部庫。

下面是一個簡單的表到CSV文件導出,將很好擴展到大型文件,並且不需要任何外部庫:

using System; 
using System.Collections.Generic; 
using System.Data; 
using System.Data.SqlClient; 
using System.IO; 
using System.Linq; 

/// <summary>
/// Dumps a SQL Server table to a CSV file, reading and writing one row at a time so the
/// whole table is never held in memory.
/// </summary>
public class TableDumper
{
    // Characters that make quoting necessary for a header field.
    private static readonly char[] CharsNeedingQuotes = { ',', '"', '\r', '\n' };

    /// <summary>
    /// Runs "select * from <paramref name="tableName"/>" and writes the result to
    /// <paramref name="destinationFile"/>: a header line with the column names followed
    /// by one line per row with every value wrapped in double quotes.
    /// </summary>
    /// <remarks>
    /// Values are converted with ToString(), so numbers and dates follow the current
    /// culture; byte arrays are written as hexadecimal text. The table name is
    /// concatenated into the SQL text, so it must come from a trusted source.
    /// </remarks>
    /// <param name="connection">Connection used to execute the query.</param>
    /// <param name="tableName">Name of the table to export.</param>
    /// <param name="destinationFile">Path of the file to create or overwrite.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public void DumpTableToFile(SqlConnection connection, string tableName, string destinationFile)
    {
        if (connection == null)
        {
            throw new ArgumentNullException("connection");
        }
        if (tableName == null)
        {
            throw new ArgumentNullException("tableName");
        }
        if (destinationFile == null)
        {
            throw new ArgumentNullException("destinationFile");
        }

        using (var command = new SqlCommand("select * from " + tableName, connection))
        using (var reader = command.ExecuteReader())
        using (var outFile = File.CreateText(destinationFile))
        {
            string[] columnNames = GetColumnNames(reader).ToArray();
            string[] header = columnNames.Select(name => QuoteIfNeeded(name)).ToArray();
            outFile.WriteLine(string.Join(",", header));

            // Read() simply returns false for an empty result, so no HasRows check is needed.
            var columnValues = new string[columnNames.Length];
            while (reader.Read())
            {
                for (int i = 0; i < columnValues.Length; i++)
                {
                    columnValues[i] = Quote(FormatValue(reader.GetValue(i)));
                }
                outFile.WriteLine(string.Join(",", columnValues));
            }
        }
    }

    // Yields the ColumnName entry of every row in the reader's schema table.
    private IEnumerable<string> GetColumnNames(IDataReader reader)
    {
        foreach (DataRow row in reader.GetSchemaTable().Rows)
        {
            yield return (string)row["ColumnName"];
        }
    }

    // Converts a column value to text; byte arrays would otherwise print as "System.Byte[]".
    private static string FormatValue(object value)
    {
        byte[] bytes = value as byte[];
        return bytes != null
            ? BitConverter.ToString(bytes).Replace("-", "")
            : value.ToString();
    }

    // Wraps a field in double quotes, doubling any quote it contains.
    private static string Quote(string field)
    {
        return string.Concat("\"", field.Replace("\"", "\"\""), "\"");
    }

    // Leaves ordinary header names untouched and quotes only those that would break the line.
    private static string QuoteIfNeeded(string field)
    {
        return field.IndexOfAny(CharsNeedingQuotes) >= 0 ? Quote(field) : field;
    }
}

這段代碼是我寫的,我聲明它以CC0(公有領域,public domain)發佈。

4

我合併了上面的2段代碼。我使用的就是這段代碼,開發環境是VS 2010。

 //this is all lib that i used||||||||||||||| 

using System; 
using System.Collections.Generic; 
using System.ComponentModel; 
using System.Data; 
using System.Drawing; 
using System.Linq; 
using System.Text; 
using System.Windows.Forms; 
using UsbLibrary; 
using System.Data; 
using System.Data.SqlClient; 
using System.Configuration; 
using System.Globalization; 




     //copy into a button click handler||||||||||||||||||||||||||||||||||||||||||||||||||||||||

     // Ask for the destination first so that nothing is opened when the user cancels.
     string path_csv;
     using (SaveFileDialog saveFileDialogCSV = new SaveFileDialog())
     {
         // InitialDirectory expects a folder, not the path of the executable file.
         saveFileDialogCSV.InitialDirectory = Application.StartupPath;

         saveFileDialogCSV.Filter = "CSV files (*.csv)|*.csv|All files (*.*)|*.*";
         saveFileDialogCSV.FilterIndex = 1;
         saveFileDialogCSV.RestoreDirectory = true;

         if (saveFileDialogCSV.ShowDialog() != DialogResult.OK)
         {
             // Cancelled: an empty path would make File.CreateText throw.
             return;
         }

         path_csv = saveFileDialogCSV.FileName;
     }

     //dbk is my database name that you can change it to your database name
     using (SqlConnection _connection = new SqlConnection("Data Source=.;Initial Catalog=dbk;Integrated Security=True"))
     {
         _connection.Open();

         // The connection is closed by the using block once the export has finished.
         DumpTableToFile(_connection, "tbl_trmc", path_csv);
     }

     } 
     //end of code in button||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 


    /// <summary>
    /// Runs "select * from <paramref name="tableName"/>" and streams the result to
    /// <paramref name="destinationFile"/> as CSV: a header line with the column names,
    /// then one line per row with every value wrapped in double quotes.
    /// </summary>
    /// <remarks>
    /// Values are converted with ToString() (current culture); byte arrays are written
    /// as hexadecimal text. The table name is concatenated into the SQL text, so it
    /// must come from a trusted source.
    /// </remarks>
    /// <param name="connection">Connection used to execute the query.</param>
    /// <param name="tableName">Name of the table to export.</param>
    /// <param name="destinationFile">Path of the file to create or overwrite.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public void DumpTableToFile(SqlConnection connection, string tableName, string destinationFile)
    {
        if (connection == null)
        {
            throw new ArgumentNullException("connection");
        }
        if (tableName == null)
        {
            throw new ArgumentNullException("tableName");
        }
        if (destinationFile == null)
        {
            throw new ArgumentNullException("destinationFile");
        }

        using (var command = new SqlCommand("select * from " + tableName, connection))
        using (var reader = command.ExecuteReader())
        using (var outFile = System.IO.File.CreateText(destinationFile))
        {
            string[] columnNames = GetColumnNames(reader).ToArray();
            int numFields = columnNames.Length;

            // Header names are quoted only when they contain a character that would break the line.
            char[] specialChars = { ',', '"', '\r', '\n' };
            string[] header = columnNames
                .Select(name => name.IndexOfAny(specialChars) >= 0
                    ? string.Concat("\"", name.Replace("\"", "\"\""), "\"")
                    : name)
                .ToArray();
            outFile.WriteLine(string.Join(",", header));

            // Read() returns false for an empty result, so no HasRows check is needed.
            string[] columnValues = new string[numFields];
            while (reader.Read())
            {
                for (int i = 0; i < numFields; i++)
                {
                    object value = reader.GetValue(i);

                    // A byte array would otherwise be written as the literal "System.Byte[]".
                    byte[] bytes = value as byte[];
                    string text = bytes != null
                        ? BitConverter.ToString(bytes).Replace("-", "")
                        : value.ToString();

                    columnValues[i] = string.Concat("\"", text.Replace("\"", "\"\""), "\"");
                }
                outFile.WriteLine(string.Join(",", columnValues));
            }
        }
    }
    /// <summary>
    /// Lazily yields the "ColumnName" entry of each row in the reader's schema table.
    /// </summary>
    private IEnumerable<string> GetColumnNames(IDataReader reader)
    {
        DataRowCollection schemaRows = reader.GetSchemaTable().Rows;
        for (int index = 0; index < schemaRows.Count; index++)
        {
            yield return (string)schemaRows[index]["ColumnName"];
        }
    }
0

非常感謝Jay Sullivan的回答 - 對我非常有幫助。

在此基礎上,我觀察到他的解決方案對VARBINARY和字符串數據類型的字符串格式化並不理想——VARBINARY字段會輸出字面上的"System.Byte"之類的內容,而日期時間字段會被格式化爲MM/dd/yyyy hh:mm:ss tt,這對我來說並不可取。

下面是我拼湊出來的解決方案,它根據數據類型採用不同的方式轉換爲字符串。它使用了嵌套的三元運算符,但確實可行!

希望對某人有幫助。

/// <summary>
/// Builds a SELECT statement from <paramref name="cArgs"/>, runs it and streams the rows
/// to the file named by cArgs["out_file"] as tab-separated text.
/// </summary>
/// <remarks>
/// Keys read from <paramref name="cArgs"/>: "table" and "out_file" (required), and
/// "top_count", "column_name", "lower_bound", "upper_bound" (optional). The range filter
/// is added only when both "lower_bound" and "column_name" are present. The query and the
/// column names are echoed to the console; the file itself contains data rows only.
/// All values are inserted into the SQL text as-is, so they must come from a trusted source.
/// </remarks>
/// <param name="connection">Connection used to execute the query.</param>
/// <param name="cArgs">Export options, see remarks.</param>
public static void DumpTableToFile(SqlConnection connection, Dictionary<string, string> cArgs)
{
    string query = BuildSelectQuery(cArgs);
    Console.WriteLine(query);
    Console.WriteLine("");

    using (var command = new SqlCommand(query, connection))
    using (var reader = command.ExecuteReader())
    using (var outFile = File.CreateText(cArgs["out_file"]))
    {
        string[] columnNames = GetColumnNames(reader).ToArray();
        int numFields = columnNames.Length;
        Console.WriteLine(string.Join(",", columnNames));
        Console.WriteLine("");

        // long, because an int counter would overflow on tables with more than ~2.1 billion rows.
        long rowCount = 0;
        string[] columnValues = new string[numFields];

        // Read() returns false for an empty result, so no HasRows check is needed.
        while (reader.Read())
        {
            rowCount++;
            for (int i = 0; i < numFields; i++)
            {
                columnValues[i] = FormatField(reader.GetValue(i));
            }
            outFile.WriteLine(string.Join("\t", columnValues));

            // Progress message every 100000 rows.
            if (rowCount % 100000 == 0)
            {
                Console.WriteLine("row {0}", rowCount);
            }
        }
    }
}

// Assembles the SELECT text from the optional TOP and range arguments.
private static string BuildSelectQuery(Dictionary<string, string> cArgs)
{
    string query = "SELECT ";

    string topCount;
    if (cArgs.TryGetValue("top_count", out topCount))
    {
        query += string.Format("TOP {0} ", topCount);
    }
    query += string.Format("* FROM {0} (NOLOCK) ", cArgs["table"]);

    string lowerBound, upperBound, columnName;
    if (cArgs.TryGetValue("lower_bound", out lowerBound) && cArgs.TryGetValue("column_name", out columnName))
    {
        query += string.Format("WHERE {0} >= {1} ", columnName, lowerBound);
        if (cArgs.TryGetValue("upper_bound", out upperBound))
        {
            query += string.Format("AND {0} < {1} ", columnName, upperBound);
        }
    }
    return query;
}

// Converts one column value to its text form for the tab-separated file.
private static string FormatField(object value)
{
    // The invariant culture keeps the file identical whatever the machine's regional settings are.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    string text;
    byte[] bytes = value as byte[];
    if (value is DateTime)
    {
        text = ((DateTime)value).ToString("yyyy-MM-dd HH:mm:ss.fff", invariant);
    }
    else if (bytes != null)
    {
        // Upper-case hexadecimal, two digits per byte.
        text = BitConverter.ToString(bytes).Replace("-", "");
    }
    else
    {
        // DBNull converts to an empty string.
        text = Convert.ToString(value, invariant) ?? "";
    }

    // Fields are not quoted, so tabs and line breaks inside a value would break the
    // row structure; replace each of them with a space.
    return text.Replace("\t", " ").Replace("\r", " ").Replace("\n", " ");
}
/// <summary>
/// Lazily yields the "ColumnName" entry of each row in the reader's schema table.
/// </summary>
public static IEnumerable<string> GetColumnNames(IDataReader reader)
{
    DataRowCollection schemaRows = reader.GetSchemaTable().Rows;
    for (int index = 0; index < schemaRows.Count; index++)
    {
        yield return (string)schemaRows[index]["ColumnName"];
    }
}
相關問題