2015-04-08 46 views
2

我有幾個SSIS(.dtsx)文件,我需要將它們的功能重寫爲C#。解析SSIS .xml源來檢索表映射

它們被配置爲從一個db1.table1中簡單地獲取數據,並通過一些自定義映射將數據傳輸到db2.table2(列名稱不完全匹配)。

是否存在關於SSIS xml格式的一些文檔?當手動解析XML格式以檢索源 - 目標表名稱和列名時,是否應該注意一些事情?

+1

XML 格式在 SSIS 的每個版本中都有所變化,所以這件事夠你受的。對於 2005/2008 版,你可以依據 lineage ID(血統 ID)進行追蹤;而對於 2012/2014 版,則要依據基於文本的引用 ID 進行追蹤。祝你好運 – billinkc

+0

事實證明,按照你的建議使用 lineage ID(血統 ID)之後,這其實很簡單。 –

+0

我可以問*爲什麼*你需要用C#重寫它們?我很難想象一個情況,你可以在任何東西中複製特製的ETL工具的功能集。至少不是沒有花費大量的時間和精力來獲得很大的收益。 –

回答

2

下面的代碼是專爲 PackageFormatVersion = 3 的 SSIS 包編寫的。

代碼並不漂亮,但用於單向的 XML 轉換已經足夠。

解析源

/// <summary>
/// Reads a source component element and returns its table name together with the
/// id/name of every column listed under its first output whose name does not
/// contain "Error".
/// </summary>
/// <param name="source">The component element of the data-flow source.</param>
/// <returns>The normalized table name and the output columns found.</returns>
private static Mapping<ColumnMapping> ParseSourceComponent(XElement source)
{
    // The table reference is stored in the component's "OpenRowset" property.
    XElement openRowsetProperty = source.XPathSelectElement("properties/property[@name='OpenRowset']");
    string rawTableName = openRowsetProperty.Value;

    // Skip any output whose name mentions "Error"; the first remaining one carries the data columns.
    XElement dataOutput = source.XPathSelectElement("outputs")
        .Elements()
        .Where(output => !((string)output.Attribute("name")).Contains("Error"))
        .First();

    var columns = new List<ColumnMapping>();
    foreach (XElement columnElement in dataOutput.XPathSelectElement("outputColumns").Elements())
    {
        var column = new ColumnMapping();
        column.Id = (int)columnElement.Attribute("id");
        column.Name = (string)columnElement.Attribute("name");
        columns.Add(column);
    }

    var result = new Mapping<ColumnMapping>();
    result.TableName = NormalizeTableNames(rawTableName);
    result.Columns = columns;
    return result;
}
// Matches "[dbo].[<name>]" anywhere in the input and captures <name> as group 1.
static readonly Regex tableNameRegex = new Regex("\\[dbo\\]\\.\\[(.*)\\]");

/// <summary>
/// Strips the "[dbo].[...]" wrapper from a table reference.
/// </summary>
/// <param name="rawTableName">Table reference as read from the package.</param>
/// <returns>
/// The text captured between "[dbo].[" and the last "]" when the pattern matches;
/// otherwise <paramref name="rawTableName"/> unchanged.
/// </returns>
private static string NormalizeTableNames(string rawTableName)
{
    Match dboMatch = tableNameRegex.Match(rawTableName);
    return dboMatch.Success ? dboMatch.Groups[1].Value : rawTableName;
}

解析目的地

/// <summary>
/// Reads a destination component element and returns its table name together with
/// every external metadata column that some input column points at. For each such
/// column, <c>MappsToId</c> is set to the lineageId of the first matching input column.
/// External metadata columns without a matching input column are reported on the
/// console and left out of the result.
/// </summary>
/// <param name="ssisName">Package file name; only used as a prefix in console messages.</param>
/// <param name="source">The component element of the data-flow destination.</param>
/// <returns>The normalized table name and the mapped destination columns.</returns>
private static Mapping<InputColumnMapping> ParseDestinationComponent(string ssisName,XElement source)
{
    string rawTableName = source.XPathSelectElement("properties/property[@name='OpenRowset']").Value;

    // First input whose name does not mention "Error".
    XElement dataInput = source.XPathSelectElement("inputs")
        .Elements()
        .Where(input => !((string)input.Attribute("name")).Contains("Error"))
        .First();

    // externalMetadataColumnId -> lineageId. The first input column seen for an id wins.
    var lineageByExternalId = new Dictionary<int, int>();
    foreach (XElement inputColumn in dataInput.XPathSelectElement("inputColumns").Elements())
    {
        int lineageId = (int)inputColumn.Attribute("lineageId");
        int externalId = (int)inputColumn.Attribute("externalMetadataColumnId");
        if (!lineageByExternalId.ContainsKey(externalId))
        {
            lineageByExternalId.Add(externalId, lineageId);
        }
    }

    // Parse every external metadata column up front, before any message is written.
    var candidates = new List<InputColumnMapping>();
    foreach (XElement externalColumn in dataInput.XPathSelectElement("externalMetadataColumns").Elements())
    {
        var candidate = new InputColumnMapping();
        candidate.Id = (int)externalColumn.Attribute("id");
        candidate.Name = (string)externalColumn.Attribute("name");
        candidates.Add(candidate);
    }

    var mappedColumns = new List<InputColumnMapping>();
    foreach (InputColumnMapping candidate in candidates)
    {
        int mappedLineageId;
        if (lineageByExternalId.TryGetValue(candidate.Id, out mappedLineageId))
        {
            candidate.MappsToId = mappedLineageId;
            mappedColumns.Add(candidate);
        }
        else
        {
            Console.WriteLine("{0} | destination external column {1} with id {2} was not found in input mappings", ssisName, candidate.Name, candidate.Id);
        }
    }

    var result = new Mapping<InputColumnMapping>();
    result.TableName = NormalizeTableNames(rawTableName);
    result.Columns = mappedColumns;
    return result;
}

處理整個.dtsx程序文件

/// <summary>
/// Loads the package "ssis/{ssisName}" and, for every executable that directly owns a
/// "pipeline/components" element, extracts the source and destination table names and
/// the column pairs linked through the destination column's <c>MappsToId</c>.
/// </summary>
/// <param name="ssisName">File name of the .dtsx package inside the "ssis" folder.</param>
/// <returns>The file name together with one <see cref="RemoteMapping"/> per pipeline found.</returns>
/// <exception cref="InvalidOperationException">
/// The document has no root element, or a pipeline has no component whose name contains
/// "Source" or no component whose name contains "Destination".
/// </exception>
private static RemoteMappingFile ParseDtsx(string ssisName)
{
    var xml = XDocument.Load(@"ssis/" + ssisName);

    if (xml.Root == null)
    {
        throw new InvalidOperationException("Root is null");
    }

    var mappings = new List<RemoteMapping>();
    XNamespace ns = "www.microsoft.com/SqlServer/Dts";

    foreach (var executable in xml.Root.Descendants(ns + "Executable"))
    {
        // Look only at the executable's own ObjectData children. Descendants() would also
        // reach the pipeline of a nested executable, so a pipeline nested inside another
        // executable would be reported once for the outer and once for the inner element.
        var componentsElement = executable.Elements(ns + "ObjectData")
            .Select(objectData => objectData.XPathSelectElement("pipeline/components"))
            .FirstOrDefault(element => element != null);
        if (componentsElement == null)
        {
            // No pipeline directly under this executable: there is nothing to map.
            continue;
        }

        var components = componentsElement.Elements().ToList();
        if (components.Count != 2)
        {
            Console.WriteLine("{0} | WARN - 2 components expected. Found {1} with names: {2}", ssisName, components.Count, string.Join(",", components.Select(x => (string)x.Attribute("name"))));
        }

        // Components are recognised by name; a missing name attribute is treated as "no match".
        var source = components.FirstOrDefault(x => ((string)x.Attribute("name") ?? string.Empty).Contains("Source"));
        var destination = components.FirstOrDefault(x => ((string)x.Attribute("name") ?? string.Empty).Contains("Destination"));
        if (source == null || destination == null)
        {
            throw new InvalidOperationException(string.Format(
                "{0} | pipeline needs one component whose name contains 'Source' and one whose name contains 'Destination'",
                ssisName));
        }

        var sourceMapping = ParseSourceComponent(source);
        var destinationMapping = ParseDestinationComponent(ssisName, destination);
        var remoteMapping = new RemoteMapping
        {
            TableNames = new Column { Source = sourceMapping.TableName, Destination = destinationMapping.TableName },
            Columns = new List<Column>()
        };

        foreach (var sourceItem in sourceMapping.Columns)
        {
            // A destination column belongs to this source column when its MappsToId equals the source column id.
            var matchingDestinationColumn =
                destinationMapping.Columns.FirstOrDefault(x => x.MappsToId == sourceItem.Id);
            if (matchingDestinationColumn == null)
            {
                Console.WriteLine("{0} | input mapping {1} with id {2} was not found in destination mappings",
                    ssisName, sourceItem.Name, sourceItem.Id);
                continue;
            }

            remoteMapping.Columns.Add(new Column
            {
                Destination = matchingDestinationColumn.Name,
                Source = sourceItem.Name
            });
        }

        mappings.Add(remoteMapping);
    }

    return new RemoteMappingFile
    {
        RemoteMappings = mappings,
        SSISName = ssisName
    };
}

所需的數據結構

/// <summary>
/// Id and name of one column as read from a component element of the package XML.
/// </summary>
public class ColumnMapping 
{ 
    /// <summary>Value of the column element's "id" attribute.</summary>
    public int Id { get; set; } 
    /// <summary>Value of the column element's "name" attribute.</summary>
    public string Name { get; set; } 
} 
/// <summary>
/// A destination column that additionally records which upstream column feeds it.
/// </summary>
public class InputColumnMapping : ColumnMapping 
{ 
    /// <summary>
    /// The lineageId of the input column mapped onto this column (set by
    /// ParseDestinationComponent); ParseDtsx compares it with the source column's Id.
    /// </summary>
    public int MappsToId { get; set; } 
} 
/// <summary>
/// The table name and column list extracted from a single source or destination component.
/// </summary>
/// <typeparam name="T">Column type held in <see cref="Columns"/>.</typeparam>
public class Mapping<T> where T : ColumnMapping 
{ 
    /// <summary>Table name after NormalizeTableNames has been applied; marked to serialize as an XML attribute.</summary>
    [XmlAttribute] 
    public string TableName { get; set; } 
    /// <summary>Columns read from the component.</summary>
    public List<T> Columns { get; set; } 
} 
/// <summary>
/// The result for one pipeline: which table feeds which table, and which column feeds which column.
/// </summary>
public class RemoteMapping 
{ 
    /// <summary>Source and destination table names, stored as a <see cref="Column"/> pair.</summary>
    public Column TableNames { get; set; } 
    /// <summary>One source/destination pair per source column for which ParseDtsx found a destination column.</summary>
    public List<Column> Columns { get; set; } 
} 

/// <summary>
/// A source/destination name pair. Used for column names and, in
/// <c>RemoteMapping.TableNames</c>, for table names. Both members are marked to
/// serialize as XML attributes.
/// </summary>
public class Column 
{ 
    /// <summary>Name on the source side.</summary>
    [XmlAttribute] 
    public string Source { get; set; } 

    /// <summary>Name on the destination side.</summary>
[XmlAttribute] 
    public string Destination { get; set; } 
} 
/// <summary>
/// Everything ParseDtsx extracted from one .dtsx file.
/// </summary>
public class RemoteMappingFile 
{ 
    /// <summary>The file name that was passed to ParseDtsx; marked to serialize as an XML attribute.</summary>
    [XmlAttribute] 
    public string SSISName { get; set; } 
    /// <summary>The mappings ParseDtsx collected while walking the file's executables.</summary>
    public List<RemoteMapping> RemoteMappings { get; set; } 
} 
/// <summary>
/// Root object that Main hands to XmlSerializer.
/// </summary>
public class MappingsXml 
{ 
    /// <summary>One entry per .dtsx file that was parsed.</summary>
    public List<RemoteMappingFile> Mappings { get; set; } 
} 

Main 方法會處理 ssis 文件夾中的所有 .dtsx 文件

internal class Program
{
    /// <summary>
    /// Parses every *.dtsx file found in the "ssis" directory and serializes the
    /// combined result to AutoRemoteMappingXmls.xml.
    /// </summary>
    private static void Main()
    {
        var parsedFiles = new List<RemoteMappingFile>();
        foreach (string path in Directory.EnumerateFiles("ssis", "*.dtsx"))
        {
            // ParseDtsx expects the bare file name and prepends the folder itself.
            string fileName = Path.GetFileName(path) ?? "";
            parsedFiles.Add(ParseDtsx(fileName));
        }

        var document = new MappingsXml();
        document.Mappings = parsedFiles;

        var serializer = new XmlSerializer(typeof(MappingsXml));
        using (var output = new StreamWriter(@"AutoRemoteMappingXmls.xml"))
        {
            serializer.Serialize(output, document);
        }
    }
}

最終輸出:

<?xml version="1.0" encoding="utf-8"?> 
<MappingsXml xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema"> 
    <Mappings> 
    <RemoteMappingFile SSISName="ssis1.dtsx"> 
     <RemoteMappings> 
     <RemoteMapping> 
      <TableNames Source="sourceTable1" Destination="destinationTable1" /> 
      <Columns> 
      <Column Source="sourceColumn1" Destination="destinationColumn1" /> 
      <Column Source="sourceColumn2" Destination="destinationColumn2" /> 
      </Columns> 
     </RemoteMapping> 
     <RemoteMapping> 
      <TableNames Source="sourceTable2" Destination="destinationTable2" /> 
      <Columns> 
      <Column Source="sourceColumn3" Destination="destinationColumn3" /> 
      <Column Source="sourceColumn4" Destination="destinationColumn4" /> 
      </Columns> 
     </RemoteMapping> 
     </RemoteMappings> 
    </RemoteMappingFile> 
    </Mappings> 
</MappingsXml> 

在以下情況下,程序還會向控制檯輸出信息:

  1. DTS:ObjectData/pipeline/components/component 超過 2 個(我們只期望有「OLE DB Source」和「OLE DB Destination」。有時還會有一些數據轉換組件,因此可能需要做一些額外的工作)
  2. 有一些源列沒有被映射到目標列
  3. 有未映射到源列一些目標列
  4. 源和目標表名不匹配(不是一個真正的問題)