2
我有幾個SSIS(.dtsx)文件,我需要將它們的功能重寫爲C#。解析SSIS .xml源來檢索表映射
它們被配置爲從一個db1.table1中簡單地獲取數據,並通過一些自定義映射將數據傳輸到db2.table2(列名稱不完全匹配)。
是否存在關於SSIS xml格式的一些文檔?當手動解析XML格式以檢索源 - 目標表名稱和列名時,是否應該注意一些事情?
我有幾個SSIS(.dtsx)文件,我需要將它們的功能重寫爲C#。解析SSIS .xml源來檢索表映射
它們被配置爲從一個db1.table1中簡單地獲取數據,並通過一些自定義映射將數據傳輸到db2.table2(列名稱不完全匹配)。
是否存在關於SSIS xml格式的一些文檔?當手動解析XML格式以檢索源 - 目標表名稱和列名時,是否應該注意一些事情?
下面的代碼是專爲SSIS包PackageFormatVersion = 3
這是不漂亮,但它的確定單程XML轉換。
解析源
private static Mapping<ColumnMapping> ParseSourceComponent(XElement source)
{
var table = source.XPathSelectElement("properties/property[@name='OpenRowset']").Value;
var nonErrorOutput = source.XPathSelectElement("outputs").Elements().First(x => !((string)x.Attribute("name")).Contains("Error"));
var outputColumns = nonErrorOutput.XPathSelectElement("outputColumns").Elements().Select(x =>
new ColumnMapping
{
Id = (int)x.Attribute("id"),
Name = (string)x.Attribute("name")
}).ToList();
return new Mapping<ColumnMapping>
{
TableName = NormalizeTableNames(table),
Columns = outputColumns
};
}
static readonly Regex tableNameRegex = new Regex("\\[dbo\\]\\.\\[(.*)\\]");
private static string NormalizeTableNames(string rawTableName)
{
var matches = tableNameRegex.Match(rawTableName);
if (matches.Success)
return matches.Groups[1].Value;
return rawTableName;
}
解析目的地
private static Mapping<InputColumnMapping> ParseDestinationComponent(string ssisName,XElement source)
{
var table = source.XPathSelectElement("properties/property[@name='OpenRowset']").Value;
var nonErrorOutput = source.XPathSelectElement("inputs").Elements().First(x => !((string)x.Attribute("name")).Contains("Error"));
var inputColumns = nonErrorOutput.XPathSelectElement("inputColumns").Elements().Select(x =>
new
{
lineageId = (int)x.Attribute("lineageId"),
externalMetadataColumnId = (int)x.Attribute("externalMetadataColumnId")
}).ToList();
var externalMetadataColumns = nonErrorOutput.XPathSelectElement("externalMetadataColumns").Elements().Select(x =>
new InputColumnMapping
{
Id = (int)x.Attribute("id"),
Name = (string)x.Attribute("name")
}).ToList();
foreach (var externalMetadataColumn in externalMetadataColumns.ToList())
{
var inputMapping =
inputColumns.FirstOrDefault(x => x.externalMetadataColumnId == externalMetadataColumn.Id);
if (inputMapping == null)
{
Console.WriteLine("{0} | destination external column {1} with id {2} was not found in input mappings", ssisName, externalMetadataColumn.Name, externalMetadataColumn.Id);
externalMetadataColumns.Remove(externalMetadataColumn);
continue;
}
externalMetadataColumn.MappsToId = inputMapping.lineageId;
}
return new Mapping<InputColumnMapping>
{
TableName = NormalizeTableNames(table),
Columns = externalMetadataColumns
};
}
處理整個.dtsx程序文件
private static RemoteMappingFile ParseDtsx(string ssisName)
{
var xml = XDocument.Load(@"ssis/"+ssisName);
if (xml.Root == null)
{
throw new Exception("Root is null");
}
var mappings = new List<RemoteMapping>();
XNamespace ns = "www.microsoft.com/SqlServer/Dts";
XmlNamespaceManager man = new XmlNamespaceManager(new NameTable());
man.AddNamespace("DTS", "www.microsoft.com/SqlServer/Dts");
var executables = xml.Root.Descendants(ns + "Executable").Select(x => x).ToList();
foreach (var executable in executables)
{
var components = executable.Descendants(ns + "ObjectData").First().XPathSelectElement("pipeline/components").Elements().ToList();
if (components.Count != 2)
{
Console.WriteLine("{0} | WARN - 2 components expected. Found {1} with names: {2}", ssisName, components.Count, string.Join(",",components.Select(x=>((string)x.Attribute("name"))).ToList()));
}
var source = components.First(x => ((string)x.Attribute("name")).Contains("Source"));
var destination = components.First(x => ((string)x.Attribute("name")).Contains("Destination"));
var sourceMapping = ParseSourceComponent(source);
var destinationMapping = ParseDestinationComponent(ssisName,destination);
var remoteMapping = new RemoteMapping
{
TableNames = new Column { Source = sourceMapping.TableName, Destination = destinationMapping.TableName },
Columns = new List<Column>()
};
foreach (var sourceItem in sourceMapping.Columns)
{
var foundMatchingDestinationColumn =
destinationMapping.Columns.FirstOrDefault(x => x.MappsToId == sourceItem.Id);
if (foundMatchingDestinationColumn == null)
{
Console.WriteLine("{0} | input mapping {1} with id {2} was not found in destination mappings",
ssisName, sourceItem.Name, sourceItem.Id);
continue;
}
remoteMapping.Columns.Add(new Column
{
Destination = foundMatchingDestinationColumn.Name,
Source = sourceItem.Name
});
}
mappings.Add(remoteMapping);
}
return new RemoteMappingFile
{
RemoteMappings = mappings,
SSISName = ssisName
};
}
所需的數據結構
public class ColumnMapping
{
public int Id { get; set; }
public string Name { get; set; }
}
public class InputColumnMapping : ColumnMapping
{
public int MappsToId { get; set; }
}
public class Mapping<T> where T : ColumnMapping
{
[XmlAttribute]
public string TableName { get; set; }
public List<T> Columns { get; set; }
}
public class RemoteMapping
{
public Column TableNames { get; set; }
public List<Column> Columns { get; set; }
}
public class Column
{
[XmlAttribute]
public string Source { get; set; }
[XmlAttribute]
public string Destination { get; set; }
}
public class RemoteMappingFile
{
[XmlAttribute]
public string SSISName { get; set; }
public List<RemoteMapping> RemoteMappings { get; set; }
}
public class MappingsXml
{
public List<RemoteMappingFile> Mappings { get; set; }
}
主要方法需要在SSIS夾中的所有文件.dtsx程序
internal class Program
{
private static void Main()
{
//var mappings = Directory.EnumerateFiles("ssis","*.dtsx").Select(x=>ParseDtsx(Path.GetFileName(x).ToString())).ToList();
var list = new MappingsXml
{
Mappings =
Directory.EnumerateFiles("ssis", "*.dtsx")
.Select(x => ParseDtsx((Path.GetFileName(x) ?? "").ToString()))
.ToList()
};
var xsSubmit = new XmlSerializer(typeof (MappingsXml));
using (var file = new StreamWriter(
@"AutoRemoteMappingXmls.xml"))
{
xsSubmit.Serialize(file, list);
}
}
}
最終輸出:
<?xml version="1.0" encoding="utf-8"?>
<MappingsXml xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<Mappings>
<RemoteMappingFile SSISName="ssis1.dtsx">
<RemoteMappings>
<RemoteMapping>
<TableNames Source="sourceTable1" Destination="destinationTable1" />
<Columns>
<Column Source="sourceColumn1" Destination="destinationColumn1" />
<Column Source="sourceColumn2" Destination="destinationColumn2" />
</Columns>
</RemoteMapping>
<RemoteMapping>
<TableNames Source="sourceTable2" Destination="destinationTable2" />
<Columns>
<Column Source="sourceColumn3" Destination="destinationColumn3" />
<Column Source="sourceColumn4" Destination="destinationColumn4" />
</Columns>
</RemoteMapping>
</RemoteMappings>
</RemoteMappingFile>
</Mappings>
</MappingsXml>
還寫道,如果勸慰:
的XML格式跨SSIS的每一個版本已經改變,所以你得到的享受這一點。 2005/2008年,您可以跟蹤血統ID,而2012/2014您將擁有基於文本的參考ID進行跟蹤。祝你好運 – billinkc
事實證明,這很簡單,因爲我建議你使用血統ID。 –
我可以問*爲什麼*你需要用C#重寫它們?我很難想象一個情況,你可以在任何東西中複製特製的ETL工具的功能集。至少不是沒有花費大量的時間和精力來獲得很大的收益。 –