2016-01-31 42 views
0
#!/bin/bash 

    # For every *.csv in the first directory that has a same-named file in the
    # second directory, write the lines of the second file that do not appear
    # verbatim in the first file to <output dir>/<name>_diff.
    # Arguments (all optional):
    #   $1 - directory holding the first set of CSV files   (default: /dir1)
    #   $2 - directory holding the files to compare against (default: /dir2)
    #   $3 - directory receiving the <name>_diff outputs    (default: /dirDiff)
    function compare {
      local src_dir=${1:-/dir1} ref_dir=${2:-/dir2} out_dir=${3:-/dirDiff}
      local file1 file2 name

      for file1 in "$src_dir"/*.csv
      do
        name=$(basename "$file1")
        file2=$ref_dir/$name

        if [[ -e "$file2" ]] ### compare only when file2 with the same name as file1 exists ###
        then
          # Whole lines ($0) are compared, so no field separator is needed.
          # FILENAME == ARGV[1] is used instead of NR == FNR because the latter
          # is also true for every line of file2 when file1 is empty, which
          # would silently produce an empty diff.
          awk 'FILENAME == ARGV[1] {seen[$0]; next} !($0 in seen)' \
            "$file1" "$file2" > "$out_dir/${name}_diff"
        fi
      done
    }

    # Delete every zero-size *_diff file in the diff directory.
    # Arguments (optional):
    #   $1 - directory to clean (default: /dirDiff)
    function removeNULL {
      local diff_dir=${1:-/dirDiff}
      local i

      for i in "$diff_dir"/*_diff
      do
        # -f guards against the unexpanded glob pattern (no *_diff files at
        # all): "! -s" alone is also true for a path that does not exist, which
        # made rm fail on the literal pattern.
        if [[ -f "$i" && ! -s "$i" ]]  ### file exists and has zero size ###
        then
          \rm -- "$i"
        fi
      done
    }

    # Order matters: compare writes the *_diff files, then removeNULL deletes
    # the ones that ended up with zero size.
    compare 
    removeNULL 

file1 和 file2 是來自兩個不同來源的格式化文件。Source1 會在記錄中間插入多餘的換行符,使一條記錄被拆成兩行,導致腳本比較失敗並產生錯誤的 diff 輸出。我希望腳本在比較 file1 和 file2 時忽略 Source1 引入的這些換行符,但我不確定腳本該如何區分真正的新記錄和被額外插入的換行。如何在比較腳本中忽略這些換行符?示例如下:

file1:- 
    11447438218480362,6005560623,6005560623,11447438218480362,5,20160130103044,100,195031,,1,0,00,49256,0 
    ,195031_5_00_6,0.1,6; 
    11447691224860640,6997557634,6997557634,11447691224860640,601511,20160130103457,500,195035,,2,0,00,45394,0 
    ,195035_601511_00_6,0.5,6; 

    file2:- 
    11447438218480362,6005560623,6005560623,11447438218480362,5,20160130103044,100,195031,,1,0,00,49256,0,195031_5_00_6,0.1,6; 
    11447691224860640,6997557634,6997557634,11447691224860640,601511,20160130103457,500,195035,,2,0,00,45394,0,195035_601511_00_6,0.5,6; 

感謝您的支持。

+1

爲什麼不使用'diff'命令? – bricklore

+1

提供您正在比較和預期輸出的兩個文件的示例 – anubhava

+3

將此評論添加到您的問題並刪除您的評論。 – Cyrus

回答

1

你可以先對 file1 進行預處理,把不以「;」結尾的行與下一行合併:

# -r enables extended regular expressions (GNU sed).
# While the current line does not end in ';', append the next input line (N),
# delete the CR/LF run between the two parts, and branch back to ":again" to
# re-test the joined line.
sed -r ":again; /;$/! { N; s/(.+)[\r\n]+(.+)/\1\2/g; b again; }" file1 

這樣 file1 和 file2 的記錄格式就一致,可以直接比較了。