2014-02-13 62 views
1
#!/usr/bin/python 

import csv 
import re 

# Product names to look for. A row is kept when at least one of its cells is
# exactly equal to one of these strings.
string_1 = 'OneTouch AT'
string_2 = 'LinkRunner AT'
string_3 = 'AirCheck'
wanted = (string_1, string_2, string_3)

print("hello Pythong! ")

# Binary modes are what the Python 2 csv module expects for its file objects.
# The with-statement closes both files even if reading or writing fails
# part-way through, which the separate open()/close() calls did not guarantee.
with open('data.csv', "rb") as inFile, open('data2.csv', "wb") as outFile:
    reader = csv.reader(inFile)
    writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)

    for row in reader:
        # any() writes a matching row exactly once, however many cells match.
        if any(col in wanted for col in row):
            writer.writerow(row)

我想弄清楚如何在CSV文件中搜索3個項目。如果某一行包含這些項目,就輸出該行。理想情況下,我只希望把第1列和第3列寫入新文件。(問題標題:使用Python搜索CSV文件中的字符串並寫入結果)

示例數據文件

LinkRunner AT Video,10,20 
Wireless Performance Video OneTouch AT,1,2 
Wired OneTouch AT,200,300 
LinkRunner AT,200,300 
AirCheck,200,300 
+1

什麼不起作用? – 2014-02-13 20:55:32

+0

目前唯一還沒做到的是:我只想輸出第一列和第三列。理想情況下,它會先輸出AirCheck的行,然後是LinkRunner的行,最後是OneTouch的行。 – Kris

回答

3

I'm trying to figure out how to search a CSV file for 3 items. If those items exist print the row. Ideally I would like only Columns 1 and 3 to print to a new file.

試試這個:

import csv 

# Exact first-column values that select a row for output.
search_for = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']

with open('in.csv') as inf, open('out.csv', 'w') as outf:
    reader = csv.reader(inf)
    writer = csv.writer(outf, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for row in reader:
        # csv.reader yields an empty list for a blank line; checking the row's
        # truthiness first keeps row[0] from raising IndexError on such lines.
        if row and row[0] in search_for:
            print('Found: {}'.format(row))
            writer.writerow(row)
0
#!/usr/bin/python 

import csv 
import numpy as np 

class search_csv(object):
    """Load a tab-delimited file and write selected columns of matching rows.

    A row matches when at least one of its cells is exactly equal (case
    sensitive) to one of the search items. Output is tab-delimited.
    """

    def __init__(self, infile, outfile):
        """Read every row of ``infile`` and open ``outfile`` for writing.

        infile  -- path of the tab-delimited file to read.
        outfile -- path of the file to create. It is left open; ``searcher``
                   closes it after writing.
        """
        # Binary modes are what the Python 2 csv module expects. The input is
        # read completely here, so it is closed right away instead of leaking.
        with open(infile, 'rb') as source:
            rows = list(csv.reader(source, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL))
        self.non_numpy_data = rows
        self.data = np.array(rows, dtype=None)
        self.outfile = open(outfile, 'wb')
        self.writer_ = csv.writer(self.outfile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    def write_to(self, matched_values):
        """Write ``matched_values`` (an iterable of rows) to the output file.

        Always returns True.
        """
        self.writer_.writerows(matched_values)
        # non_numpy_search never closes the file, so flush to make sure the
        # rows reach the file without relying on interpreter shutdown.
        self.outfile.flush()
        print(' Matched Values Written ')
        return True

    def searcher(self, items, return_cols=(0, 2)):
        """NumPy search: write ``return_cols`` of each matching row, then close the output.

        items       -- list of exact cell values to look for, e.g.
                       ['OneTouch AT', 'LinkRunner AT', 'AirCheck'].
        return_cols -- zero-based indices of the columns to keep.

        Always returns True.
        """
        # Assumes every input row has the same number of cells, so that
        # self.data is a 2-D array -- TODO confirm for real input files.
        # NOTE(review): np.in1d is deprecated in NumPy 2.0 in favour of np.isin.
        find_these = np.array(items, dtype=None)
        cell_hits = np.in1d(self.data, find_these).reshape(self.data.shape)
        # One flag per row: indexing with nonzero()[0] of the cell mask would
        # repeat a row once for every matching cell it contains.
        row_hits = cell_hits.any(axis=1)
        matching_data = self.data[row_hits][:, list(return_cols)]
        self.write_to(matching_data)
        self.outfile.close()
        return True

    def non_numpy_search(self, items, return_cols=(0, 2)):
        """Pure-Python search: write ``return_cols`` of each matching row.

        items       -- list of exact cell values to look for.
        return_cols -- zero-based indices of the columns to keep.

        Every item is tested against every row, and a row that matches
        several items is still written only once. Always returns True.
        """
        selected = []
        for row in self.non_numpy_data:
            if any(item in row for item in items):
                selected.append([row[idx] for idx in return_cols])
        self.write_to(selected)
        return True


### now use the class ###

# Exact cell values to look for; this must be a list.
SEARCHING_FOR = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']

IN_FILE = 'in_file.csv'
OUT_FILE = 'out_file.csv'

# non_numpy_search is a method of search_csv, not a module-level function, so
# the object has to be built from the class; calling non_numpy_search(...)
# directly raises NameError.
search_csv(IN_FILE, OUT_FILE).non_numpy_search(SEARCHING_FOR)

從你的問題來看,我假設你只是想完成手頭的任務,並不在意具體是怎麼實現的。因此,請複製並粘貼這段代碼,把你的數據文件名作爲'IN_FILE'的值,把要寫入的文件名作爲'OUT_FILE'的值。完成後,將要搜索的值放入'SEARCHING_FOR'列表中。

注意事項.... SEARCHING_FOR應該是一個列表。

SEARCHING_FOR中的值完全匹配,因此'A'不匹配'a'。如果你想使用正則表達式或更復雜的東西讓我知道。

函數'non_numpy_search'中有'return_cols'參數。它默認爲第一列和第三列。

如果你沒有numpy讓我知道。

+0

我得到了以下錯誤信息: Traceback (most recent call last): File "csvparserV2.0.py", line 34: search_csv(IN_FILE, OUT_FILE).searcher(SEARCHING_FOR) File "csvparserV2.0.py", line 21, in searcher: matching_y = np.in1d(self.data, find_these).reshape(self.data.shape).nonzero()[0] File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/numpy/lib/arraysetops.py", line 335, in in1d: order = ar.argsort(kind='mergesort') TypeError: requested sort not available for type – Kris

+0

你知道你正在運行哪個版本的numpy嗎?執行 print np.__version__ 查看。 –

+0

如果文件不是巨大的,我剛剛發佈的編輯應該可以正常工作。 –

0
#!/usr/bin/python 

import csv 
import re 
import sys 
import gdata.docs.service 


#string_1 = ('OneTouch AT') 
#string_2 = ('LinkRunner AT') 
#string_3 = ('AirCheck') 

# Lower-case product names. The position of an entry in this list is the
# group number that find_group returns for a row containing it.
searched = ['aircheck', 'linkrunner at', 'onetouch at']


def find_group(row):
    """Return the group index of a row.

    Cells are scanned left to right and compared case-insensitively. The
    result is the index in ``searched`` of the first entry found as a
    substring of a cell:

        0 if the match is searched[0]
        1 if the match is searched[1]
        etc
        -1 if no cell contains any entry (including an empty row)
    """
    for col in row:
        col = col.lower()
        for j, s in enumerate(searched):
            if s in col:
                return j
    # Only give up after every cell has been checked; returning from inside
    # the column loop would ignore all cells after the first one.
    return -1

def does_match(string):
    """Tell whether the lower-cased ``string`` contains any entry of ``searched``."""
    lowered = string.lower()
    for needle in searched:
        if needle in lowered:
            return True
    return False

# Open the input file for reading and the output file for writing. Binary
# modes are what the Python 2 csv module expects. The with-statement closes
# both files even if an error interrupts the processing below.
with open('data.csv', "rb") as inFile, open('data2.csv', "wb") as outFile:
    reader = csv.reader(inFile)
    writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)

    # Build a list of items to sort. If row 12 contains 'LinkRunner AT'
    # (group 1), a triple (1, 12, row) is stored. When the triples are sorted,
    # all rows in group 0 come first, then all rows in group 1, etc. Within a
    # group the row number keeps the rows in their input order.
    stored = []
    for i, row in enumerate(reader):
        g = find_group(row)
        if g >= 0:
            stored.append((g, i, row))
    stored.sort()

    for g, i, row in stored:
        # Indices 0 and 2 are the first and third columns of the input row.
        writer.writerow(tuple(row[k] for k in (0, 2)))
+0

又用谷歌搜索並多讀了一些資料之後,我想出了另一種方法來實現。 :-) 目標已經完成。下一個目標是更新Google文檔中的電子表格。 – Kris

相關問題