我有這個文件,它使用UTF-8編碼,我試圖讀取它。到目前爲止,我已經使用BinaryReader,FileStream,我試過File.ReadAllLines。到目前爲止,我只得到文件的第一行。讀取C#中的UTF-8文件
這裏是我到目前爲止已經試過一些樣品:
public partial class Form1 : Form
{
private string filename = @"C:\UNICORN\Server\Fil\Users30.mpm";
public Form1()
{
InitializeComponent();
}
private static void clearText(RichTextBox rtb)
{
rtb.Text = "";
}
private void button1_Click(object sender, EventArgs e)
{
// use a binary reader
clearText(richTextBox1);
StringBuilder sb = new StringBuilder();
using(BinaryReader br = new BinaryReader(File.Open(filename, FileMode.Open)))
{
int pos = 0;
int length = (int) br.BaseStream.Length; // length of the file
byte[] bytes = new byte[length];
for(int i = 0; i < length; i++)
{
bytes[i] = br.ReadByte();
}
sb.Append("File Size: " + bytes.Length + "\n");
sb.AppendLine(System.Text.ASCIIEncoding.ASCII.GetString(bytes));
}
richTextBox1.Text = sb.ToString();
}
private void button2_Click(object sender, EventArgs e)
{
// use a binary reader
clearText(richTextBox1);
StringBuilder sb = new StringBuilder();
using(BinaryReader br = new BinaryReader(File.Open(filename, FileMode.Open)))
{
int pos = 0;
int length = (int) br.BaseStream.Length;
sb.Append("File Size: " + length + "\n");
while (pos < length)
{
var v = br.ReadInt32();
sb.Append((char)v + "\n");
pos += sizeof (int);
}
richTextBox1.Text = sb.ToString();
}
}
private void button3_Click(object sender, EventArgs e)
{
// use a binary reader
clearText(richTextBox1);
StringBuilder sb = new StringBuilder();
using (BinaryReader br = new BinaryReader(File.Open(filename, FileMode.Open)))
{
int pos = 0;
int length = (int)br.BaseStream.Length; // length of the file
byte[] bytes = new byte[length];
sb.Append("File Size: " + bytes.Length + "\n");
for (int i = 0; i < length; i++)
{
var b = br.ReadByte();
sb.Append("Byte: " + b + " - " + (char) b + "\n");
}
//sb.AppendLine(System.Text.ASCIIEncoding.ASCII.GetString(bytes));
}
richTextBox1.Text = sb.ToString();
}
private void button4_Click(object sender, EventArgs e)
{
// use a stream reader
clearText(richTextBox1);
StringBuilder sb = new StringBuilder();
using(StreamReader sr = new StreamReader(filename, Encoding.UTF8))
{
sb.Append(sr.ReadLine() + "\n");
}
richTextBox1.Text = sb.ToString();
}
private void button5_Click(object sender, EventArgs e)
{
// use a stream reader
clearText(richTextBox1);
StringBuilder sb = new StringBuilder();
using (StreamReader reader = new StreamReader(File.OpenRead(filename)))
{
while(!reader.EndOfStream)
{
var line = reader.ReadLine();
if(line != null)
{
sb.AppendLine(line);
}
}
}
richTextBox1.Text = sb.ToString();
}
private void button6_Click(object sender, EventArgs e)
{
// use a file stream and a decoder
clearText(richTextBox1);
StringBuilder sb = new StringBuilder();
byte[] byData = new byte[255];
char[] charData = new char[255];
try
{
FileStream aFile = new FileStream(filename, FileMode.Open);
aFile.Seek(55, SeekOrigin.Begin);
aFile.Read(byData, 0, 100);
} catch (Exception ex)
{
sb.Append("ERROR: " + ex.ToString());
}
Decoder d = Encoding.UTF8.GetDecoder();
d.GetChars(byData, 0, byData.Length, charData, 0);
foreach(char c in charData)
{
sb.Append(c + " ");
}
richTextBox1.Text = sb.ToString();
}
private void button7_Click(object sender, EventArgs e)
{
// find the encoding of a file, just trying to find out the encoding with this
clearText(richTextBox1);
StringBuilder sb = new StringBuilder();
using(var r = new StreamReader(filename, detectEncodingFromByteOrderMarks: true))
{
var es = r.CurrentEncoding;
sb.Append("Encoding: " + es);
}
richTextBox1.Text = sb.ToString();
}
private void button8_Click(object sender, EventArgs e)
{
// use File.ReadAllLines()
clearText(richTextBox1);
StringBuilder sb = new StringBuilder();
foreach(var line in File.ReadAllLines(filename, Encoding.UTF8))
{
sb.Append(line.ToString() + "\n");
}
richTextBox1.Text = sb.ToString();
}
}
這些例子都只是告訴我該文件的一行。你將如何閱讀/解析整個文件?
該文件包含一個用戶列表,我最終試圖閱讀文件來查找這些用戶名。現在,它只顯示文件的第一行是「UNICORN 3.06」。
如果是UTF-8編碼,它是一個文本文件,而不是二進制文件。另外,如果它是UTF-8,爲什麼你使用'System.Text.ASCIIEncoding.ASCII.GetString('而不是'System.Text.ASCIIEncoding.UTF8.GetString('? –
當我不知道它是UTF-8並沒有費心去修改它,但由於某種原因,我遇到了這個問題,我把那行代碼改成了UTF8,它仍然只讀取文件的第一行 – user
如果這是一個文本文件,而不是一個真正的二進制文件,你想只是文件的文本全部作爲字符串我會使用['File.ReadAllText'](http://msdn.microsoft.com/en-us/library/ms143368 %28v = vs.110%29.aspx) –