C#
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
using System;
using System.Collections.Generic;
using System.IO;

namespace GedcomReader
{
    /// <summary>
    /// Reads a GEDCOM (genealogy) text file and exposes the individuals and
    /// family memberships found in it.
    /// </summary>
    /// <remarks>
    /// Only level-0 records whose tag is exactly <c>INDI</c> or <c>FAM</c> are
    /// interpreted; every other record is skipped.
    /// </remarks>
    class Gedcom
    {
        // Accepted line terminators. Empty entries are removed when splitting,
        // so blank lines (and the two halves of a CRLF) never reach the parser.
        private static readonly string[] LineBreaks = new string[] { "\r\n", "\n", "\r" };

        // Full text of the file as read by the constructor.
        private string GedcomText = "";

        /// <summary>One individual (INDI record).</summary>
        public struct INDI
        {
            /// <summary>Cross-reference id without the surrounding '@' characters.</summary>
            public string ID;
            /// <summary>Value of the last "1 NAME" line with '/' removed; null when absent.</summary>
            public string Name;
            /// <summary>Value of the last "1 SEX" line; null when absent.</summary>
            public string Sex;
            /// <summary>Value of the "2 DATE" line under the last "1 BIRT" line; null when absent.</summary>
            public string BDay;
            /// <summary>True when the record contains "1 DEAT Y".</summary>
            public bool Dead;
            /// <summary>Family id from the last "1 FAMS" line, '@' removed; null when absent.</summary>
            public string FamS;
            /// <summary>Family id from the last "1 FAMC" line, '@' removed; null when absent.</summary>
            public string FamC;
        }

        /// <summary>One membership row linking an individual to a family (FAM record).</summary>
        public struct FAM
        {
            /// <summary>Family cross-reference id without the surrounding '@' characters.</summary>
            public string FamID;
            /// <summary>"PAR" for a HUSB or WIFE line, "CHIL" for a CHIL line, "" for a family with no members.</summary>
            public string Type;
            /// <summary>Id of the referenced individual, '@' removed; "" for a family with no members.</summary>
            public string IndiID;
        }

        /// <summary>Individuals found in the file, in file order.</summary>
        public List<INDI> Individuals = new List<INDI>();

        /// <summary>Family membership rows found in the file, in file order.</summary>
        public List<FAM> Families = new List<FAM>();

        /// <summary>
        /// Reads the whole file at <paramref name="fileName"/> and parses it.
        /// </summary>
        /// <param name="fileName">Path of the GEDCOM file to read.</param>
        public Gedcom(string fileName)
        {
            // Read the file as one block of text; the reader is disposed before parsing.
            using (StreamReader reader = new StreamReader(fileName))
            {
                GedcomText = reader.ReadToEnd();
            }

            ReadGedcom();
        }

        /// <summary>
        /// Splits the text into lines, groups them into level-0 records and
        /// hands each record to <see cref="ProcessRecord"/>.
        /// </summary>
        private void ReadGedcom()
        {
            List<string> record = new List<string>();

            foreach (string rawLine in GedcomText.Split(LineBreaks, StringSplitOptions.RemoveEmptyEntries))
            {
                string line = rawLine.Trim();
                if (line.Length == 0)
                {
                    continue;
                }

                // A level-0 line closes the record collected so far and opens a new one.
                if (line.StartsWith("0 ", StringComparison.Ordinal) && record.Count > 0)
                {
                    ProcessRecord(record.ToArray());
                    record.Clear();
                }

                record.Add(line);
            }

            if (record.Count > 0)
            {
                ProcessRecord(record.ToArray());
            }
        }

        /// <summary>
        /// Adds the record to <see cref="Individuals"/> or <see cref="Families"/>
        /// when its header tag is INDI or FAM; ignores it otherwise.
        /// </summary>
        private void ProcessRecord(string[] Node)
        {
            string id;
            string tag;
            if (!TryParseHeader(Node[0], out id, out tag))
            {
                return;
            }

            if (tag == "INDI")
            {
                Individuals.Add(ExtractINDI(Node, id));
            }
            else if (tag == "FAM")
            {
                Families.AddRange(ExtractFAM(Node, id));
            }
        }

        /// <summary>
        /// Parses a header of the form <c>0 @ID@ TAG ...</c>.
        /// </summary>
        /// <returns>True when the line has that form; false otherwise (for example "0 HEAD").</returns>
        private static bool TryParseHeader(string header, out string id, out string tag)
        {
            id = null;
            tag = null;

            if (!header.StartsWith("0 ", StringComparison.Ordinal))
            {
                return false;
            }

            string rest = header.Substring(2).TrimStart();
            if (rest.Length == 0 || rest[0] != '@')
            {
                return false;
            }

            int close = rest.IndexOf('@', 1);
            if (close < 0)
            {
                return false;
            }

            id = rest.Substring(1, close - 1).Trim();

            // The tag is the first word after the id; anything after it is record text.
            string afterId = rest.Substring(close + 1).Trim();
            int space = afterId.IndexOf(' ');
            tag = space < 0 ? afterId : afterId.Substring(0, space);
            return tag.Length > 0;
        }

        /// <summary>
        /// Builds one <see cref="FAM"/> row per HUSB, WIFE and CHIL line of a family record.
        /// </summary>
        /// <returns>
        /// The membership rows in file order. A family without any member line
        /// yields a single row with empty Type and IndiID so it stays visible.
        /// </returns>
        private List<FAM> ExtractFAM(string[] Node, string familyId)
        {
            List<FAM> members = new List<FAM>();

            foreach (string line in Node)
            {
                string type = null;
                string value = GetTagValue(line, "1 HUSB");
                if (value != null)
                {
                    type = "PAR";
                }
                else if ((value = GetTagValue(line, "1 WIFE")) != null)
                {
                    type = "PAR";
                }
                else if ((value = GetTagValue(line, "1 CHIL")) != null)
                {
                    type = "CHIL";
                }

                if (type != null)
                {
                    members.Add(MakeFam(familyId, type, value.Replace("@", "").Trim()));
                }
            }

            if (members.Count == 0)
            {
                members.Add(MakeFam(familyId, "", ""));
            }

            return members;
        }

        /// <summary>Creates a <see cref="FAM"/> row from its three values.</summary>
        private static FAM MakeFam(string familyId, string type, string individualId)
        {
            FAM fam = new FAM();
            fam.FamID = familyId;
            fam.Type = type;
            fam.IndiID = individualId;
            return fam;
        }

        /// <summary>
        /// Builds an <see cref="INDI"/> from an individual record. Tags that are
        /// missing leave the corresponding field at its default value.
        /// </summary>
        private INDI ExtractINDI(string[] Node, string id)
        {
            INDI I = new INDI();
            I.ID = id;

            int nameIndex = FindLastTag(Node, "1 NAME");
            if (nameIndex != -1)
            {
                // The surname is wrapped in '/' in the file; strip the slashes.
                I.Name = GetTagValue(Node[nameIndex], "1 NAME").Replace("/", "").Trim();
            }

            int sexIndex = FindLastTag(Node, "1 SEX");
            if (sexIndex != -1)
            {
                I.Sex = GetTagValue(Node[sexIndex], "1 SEX");
            }

            int birthIndex = FindLastTag(Node, "1 BIRT");
            if (birthIndex != -1)
            {
                // The date is a sub-line of BIRT and need not be the very next line.
                string date = FindSubValue(Node, birthIndex, "2 DATE");
                if (date != null)
                {
                    I.BDay = date;
                }
            }

            // NOTE(review): only an explicit "1 DEAT Y" marks the person as dead;
            // a DEAT line that carries just a date or place does not. Confirm that is intended.
            int deathIndex = FindLastTag(Node, "1 DEAT");
            if (deathIndex != -1 && GetTagValue(Node[deathIndex], "1 DEAT") == "Y")
            {
                I.Dead = true;
            }

            int famsIndex = FindLastTag(Node, "1 FAMS");
            if (famsIndex != -1)
            {
                I.FamS = GetTagValue(Node[famsIndex], "1 FAMS").Replace("@", "").Trim();
            }

            int famcIndex = FindLastTag(Node, "1 FAMC");
            if (famcIndex != -1)
            {
                I.FamC = GetTagValue(Node[famcIndex], "1 FAMC").Replace("@", "").Trim();
            }

            return I;
        }

        /// <summary>
        /// Returns the trimmed value of <paramref name="line"/> when it is the
        /// given level-and-tag (for example "1 SEX"), "" when the tag has no
        /// value, and null when the line is a different tag.
        /// </summary>
        private static string GetTagValue(string line, string levelAndTag)
        {
            if (line == levelAndTag)
            {
                return "";
            }

            if (line.StartsWith(levelAndTag + " ", StringComparison.Ordinal))
            {
                return line.Substring(levelAndTag.Length + 1).Trim();
            }

            return null;
        }

        /// <summary>
        /// Returns the index of the last line in <paramref name="Arr"/> that is
        /// the given level-and-tag, or -1 when there is none.
        /// </summary>
        private static int FindLastTag(string[] Arr, string levelAndTag)
        {
            int found = -1;
            for (int i = 0; i < Arr.Length; i++)
            {
                if (GetTagValue(Arr[i], levelAndTag) != null)
                {
                    found = i;
                }
            }

            return found;
        }

        /// <summary>
        /// Searches the sub-lines that follow <paramref name="parentIndex"/> for
        /// the given level-and-tag and returns its value, or null when the next
        /// level-0 or level-1 line is reached first.
        /// </summary>
        private static string FindSubValue(string[] Arr, int parentIndex, string levelAndTag)
        {
            for (int i = parentIndex + 1; i < Arr.Length; i++)
            {
                string line = Arr[i];
                if (line.StartsWith("0 ", StringComparison.Ordinal) || line.StartsWith("1 ", StringComparison.Ordinal))
                {
                    break;
                }

                string value = GetTagValue(line, levelAndTag);
                if (value != null)
                {
                    return value;
                }
            }

            return null;
        }
    }
}
Refactorings
No refactoring yet !
Ants
May 25, 2009, May 25, 2009 03:42, permalink
Have you looked at http://msdn.microsoft.com/en-us/magazine/cc188730.aspx ?
Jaume
September 24, 2009, September 24, 2009 05:57, permalink
Thanks very much for your code. I have expanded it to handle more instructions. One thing that I have modified is FindIndexinArray: I replaced the for loop with a while loop so that it stops at the first occurrence, and I also added an overload with a parameter called start, which is used as the initial index (i = start). This accounts for multiple occurrences of an instruction (for example NOTE or CONT).
Best Regards
Jaume
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/// <summary>
/// Returns the index of the first element at or after <paramref name="start"/>
/// that contains <paramref name="search"/>, or -1 when no such element exists.
/// </summary>
/// <param name="Arr">Lines to scan.</param>
/// <param name="search">Substring to look for.</param>
/// <param name="start">Index at which the scan begins.</param>
private int FindIndexinArray(string[] Arr, string search, int start)
{
    for (int index = start; index < Arr.Length; index++)
    {
        if (Arr[index].Contains(search))
        {
            // Stop at the first hit so repeated tags can be walked one by one.
            return index;
        }
    }

    return -1;
}
I created this reader class for GEDCOM files (a genealogy format). The format is a flat ASCII file (that sucks), and I return a list of individuals and a list of family info.
I'm still fairly new at creating classes (this is my first real class written on my own, not as an answer to an exercise in a book).
EDIT:
Someone suggested using separate classes for the two structures; I would be interested to know if you agree.