BMLT Root Server
Functions
spanish_metaphone.php File Reference

Go to the source code of this file.

Functions

 spanish_metaphone ($string)
 This is a Spanish version of the standard double metaphone tokenizer. More...
 
 spanish_metaphone_string_at ($string, $start, $string_length, $list)
 Search a string for the presence of substrings. More...
 
 spanish_metaphone_is_vowel ($string, $pos)
 See if the character at a given string position is a vowel. More...
 

Function Documentation

spanish_metaphone (   $string)

This is a Spanish version of the standard double metaphone tokenizer.

Metaphone allows matching of strings phonetically, but is very language-dependent. The standard metaphone algorithm is for English, and misses a few rules for Spanish. This is a version that takes Spanish pronunciation into account when generating tokens.

Returns
a metaphone token for the given Spanish string.
Parameters
$string The string to tokenize.

Definition at line 52 of file spanish_metaphone.php.

References spanish_metaphone_is_vowel(), and spanish_metaphone_string_at().

Referenced by SplitIntoMetaphone().

54  {
55  //initialize metaphone key string
56  $meta_key = "";
57 
58  //set maximum metaphone key size
59  $key_length = 6;
60 
61  //set current position to the beginning
62  $current_pos = 0;
63 
64  //get string length
65  $string_length = strlen($string);
66 
67  //set to the end of the string
68  $end_of_string_pos = $string_length - 1;
69  $original_string = $string. " ";
70 
71  //Let's replace some spanish characters easily confused
72  $original_string = strtr($original_string, 'áéíóúñübz', 'AEIOUNUVS');
73 
74  //convert string to uppercase
75  $original_string = strtoupper($original_string);
76 
77 
78  // main loop
79  while (strlen($meta_key) < $key_length) {
80  //break out of the loop if greater or equal than the length
81  if ($current_pos >= $string_length) {
82  break;
83  }
84 
85  //get character from the string
86  $current_char = substr($original_string, $current_pos, 1);
87 
88  //if it is a vowel, and it is at the beginning of the string,
89  //set it as part of the meta key
90  if (spanish_metaphone_is_vowel($original_string, $current_pos)
91  && ($current_pos == 0)) {
92  $meta_key .= $current_char;
93  $current_pos += 1;
94  } elseif (spanish_metaphone_string_at(
95  $original_string,
96  $current_pos,
97  1,
98  array('D','F','J','K','M','N','P','R','S','T','V')
99  )) {
100  //Let's check for consonants that have a single sound
101  //or already have been replaced because they share the same
102  //sound like 'B' for 'V' and 'S' for 'Z'
103  $meta_key .= $current_char;
104 
105  //increment by two if a repeated letter is found
106  if (substr($original_string, $current_pos + 1, 1) == $current_char) {
107  $current_pos += 2;
108  }
109 
110  //else increment only by one
111  $current_pos += 1;
112  } else //check consonants with similar confusing sounds
113  {
114  switch ($current_char) {
115  case 'C':
116  //special case 'macho', chato,etc.
117  if (substr($original_string, $current_pos + 1, 1)== 'H') {
118  $current_pos += 2;
119  } elseif (substr($original_string, $current_pos + 1, 1)== 'C') {
120  //special case 'acción', 'reacción', etc.
121  $meta_key .= 'X';
122  $current_pos += 2;
123  break;
124  } elseif (spanish_metaphone_string_at($original_string, $current_pos, 2, array('CE','CI'))) {
125  // special case 'cesar', 'cien', 'cid', 'conciencia'
126  $meta_key .= 'S';
127  $current_pos += 2;
128  break;
129  } // else
130  $meta_key .= 'K';
131  $current_pos += 1;
132  break;
133 
134  case 'G':
135  // special case 'gente', 'ecologia',etc
136  if (spanish_metaphone_string_at(
137  $original_string,
138  $current_pos,
139  2,
140  array('GE','GI')
141  )) {
142  $meta_key .= 'J';
143  $current_pos += 2;
144  break;
145  } // else
146  $meta_key .= 'G';
147  $current_pos += 1;
148  break;
149 
150  //since the letter 'h' is silent in spanish,
151  //let's set the meta key to the vowel after the letter 'h'
152  case 'H':
153  if (spanish_metaphone_is_vowel($original_string, $current_pos + 1)) {
154  $meta_key .= $original_string[$current_pos + 1];
155  $current_pos += 2;
156  break;
157  }
158 
159  // else
160  $meta_key .= 'H';
161  $current_pos += 1;
162  break;
163 
164  case 'Q':
165  if (substr($original_string, $current_pos + 1, 1) == 'U') {
166  $current_pos += 2;
167  } else {
168  $current_pos += 1;
169  }
170 
171  $meta_key .= 'K';
172  break;
173 
174  case 'W':
175  $meta_key .= 'U';
176  $current_pos += 2;
177  break;
178 
179  case 'X':
180  //some mexican spanish words like'Xochimilco','xochitl'
181  if ($current_pos == 0) {
182  $meta_key .= 'S';
183  $current_pos += 2;
184  break;
185  }
186 
187  $meta_key .= 'X';
188  $current_pos += 1;
189  break;
190 
191  default:
192  $current_pos += 1;
193  } // end of switch
194  }//end else
195 
196  //Commented code *** for debugging purposes only ***
197  /*
198  printf("<br>ORIGINAL STRING: '%s'\n", $original_string);
199  printf("<br>CURRENT POSITION: '%s'\n", $current_pos);
200  printf("<br>META KEY STRING: '%s'\n", $meta_key);
201  */
202  } // end of while loop
203 
204  //trim any blank characters
205  $meta_key = trim($meta_key) ;
206 
207  //return the final meta key string
208  return $meta_key;
209 }
spanish_metaphone_string_at($string, $start, $string_length, $list)
Search a string for the presence of substrings.
spanish_metaphone_is_vowel($string, $pos)
See if the character at a given string position is a vowel.
spanish_metaphone_is_vowel (   $string,
  $pos 
)

See if the character at a given string position is a vowel.

Returns
1 (truthy) if the character at the given position is one of the uppercase vowels A, E, I, O or U; 0 otherwise.
Parameters
$string The string to search.
$pos The position of the character to test.

Definition at line 254 of file spanish_metaphone.php.

Referenced by spanish_metaphone().

257  {
258  return preg_match("/[AEIOU]/", substr($string, $pos, 1));
259 }
spanish_metaphone_string_at (   $string,
  $start,
  $string_length,
  $list 
)

Search a string for the presence of substrings.

Returns
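1 (truthy) if the substring of the given length starting at the given position is equal to any of the given substrings; 0 otherwise, including when the start position lies outside the string.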
Parameters
$string The main string to search.
$start The position within the main string to start the search.
$string_length The length of the substrings to test.
$list An array of substrings.

Definition at line 225 of file spanish_metaphone.php.

Referenced by spanish_metaphone().

230  {
231  if (($start <0) || ($start >= strlen($string))) {
232  return 0;
233  }
234 
235  for ($i=0; $i<count($list); $i++) {
236  if ($list[$i] == substr($string, $start, $string_length)) {
237  return 1;
238  }
239  }
240  return 0;
241 }