BMLT Root Server
spanish_metaphone.php
Go to the documentation of this file.
1 <?php
2 defined('BMLT_EXEC') or die('Cannot Execute Directly'); // Makes sure that this file is in the correct context.
3 
4 // ============================================================================
5 // Function Name: spanish_metaphone($string)
6 // ============================================================================
7 // Author: Israel J. Sustaita (isloera@yahoo.com)
8 // Version: 1.0.1
9 // Input: A string
10 // Output: Metaphone key string
11 // Description: This function takes a spanish word and returns its
12 // metaphone sound key.
13 // Comments:
14 // It generates spanish metaphone keys useful for spell
15 // checkers and other purposes. I decided to alter the
16 // metaphone function because I needed to check the spelling
17 // in spanish words.
18 //
19 // History:
20 // 2005-10-14 - Version 1.0.1
21 // - Removed unnecessary code and fixed some minor bugs
22 //
23 // 2005-10-09 - Version 1.0.0
24 // - Initial Release
25 //
26 //
27 // **** Acknowledgements ****
28 //
29 // This Function was adapted from a functional callable version of
30 // DoubleMetaphone created by Geoff Caplan http://www.advantae.com, who
31 // adapted it from the class by Stephen Woodbridge.
32 //
33 //
34 // It also uses the "spanish_metaphone_string_at()" and the
35 // "spanish_metaphone_is_vowel()" functions from the same implementation.
36 //
37 // Source: http://swoodbridge.com/DoubleMetaPhone/
38 //
39 // ============================================================================
40 
/**
 \brief This is a Spanish version of the standard double metaphone tokenizer.

 Metaphone allows matching of strings phonetically, but is very language-
 dependent. The standard metaphone algorithm is for English, and misses a
 few rules for Spanish. This is a version that takes Spanish pronunciation
 into account when generating tokens.

 The input is first normalized: accented vowels lose their accent, N-tilde
 becomes N, U-diaeresis becomes U, everything is upper-cased, and then B
 becomes V and Z becomes S. The normalized text is scanned left to right
 until the key holds 6 characters or the input is exhausted. Characters
 that match no rule (including every vowel that is not the first character
 and does not follow an H) contribute nothing to the key.

 \returns a metaphone token (at most 6 characters) for the given Spanish string.
*/
function spanish_metaphone(
    $string ///< The string to tokenize.
) {
    // Maximum number of characters in the generated key.
    $key_length = 6;

    // Accented letters, written as UTF-8 byte sequences so that this table does
    // not depend on the encoding the source file is saved in. The array form of
    // strtr() is required: the two-string form maps byte-for-byte and silently
    // ignores the surplus when both strings differ in length.
    $accent_map = array(
        "\xC3\xA1" => 'A', "\xC3\x81" => 'A', // a-acute, lower and upper case
        "\xC3\xA9" => 'E', "\xC3\x89" => 'E', // e-acute
        "\xC3\xAD" => 'I', "\xC3\x8D" => 'I', // i-acute
        "\xC3\xB3" => 'O', "\xC3\x93" => 'O', // o-acute
        "\xC3\xBA" => 'U', "\xC3\x9A" => 'U', // u-acute
        "\xC3\xB1" => 'N', "\xC3\x91" => 'N', // n-tilde
        "\xC3\xBC" => 'U', "\xC3\x9C" => 'U', // u-diaeresis
    );

    // The trailing blank lets the one-character look-ahead below read past the
    // last real character without special-casing the end of the string.
    $original_string = strtoupper(strtr($string . " ", $accent_map));

    // Letters that share a sound are merged. This runs after upper-casing so
    // that 'B'/'Z' typed in capitals are merged too.
    $original_string = strtr($original_string, 'BZ', 'VS');

    // Length of the normalized text without the trailing blank. It has to be
    // measured after normalization because each accented letter shrinks from
    // two bytes to one.
    $string_length = strlen($original_string) - 1;

    $meta_key = "";
    $current_pos = 0;

    while ((strlen($meta_key) < $key_length) && ($current_pos < $string_length)) {
        $current_char = substr($original_string, $current_pos, 1);
        $next_char = substr($original_string, $current_pos + 1, 1);

        if (spanish_metaphone_is_vowel($original_string, $current_pos)
            && ($current_pos == 0)) {
            // A vowel is only keyed when it is the very first character.
            $meta_key .= $current_char;
            $current_pos += 1;
        } elseif (spanish_metaphone_string_at(
            $original_string,
            $current_pos,
            1,
            array('D','F','J','K','M','N','P','R','S','T','V')
        )) {
            // Consonants that have a single sound, or that were already merged
            // above ('V' stands for 'B', 'S' stands for 'Z').
            $meta_key .= $current_char;

            // A doubled letter is a single sound: step over both copies,
            // otherwise step over just this one.
            $current_pos += ($next_char == $current_char) ? 2 : 1;
        } else {
            // Consonants whose sound depends on what follows them.
            switch ($current_char) {
                case 'C':
                    if ($next_char == 'H') {
                        // 'macho', 'chato', etc.
                        // NOTE(review): this rule has always keyed 'K' and then
                        // stepped over THREE characters (C, H and the one after
                        // it). That is harmless when a vowel follows, but drops
                        // a following consonant. Preserved as-is; confirm
                        // whether two was intended.
                        $meta_key .= 'K';
                        $current_pos += 3;
                        break;
                    }

                    if ($next_char == 'C') {
                        // 'accion', 'reaccion', etc.
                        $meta_key .= 'X';
                        $current_pos += 2;
                        break;
                    }

                    if (spanish_metaphone_string_at($original_string, $current_pos, 2, array('CE','CI'))) {
                        // 'cesar', 'cien', 'cid', 'conciencia'
                        $meta_key .= 'S';
                        $current_pos += 2;
                        break;
                    }

                    $meta_key .= 'K';
                    $current_pos += 1;
                    break;

                case 'G':
                    if (spanish_metaphone_string_at(
                        $original_string,
                        $current_pos,
                        2,
                        array('GE','GI')
                    )) {
                        // 'gente', 'ecologia', etc.
                        $meta_key .= 'J';
                        $current_pos += 2;
                        break;
                    }

                    $meta_key .= 'G';
                    $current_pos += 1;
                    break;

                case 'H':
                    // 'H' is silent in Spanish, so the vowel that follows it is
                    // keyed in its place.
                    if (spanish_metaphone_is_vowel($original_string, $current_pos + 1)) {
                        $meta_key .= $next_char;
                        $current_pos += 2;
                        break;
                    }

                    $meta_key .= 'H';
                    $current_pos += 1;
                    break;

                case 'Q':
                    // 'QU' is one sound; the 'U' is consumed with the 'Q'.
                    $current_pos += ($next_char == 'U') ? 2 : 1;
                    $meta_key .= 'K';
                    break;

                case 'W':
                    // NOTE(review): also steps over the character after the
                    // 'W'; preserved as-is, confirm whether intentional.
                    $meta_key .= 'U';
                    $current_pos += 2;
                    break;

                case 'X':
                    if ($current_pos == 0) {
                        // Some Mexican Spanish words like 'Xochimilco', 'xochitl'.
                        // NOTE(review): also steps over the character after the
                        // 'X'; preserved as-is, confirm whether intentional.
                        $meta_key .= 'S';
                        $current_pos += 2;
                        break;
                    }

                    $meta_key .= 'X';
                    $current_pos += 1;
                    break;

                default:
                    // Anything else adds nothing to the key.
                    $current_pos += 1;
            }
        }
    }

    // Trim any blank characters.
    return trim($meta_key);
}
210 // ====== End of spanish_metaphone function =======================
211 
212 //***** helper functions *******************************************
213 //******************************************************************
214 
215 /*=================================================================*\
216  # Name: spanish_metaphone_string_at($string, $start, $string_length, $list)
217  # Purpose: Helper function for spanish_metaphone( )
218  # Return: Bool
219 \*=================================================================*/
/**
 \brief Test whether the substring at a given position is one of a list of candidates.

 Exactly one substring is examined: the $string_length characters of $string
 beginning at $start. It is compared for string identity against each entry
 of $list.

 \returns 1 if the substring equals one of the entries of $list; 0 if it does
 not, or if $start lies outside the string.
*/
function spanish_metaphone_string_at(
    $string, ///< The main string to search.
    $start, ///< The position within the main string to start the search.
    $string_length, ///< The length of the substrings to test.
    $list ///< An array of substrings.
) {
    // A start position outside the string can never match.
    if (($start < 0) || ($start >= strlen($string))) {
        return 0;
    }

    // Extract the candidate once rather than once per list entry.
    $candidate = substr($string, $start, $string_length);

    // Strict comparison keeps numeric-looking strings from matching loosely.
    return in_array($candidate, $list, true) ? 1 : 0;
}
242 
243 
244 /*=================================================================*\
245  # Name: spanish_metaphone_is_vowel($string, $pos)
246  # Purpose: Helper function for double_metaphone( )
247  # Return: Bool
248 \*=================================================================*/
/**
 \brief See if the character at a given string position is a vowel.

 Only the upper-case letters A, E, I, O and U count as vowels; lower-case
 and accented vowels do not match.

 \returns the preg_match() result: 1 if the character is a vowel, 0 if it is
 not. A position past the end of the string yields 0, because there is no
 character there to match.
*/
function spanish_metaphone_is_vowel(
    $string, ///< The string to search.
    $pos ///< The position of the character to test.
) {
    return preg_match("/[AEIOU]/", substr($string, $pos, 1));
}
260 // ******** end of helper functions **************************
spanish_metaphone($string)
This is a Spanish version of the standard double metaphone tokenizer.
spanish_metaphone_string_at($string, $start, $string_length, $list)
Search a string for the presence of substrings.
spanish_metaphone_is_vowel($string, $pos)
See if the character at a given string position is a vowel.
defined('BMLT_EXEC') or define('BMLT_EXEC'
Definition: index.php:3