001 
002 /*
003  *  Descripter 1.0 - Java Script Engines
004  *  Copyright (C) 2010-2015  Jianjun Liu (J.J.Liu)
005  *  
006  *  This program is free software: you can redistribute it and/or modify
007  *  it under the terms of the GNU Affero General Public License as published by
008  *  the Free Software Foundation, either version 3 of the License, or
009  *  (at your option) any later version.
010  *  
011  *  This program is distributed in the hope that it will be useful,
012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014  *  GNU Affero General Public License for more details.
015  *  
016  *  You should have received a copy of the GNU Affero General Public License
017  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
018  */
019 
020 package org.descripter.js.api.core;
021 
022 import java.util.regex.Matcher;
023 import java.util.regex.Pattern;
024 
025 import org.descripter.js.api.Core;
026 import org.descripter.js.api.Function;
027 import org.descripter.js.api.Key;
028 
029 /**
030  * <p>Emulates JavaScript RegExp objects.</p>
031  * 
032  * @author <a href="mailto:jianjunliu@126.com">J.J.Liu (Jianjun Liu)</a> at <a href="http://www.descripter.org" target="_blank">http://www.descripter.org</a>
033  * @since Descripter 1.0
034  */
035 public class CRegExp extends CObject
036 {
037     private Pattern pattern = null;
038 
039     /**
040      * <p>Constructs a {@link CObject} context of this type.</p>
041      * @param constructor The constructor {@link Function} object.
042      * @param regex The string representation of a regular expression
043      * @param flags The flags of the regular expression
044      * @since Descripter 1.0
045      */
046     public CRegExp(Function<?> constructor, String regex, String flags) {
047         super(constructor);
048         Core core = core();
049         super.put(core._source, regex);
050         boolean g = false, i = false, m = false;
051         for (char c : flags.toCharArray()) {
052             switch (c)
053             {
054                 case 'g':
055                 case 'G':
056                     g = true;
057                     break;
058                 case 'i':
059                 case 'I':
060                     i = true;
061                     break;
062                 case 'm':
063                 case 'M':
064                     m = true;
065                     break;
066                 default:
067             }
068         }
069         super.put(core._global, g);
070         super.put(core._ignoreCase, i);
071         super.put(core._multiline, m);
072     }
073 
074     /**
075      * <p>Constructs a {@link CObject} context of this type.</p>
076      * @param constructor The constructor {@link Function} object.
077      * @param regex The string representation of a regular expression
078      * @since Descripter 1.0
079      */
080     public CRegExp(Function<?> constructor, String regex) {
081         super(constructor);
082         super.put(core()._source, regex);
083     }
084 
085     /**
086      * <p>Gets the internal {@link Pattern} object of the current {@link CRegExp} object.</p>
087      * @return The internal {@link Pattern} object of the current {@link CRegExp} object
088      * @since Descripter 1.0
089      */
090     public final Pattern pattern() {
091         if (pattern == null) {
092             int flags = 0;
093             if (ignoreCase()) {
094                 flags = Pattern.CASE_INSENSITIVE;
095             }
096             if (multiline()) {
097                 flags |= Pattern.MULTILINE;
098             }
099             pattern = Pattern.compile(source(), flags);
100         }
101         return pattern;
102     }
103 
104     /**
105      * <p>Performs powerful, general-purpose pattern matching with the current regular expression instance.</p>
106      * <p>This method is the most powerful of all the regular expression and string 
107      * pattern-matching methods. It is a general-purpose method that is somewhat more 
108      * complex to use than {@link #test(Object)}, {@link CString#search(CRegExp)}, 
109      * {@link CString#replace(CRegExp, String)}, and {@link CString#match(CRegExp)}.</p>
110      * <p>This invocation searches string for text that matches the current regular expression. 
111      * If it finds a match, it returns an array of results; otherwise, it returns 
112      * <tt>null</tt>. Element 0 of the returned array is the matched text. Element 1 is 
113      * the text that matched the first parenthesized subexpression, if any, within the current 
114      * regular expression. Element 2 contains the text that matched the second subexpression, 
115      * and so on. The array length property specifies the number of elements in the array, 
116      * as usual. In addition to the array elements and the length property, the value 
117      * returned by the invocation also has two other properties. The <tt>index</tt> 
118      * property (see {@link CArray#index()}) specifies the character position of the first 
119      * character of the matched text. The <tt>input</tt> property (see {@link CArray#input()}) 
120      * refers to <tt>s</tt>. This returned array is the same as the array that is 
121      * returned by the {@link CString#match(CRegExp)} method, when invoked on a 
122      * non-global regular expression instance.</p>
123      * <p>When this method is invoked on a non-global pattern, it performs the search and 
124      * returns the result described earlier. When the current instance is a global regular 
125      * expression, however, the invocation behaves in a slightly more complex way. It begins 
126      * searching string at the character position specified by the <tt>lastIndex</tt> 
127      * property (see {@link #lastIndex()} of the current 
128      * regular expression. When it finds a match, it sets <tt>lastIndex</tt> to the 
129      * position of the first character after the match. This means that you can invoke 
130      * this method repeatedly in order to loop through all matches in a string. When 
131      * the invocation cannot find any more matches, it returns <tt>null</tt> and 
132      * resets <tt>lastIndex</tt> to zero. If you begin searching a new string 
133      * immediately after successfully finding a match in another string, you must be 
134      * careful to manually reset <tt>lastIndex</tt> to zero.</p>
135      * <p>Note that this invocation always includes full details of every match in the 
136      * array it returns, whether or not the current regular expression is a global pattern. 
137      * This is where this method differs from {@link CString#match(CRegExp)}, which 
138      * returns much less information when used with global patterns. Calling this method 
139      * repeatedly in a loop is the only way to obtain complete pattern-matching 
140      * information for a global pattern.</p>
141      * @param s The string to be tested.
142      * @return An array containing the results of the match or undefined 
143      * <tt>null</tt> if no match was found.
144      * @throws RuntimeException JavaScript throws a <tt>TypeError</tt> if this method 
145      * is invoked with an instance that is not a regular expression.
146      * @see #lastIndex()
147      * @see #test(Object)
148      * @see CString#match(CRegExp)
149      * @see CString#replace(CRegExp, String)
150      * @see CString#replace(CRegExp, Function)
151      * @see CString#search(CRegExp)
152      * @since Descripter 1.0
153      */
154     public final CArray exec(Object s) {
155         Matcher m = pattern().matcher(toString(s));
156         boolean ret = false;
157         if (global()) {
158             ret = m.find(lastIndex());
159             put(core()._lastIndex, ret ? m.end() : 0);
160         } else {
161             ret = m.find();
162         }
163         if (ret) {
164             CArray a = new CArray(core()._Array());
165             a.put(core()._index, m.start());
166             a.put(core()._input, toString(s));
167             for (int i = 0, n = m.groupCount(); i < n; i++) {
168                 a.put(a.length(), m.group(i));
169             }
170             return a;
171         } else {
172             return null;
173         }
174     }
175 
176     /**
177      * <p>Gets the <tt>global</tt> field of the current regular expression instance.</p>
178      * <p>The <tt>global</tt> field is a read-only boolean property of regular expression
179      * instances. It specifies whether a particular regular expression performs global matching, 
180      * that is, whether it was created with the "g" attribute.</p>
181      * @return The <tt>global</tt> property of the current {@link CRegExp} object
182      * @since Descripter 1.0
183      */
184     public final boolean global() {
185         return bool(get(core()._global));
186     }
187 
188     /**
189      * <p>Gets the <tt>ignoreCase</tt> field of the current regular expression instance.</p>
190      * <p>The <tt>ignoreCase</tt> field is a read-only boolean property of regular expression 
191      * instances. It specifies whether a particular regular expression performs case-insensitive 
192      * matching, that is, whether it was created with the "i" attribute.</p>
193      * @return The <tt>ignoreCase</tt> property of the current {@link CRegExp} object
194      * @since Descripter 1.0
195      */
196     public final boolean ignoreCase() {
197         return bool(get(core()._ignoreCase));
198     }
199 
200     /**
201      * <p>Gets the <tt>lastIndex</tt> field of the current regular expression instance.</p>
202      * <p>The <tt>lastIndex</tt> field is a read/write property of regular expression 
203      * instances. For regular expressions with the "g" attribute set, it contains an 
204      * integer that specifies the character position immediately following the last match 
205      * found by the {@link #exec(Object)} and {@link #test(Object)} methods. These methods 
206      * use this property as the starting point for the next search they conduct. This 
207      * allows you to call those methods repeatedly, to loop through all matches in a 
208      * string. Note that <tt>lastIndex</tt> is not used by regular expressions that do 
209      * not have the "g" attribute set and do not represent global patterns.</p>
210      * <p>This property is read/write, so you can set it at any time to specify where in the 
211      * target string the next search should begin. {@link #exec(Object)} and {@link #test(Object)} 
212      * automatically reset <tt>lastIndex</tt> to 0 when they fail to find a match 
213      * (or another match). If you begin to search a new string after a successful match 
214      * of some other string, you have to explicitly set this property to 0.</p>
215      * @return The <tt>lastIndex</tt> property of the current {@link CRegExp} object
216      * @since Descripter 1.0
217      */
218     public final int lastIndex() {
219         return intValue(get(core()._lastIndex));
220     }
221 
222     /**
223      * <p>Gets the <tt>multiline</tt> field of the current regular expression instance.</p>
224      * <p>The <tt>multiline</tt> field is a read-only boolean property of regular expression 
225      * instances. It specifies whether a particular regular expression performs multiline 
226      * matching, that is, whether it was created with the "m" attribute.</p>
227      * @return The <tt>multiline</tt> property of the current {@link CRegExp} object
228      * @since Descripter 1.0
229      */
230     public final boolean multiline() {
231         return bool(get(core()._multiline));
232     }
233 
234     /**
235      * <p>Gets the <tt>source</tt> field of the current regular expression instance.</p>
236      * <p>The <tt>source</tt> field is a read-only string property of regular expression 
237      * instances. It contains the text of the regular expression. This text does not include 
238      * the delimiting slashes used in regular-expression literals, and it does not include 
239      * the "g", "i", and "m" attributes.</p>
240      * @return The <tt>source</tt> property of the current {@link CRegExp} object
241      * @since Descripter 1.0
242      */
243     public final String source() {
244         return toString(get(core()._source));
245     }
246 
247     /**
248      * <p>Tests whether a string contains the pattern represented by the current regular 
249      * expression.</p>
250      * <p></p>
251      * @param s The string to be tested.
252      * @return <tt>true</tt> if <tt>s</tt> contains text that matches the current 
253      * regular expression; false otherwise.
254      * @throws RuntimeException JavaScript throws a <tt>TypeError</tt> if this method 
255      * is invoked with an instance that is not a regular expression.
256      * @see #exec(Object)
257      * @see #lastIndex()
258      * @see CString#match(CRegExp)
259      * @see CString#replace(CRegExp, String)
260      * @see CString#replace(CRegExp, Function)
261      * @see CString#search(CRegExp)
262      * @see CString#substring(Object)
263      * @see CString#substring(Object, Object)
264      * @since Descripter 1.0
265      */
266     public final boolean test(Object s) {
267         Matcher m = pattern().matcher(toString(s));
268         if (global()) {
269             boolean ret = m.find(lastIndex());
270             put(core()._lastIndex, ret ? m.end() : 0);
271             return ret;
272         } else {
273             return m.find();
274         }
275     }
276 
277     /**
278      * <p>Sets the value associated with the specified key.</p>
279      * @param key A {@link Key} to set the value
280      * @param val The value to set
281      * @throws RuntimeException if the current context is read-only.
282      * @since Descripter 1.0
283      */
284     @Override
285     public void put(Integer key, Object val) {
286         if (
287                 !key.equals(core()._global) &&
288                 !key.equals(core()._ignoreCase) &&
289                 !key.equals(core()._multiline) &&
290                 !key.equals(core()._source)) {
291             super.put(key, val);
292         }
293     }
294 
295     /**
296      * <p>Returns a string representation of the current object.</p>
297      * @return The string representation of the current object
298      * @since Descripter 1.0
299      */
300     @Override
301     public final String toString() {
302         StringBuilder sb = new StringBuilder();
303         sb.append("/");
304         sb.append(source());
305         sb.append("/");
306         if (global()) {
307             sb.append('g');
308         }
309         if (ignoreCase()) {
310             sb.append('i');
311         }
312         if (multiline()) {
313             sb.append('m');
314         }
315         return sb.toString();
316     }
317 }